ecm-6.4.4/0000755023561000001540000000000012113421640007265 500000000000000ecm-6.4.4/median.c0000644023561000001540000005030012106741273010615 00000000000000/* Median/middle product. Copyright 2003, 2004, 2005, 2006, 2007, 2008 Laurent Fousse, Paul Zimmermann, Alexander Kruppa, Dave Newman. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ /* Reference: [1] Tellegen's Principle into Practice, by A. Bostan, G. Lecerf and E. Schost, Proc. of ISSAC'03, Philadelphia, 2003. */ #include #include "ecm-impl.h" #ifndef MAX #define MAX(a,b) (((a) > (b)) ? (a) : (b)) #endif #ifndef MIN #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) #endif extern unsigned int Fermat; static void list_add_wrapper (listz_t, listz_t, listz_t, unsigned int, unsigned int); static void list_sub_wrapper (listz_t, listz_t, listz_t, unsigned int, unsigned int); static unsigned int TKarMul (listz_t, unsigned int, listz_t, unsigned int, listz_t, unsigned int, listz_t); static void list_sub_safe (listz_t, listz_t, listz_t, unsigned int, unsigned int, unsigned int); static void list_add_safe (listz_t, listz_t, listz_t, unsigned int, unsigned int, unsigned int); static unsigned int TToomCookMul (listz_t, unsigned int, listz_t, unsigned int, listz_t, unsigned int, listz_t); static unsigned int TToomCookMul_space (unsigned int, unsigned int, unsigned int); static void list_add_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n, unsigned int max_r) { list_add (p, q, r, MIN (n, max_r)); if (n > max_r) list_set (p + max_r, q + max_r, n - max_r); } static void list_sub_wrapper (listz_t p, listz_t q, listz_t r, unsigned int n, unsigned int max_r) { list_sub (p, q, r, MIN (n, max_r)); if (n > max_r) list_set (p + max_r, q + max_r, n - max_r); } /* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients of degree m to n+m of rev(a)*c, i.e. b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l) ... b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k) ... b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n]. Using auxiliary memory in t. Implements algorithm TKarMul of [1]. Assumes deg(c) = l <= m+n. 
*/

/* Transposed Karatsuba multiplication.  Recurses on halves of a and b and
   returns the accumulated count returned by the recursive calls (the two
   base cases n == 0 and m == 0 return the number of mpz_mul/mpz_addmul
   calls they perform). */
static unsigned int
TKarMul (listz_t b, unsigned int n,
         listz_t a, unsigned int m, listz_t c, unsigned int l, listz_t t)
{
  unsigned int k, mu, nu, h;
  unsigned int s1;
  unsigned tot_muls = 0;
#ifdef DEBUG
  fprintf (ECM_STDOUT, "Enter TKarMul.\nm = %d\nn = %d\nl = %d\n", m, n, l);
  fprintf (ECM_STDOUT, "a = ");
  print_list (a, m + 1);
  fprintf (ECM_STDOUT, "\nc = ");
  print_list (c, l + 1);
  fprintf (ECM_STDOUT, "\n");
#endif

  /* Only b[0] is wanted: a plain dot product of a and c */
  if (n == 0)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Case n = 0.\n");
#endif
      mpz_mul (b[0], a[0], c[0]);
      for (k = 1; (k <= m) && (k <= l); k++)
        mpz_addmul (b[0], a[k], c[k]);
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Exit TKarMul.\n");
#endif
      return MIN (m, l) + 1;
    }

  /* a is a constant: scale c, zero-fill b beyond the end of c */
  if (m == 0)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Case m = 0.\n");
#endif
      for (k = 0; (k <= l) && (k <= n); k++)
        mpz_mul (b[k], a[0], c[k]);
      for (k = l + 1; k <= n; k++)
        mpz_set_ui (b[k], 0);
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Exit TKarMul.\n");
#endif
      return MIN (n, l) + 1;
    }

  mu = (m / 2) + 1;             /* 1 <= mu <= m */
  nu = (n / 2) + 1;             /* 1 <= nu <= n */
  h = MAX (mu, nu);             /* h >= 1 */

#ifdef DEBUG
  fprintf (ECM_STDOUT, "mu = %d\nnu = %d\nh = %d\n", mu, nu, h);
#endif

  /* Unbalanced case: split a only, and add the two partial results */
  if (mu > n)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Case mu > n.\n");
#endif
      tot_muls += TKarMul (b, n, a, mu - 1, c, l, t);
      if (l >= mu)
        {
          /* we have to check l-mu <= n + (m-mu), i.e. l <= n+m */
          tot_muls += TKarMul (t, n, a + mu, m - mu, c + mu, l - mu,
                               t + n + 1);
          list_add (b, b, t, n + 1);
        }
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Exit TKarMul.\n");
#endif
      return tot_muls;
    }

  /* Unbalanced case: split b only, the two results are concatenated */
  if (nu > m)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Case nu > m.\n");
#endif
      /* we have to check MIN(l,m+nu-1) <= nu-1+m: trivial */
      tot_muls += TKarMul (b, nu - 1, a, m, c, MIN (l, m + nu - 1), t);

      /* Description broken in reference. Should be a list
       * concatenation, not an addition.
       * Fixed now.
       */

      if (l >= nu)
        {
          /* we have to check l-nu <= n-nu+m, i.e. l <= n+m: trivial */
          tot_muls += TKarMul (b + nu, n - nu, a, m, c + nu, l - nu, t);
        }
      else
        list_zero (b + nu, n - nu + 1);
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Exit TKarMul.\n");
#endif
      return tot_muls;
    }

  /* We want nu = mu */

  mu = nu = h;

#ifdef DEBUG
  fprintf (ECM_STDOUT, "Base Case.\n");
#endif

  /* t[0..s1-1] = c - (c shifted down by nu), where c + nu has l - nu + 1
     entries */
  s1 = MIN (l + 1, n + mu);
  if (l + 1 > nu)
    list_sub_wrapper (t, c, c + nu, s1, l - nu + 1);
  else
    list_set (t, c, s1);
#ifdef DEBUG
  fprintf (ECM_STDOUT, "DEBUG c - c[nu].\n");
  print_list (t, s1);
  fprintf (ECM_STDOUT, "We compute (1) - (3)\n");
#endif
  tot_muls += TKarMul (b, nu - 1, a, mu - 1, t, s1 - 1, t + s1);
  /* (1) - (3) */
#ifdef DEBUG
  print_list (b, nu);
  fprintf (ECM_STDOUT, "We compute (2) - (4)\n");
#endif
  if (s1 >= nu + 1) { /* nu - 1 */
    tot_muls += TKarMul (b + nu, n - nu, a + mu, m - mu,
                         t + nu, s1 - nu - 1, t + s1);
    /* (2) - (4) */
  }
  else {
    list_zero (b + nu, n - nu + 1);
  }
#ifdef DEBUG
  print_list (b + nu, n - nu + 1);
#endif
  /* t[0..mu-1] = a_low + a_high, a_high having m + 1 - mu entries */
  list_add_wrapper (t, a, a + mu, mu, m + 1 - mu);
#ifdef DEBUG
  fprintf (ECM_STDOUT, "We compute (2) + (3)\n");
#endif
  if (l >= nu) {
    tot_muls += TKarMul (t + mu, nu - 1, t, mu - 1, c + nu, l - nu,
                         t + mu + nu);
  }
  else
    list_zero (t + mu, nu);
  /* (2) + (3) */
#ifdef DEBUG
  print_list (t + mu, nu);
#endif
  /* Recombine: b_low += (2)+(3), b_high = (2)+(3) - b_high */
  list_add (b, b, t + mu, nu);
  list_sub (b + nu, t + mu, b + nu, n - nu + 1);
  return tot_muls;
}

/* Computes the space needed for TKarMul of b[0..n],
 * a[0..m] and c[0..l].
 * Follows the same case analysis as TKarMul and takes the maximum over the
 * recursive calls, each offset by where TKarMul places its scratch pointer. */
static unsigned int
TKarMul_space (unsigned int n, unsigned int m, unsigned int l)
{
  unsigned int mu, nu, h;
  unsigned int s1;
  unsigned int r1, r2;

  if (n == 0)
    return 0;

  if (m == 0)
    return 0;

  mu = (m / 2) + 1;
  nu = (n / 2) + 1;
  h = MAX (mu, nu);

  if (mu > n)
    {
      r1 = TKarMul_space (n, mu - 1, l);
      if (l >= mu)
        {
          r2 = TKarMul_space (n, m - mu, l - mu) + n + 1;
          r1 = MAX (r1, r2);
        }
      return r1;
    }

  if (nu > m)
    {
      r1 = TKarMul_space (nu - 1, m, MIN (l, m + nu - 1));
      if (l >= nu)
        {
          r2 = TKarMul_space (n - nu, m,l - nu);
          r1 = MAX (r1, r2);
        }
      return r1;
    }

  mu = nu = h;

  s1 = MIN (l + 1, n + mu);
  r1 = TKarMul_space (nu - 1, mu - 1, s1 - 1) + s1;
  if (s1 >= nu + 1)
    {
      r2 = TKarMul_space (n - nu, m - mu, s1 - nu - 1) + s1;
      r1 = MAX (r1, r2);
    }
  if (l >= nu)
    {
      r2 = TKarMul_space (nu - 1, mu - 1, l - nu) + mu + nu;
      r1 = MAX (r1, r2);
    }
  return r1;
}

/* list_sub with bound checking:
   ret[i] = a[i] - b[i] for 0 <= i < needed, where a has sizea entries and
   b has sizeb entries; entries beyond the end of a list count as zero. */
static void
list_sub_safe (listz_t ret, listz_t a, listz_t b,
               unsigned int sizea, unsigned int sizeb,
               unsigned int needed)
{
  unsigned int i;
  unsigned int safe;
  /* Range on which both operands exist: plain list_sub */
  safe = MIN(sizea, sizeb);
  safe = MIN(safe, needed);

  list_sub (ret, a, b, safe);

  i = safe;
  while (i < needed)
    {
      if (i < sizea)
        {
          if (i < sizeb)
            mpz_sub (ret[i], a[i], b[i]);
          else
            mpz_set (ret[i], a[i]);
        }
      else
        {
          if (i < sizeb)
            mpz_neg (ret[i], b[i]);
          else
            mpz_set_ui (ret[i], 0);
        }
      i++;
    }
}

/* list_add with bound checking:
   ret[i] = a[i] + b[i] for 0 <= i < needed, where a has sizea entries and
   b has sizeb entries; entries beyond the end of a list count as zero. */
static void
list_add_safe (listz_t ret, listz_t a, listz_t b,
               unsigned int sizea, unsigned int sizeb,
               unsigned int needed)
{
  unsigned int i;
  unsigned int safe;
  /* Range on which both operands exist: plain list_add */
  safe = MIN(sizea, sizeb);
  safe = MIN(safe, needed);

  list_add (ret, a, b, i = safe);

  while (i < needed)
    {
      if (i < sizea)
        {
          if (i < sizeb)
            mpz_add (ret[i], a[i], b[i]);
          else
            mpz_set (ret[i], a[i]);
        }
      else
        {
          if (i < sizeb)
            mpz_set (ret[i], b[i]);
          else
            mpz_set_ui (ret[i], 0);
        }
      i++;
    }
}

/* Transposed Toom-Cook multiplication with the same calling convention as
   TKarMul: b[0..n] receives the result for a[0..m] and c[0..l], tmp is
   scratch space.  Falls back to TKarMul when n < 2*(n/3+1) or
   m < 2*(m/3+1).  Returns the accumulated count of the recursive calls. */
static unsigned int
TToomCookMul (listz_t b, unsigned int n,
              listz_t a, unsigned int m, listz_t c, unsigned int l,
              listz_t tmp)
{
  unsigned int nu, mu, h;
  unsigned int i;
  unsigned int btmp;
  unsigned int tot_muls = 0;

  nu = n / 3 + 1;
  mu = m / 3 + 1;

  /* ensures n + 1 > 2 * nu */
  if ((n < 2 * nu) || (m < 2 * mu))
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Too small operands, calling TKara.\n");
#endif
      return TKarMul (b, n, a, m, c, l, tmp);
    }

  /* First strip unnecessary trailing coefficients of c: */

  l = MIN(l, n + m);

  /* Now the degenerate cases. We want 2 * nu <= m.
   *
   */

  if (m < 2 * nu)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Degenerate Case 1.\n");
#endif
      /* b is computed in three consecutive slices, each with the full a */
      tot_muls += TToomCookMul (b, nu - 1, a, m, c, l, tmp);
      if (l >= nu)
        tot_muls += TToomCookMul (b + nu, nu - 1, a, m,
                                  c + nu, l - nu, tmp);
      else
        list_zero (b + nu, nu);
      if (l >= 2 * nu) /* n >= 2 * nu is assured. Hopefully */
        tot_muls += TToomCookMul (b + 2 * nu, n - 2 * nu, a, m,
                                  c + 2 * nu, l - 2 * nu, tmp);
      else
        list_zero (b + 2 * nu, n - 2 * nu + 1);
      return tot_muls;
    }

  /* Second degenerate case. We want 2 * mu <= n. */

  if (n < 2 * mu)
    {
#ifdef DEBUG
      fprintf (ECM_STDOUT, "Degenerate Case 2.\n");
#endif
      /* a is cut in three slices; the partial results are summed into b */
      tot_muls += TToomCookMul (b, n, a, mu - 1, c, l, tmp);
      if (l >= mu)
        {
          tot_muls += TToomCookMul (tmp, n, a + mu, mu - 1,
                                    c + mu, l - mu, tmp + n + 1);
          list_add (b, b, tmp, n + 1);
        }
      if (l >= 2 * mu)
        {
          tot_muls += TToomCookMul (tmp, n, a + 2 * mu, m - 2 * mu,
                                    c + 2 * mu, l - 2 * mu, tmp + n + 1);
          list_add (b, b, tmp, n + 1);
        }
      return tot_muls;
    }

#ifdef DEBUG
  fprintf (ECM_STDOUT, "Base Case.\n");
  fprintf (ECM_STDOUT, "a = ");
  print_list (a, m + 1);

  fprintf (ECM_STDOUT, "\nc = ");
  print_list (c, l + 1);
#endif
  h = MAX(nu, mu);
  nu = mu = h;

  /* In the tables below c0..c4 denote the slices of c starting at offsets
     0, h, 2h, 3h and 4h, and a0, a1, a2 the slices of a at 0, h and 2h. */
  list_sub_safe (tmp, c + 3 * h, c + h,
                 (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
                 (l + 1 > h ? l + 1 - h : 0), 2 * h - 1);
  list_sub_safe (tmp + 2 * h - 1, c, c + 2 * h,
                 l + 1, (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
                 2 * h - 1);
  for (i = 0; i < 2 * h - 1; i++)
    mpz_mul_2exp (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], 1);

#ifdef DEBUG
  print_list (tmp, 4 * h - 2);
#endif

  /* --------------------------------
   * | 0 ..  2*h-2 | 2*h-1 .. 4*h-3 |
   * --------------------------------
   * |  c3 - c1    |   2(c0 - c2)   |
   * --------------------------------
   */

  list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, tmp, 2 * h - 1);

  tot_muls += TToomCookMul (b, h - 1, a, h - 1, tmp + 2 * h - 1,
                            2 * h - 2, tmp + 4 * h - 2);

  /* b[0 .. h - 1] = 2 * m0 */

#ifdef DEBUG
  fprintf (ECM_STDOUT, "2 * m0 = ");
  print_list (b, h);
#endif

  list_add (tmp + 2 * h - 1, a, a + h, h);

  list_add (tmp + 2 * h - 1, tmp + 2 * h - 1, a + 2 * h,
            MIN(h, m + 1 - 2 * h));

  /* tmp[2*h-1 .. 3*h-2] = a0 + a1 + a2 */

#ifdef DEBUG
  fprintf (ECM_STDOUT, "\na0 + a1 + a2 = ");
  print_list (tmp + 2 * h - 1, h);
#endif

  list_sub_safe (tmp + 3 * h - 1, c + 2 * h, c + 3 * h,
                 (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
                 (l + 1 > 3 * h ? l + 1 - 3 * h : 0),
                 2 * h - 1);

  /* -------------------------------------------------
   * | 0 ..  2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 |
   * -------------------------------------------------
   * |  c3 - c1    |  a0 + a1 + a2  |   c2 - c3      |
   * -------------------------------------------------
   */

  /* tmp[5*h-2 ..] = 2*c1 + (c2 - c3), c1 being zero beyond the end of c */
  btmp = (l + 1 > h ? l + 1 - h : 0);
  btmp = MIN(btmp, 2 * h - 1);
  for (i = 0; i < btmp; i++)
    {
      mpz_mul_2exp (tmp[5 * h - 2 + i], c[h + i], 1);
      mpz_add (tmp[5 * h - 2 + i], tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
    }
  while (i < 2 * h - 1)
    {
      mpz_set (tmp[5 * h - 2 + i], tmp[3 * h - 1 + i]);
      i++;
    }

  tot_muls += TToomCookMul (b + h, h - 1, tmp + 2 * h - 1, h - 1,
                            tmp + 5 * h - 2, 2 * h - 2,
                            tmp + 7 * h - 3);

  /* b[h .. 2 * h - 1] = 2 * m1 */
#ifdef DEBUG
  fprintf (ECM_STDOUT, "\n2 * m1 = ");
  print_list (b + h, h);
#endif

  /* ------------------------------------------------------------------
   * | 0 ..  2*h-2 | 2*h-1 .. 3*h-2 | 3*h-1 .. 5*h-3 | 5*h-2 .. 7*h-4 |
   * ------------------------------------------------------------------
   * |  c3 - c1    |  a0 + a1 + a2  |   c2 - c3      | c2 - c3 + 2c1  |
   * ------------------------------------------------------------------
   */

  /* Adds a1 + 3*a2 to tmp[2*h-1 .. 3*h-2] (a2 only where it exists) */
  for (i = 0; i < h; i++)
    {
      mpz_add (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], a[i + h]);
      if (2 * h + i <= m)
        mpz_addmul_ui (tmp[2 * h - 1 + i], a[2 * h + i], 3);
    }

  tot_muls += TToomCookMul (tmp + 5 * h - 2, h - 1,
                            tmp + 2 * h - 1, h - 1,
                            tmp, 2 * h - 2,
                            tmp + 6 * h - 2);

  /* tmp[5*h-2 .. 6*h - 3] = 6 * m2 */

#ifdef DEBUG
  fprintf (ECM_STDOUT, "\n6 * m2 = ");
  print_list (tmp + 5 * h - 2, h);
#endif
  /* tmp[2*h-1 .. 3*h-2] = a0 - a1 + a2 (a2 only where it exists) */
  for (i = 0; i < h; i++)
    {
      mpz_sub (tmp[2 * h - 1 + i], a[i], a[h + i]);
      if (i + 2 * h <= m)
        mpz_add (tmp[2 * h - 1 + i], tmp[2 * h - 1 + i], a[2 * h + i]);
    }

  for (i = 0; i < 2 * h - 1; i++)
    {
      mpz_mul_ui (tmp[3 * h - 1 + i], tmp[3 * h - 1 + i], 3);
      mpz_mul_2exp (tmp[i], tmp[i], 1);
    }

  list_add (tmp + 3 * h - 1, tmp + 3 * h - 1, tmp, 2 * h - 1);

  tot_muls += TToomCookMul (tmp + 6 * h - 2, h - 1,
                            tmp + 2 * h - 1, h - 1,
                            tmp + 3 * h - 1, 2 * h - 2,
                            tmp + 7 * h - 2);

  /* tmp[6h-2 .. 7h - 3] = 6 * mm1 */

#ifdef DEBUG
  fprintf (ECM_STDOUT, "\n6 * mm1 = ");
  print_list (tmp + 6 * h - 2, h);
#endif
  list_add_safe (tmp, tmp, c + 2 * h,
                 2 * h,
                 (l + 1 > 2 * h ? l + 1 - 2 * h : 0),
                 2 * h - 1);

  list_sub_safe (tmp, c + 4 * h, tmp,
                 (l + 1 > 4 * h ? l + 1 - 4 * h : 0),
                 2 * h - 1, 2 * h - 1);

  tot_muls += TToomCookMul (b + 2 * h, n - 2 * h, a + 2 * h, m - 2 * h,
                            tmp, 2 * h - 1, tmp + 7 * h - 2);

  /* b[2 * h .. n] = minf */

#ifdef DEBUG
  fprintf (ECM_STDOUT, "\nminf = ");
  print_list (b + 2 * h, n + 1 - 2 * h);
#endif

  /* Layout of b :
   * ---------------------------------------
   * | 0 ... h-1 | h ... 2*h-1 | 2*h ... n |
   * ---------------------------------------
   * |  2 * m0   |   2 * m1    |    minf   |
   * ---------------------------------------
   *
   * Layout of tmp :
   * ---------------------------------------------------
   * | 0 ... 5*h-1 | 5*h-2 ... 6*h-3 | 6*h-2 ... 7*h-3 |
   * ---------------------------------------------------
   * |  ??????     |    6 * m2       |   6 * mm1       |
   * ---------------------------------------------------
   */

  list_add (tmp, tmp + 5 * h - 2, tmp + 6 * h - 2, h);
  for (i = 0; i < h; i++)
    mpz_divby3_1op (tmp[i]);

  /* t1 = 2 (m2 + mm1)
   * tmp[0 .. h - 1] = t1
   */

  list_add (b, b, b + h, h);
  list_add (b, b, tmp, h);
  for (i = 0; i < h; i++)
    mpz_tdiv_q_2exp (b[i], b[i], 1);

  /* b_{low} should be correct */

  list_add (tmp + h, b + h, tmp, h);

  /* t2 = t1 + 2 m1
   * tmp[h .. 2h - 1] = t2
   */

  list_add (b + h, tmp, tmp + h, h);
  list_sub (b + h, b + h, tmp + 6 * h - 2, h);
  for (i = 0; i < h; i++)
    mpz_tdiv_q_2exp (b[h + i], b[h + i], 1);

  /* b_{mid} should be correct */

  list_add (tmp + h, tmp + h, tmp + 5 * h - 2, n + 1 - 2 * h);
  for (i = 0; i < n + 1 - 2 * h; i++)
    mpz_tdiv_q_2exp (tmp[h + i], tmp[h + i], 1);

  list_add (b + 2 * h, b + 2 * h, tmp + h, n + 1 - 2 * h);
  /* b_{high} should be correct */

  return tot_muls;
}

/* Returns space needed by TToomCookMul.
   NOTE(review): the two degenerate-case tests below use "<=" while the
   corresponding tests in TToomCookMul use "<", so for m == 2*nu or
   n == 2*mu the two functions follow different branches -- confirm that
   the estimate is still large enough in those cases. */

unsigned int
TToomCookMul_space (unsigned int n, unsigned int m, unsigned int l)
{
  unsigned int nu, mu, h;
  unsigned int stmp1, stmp2;

  nu = n / 3 + 1;
  mu = m / 3 + 1;

  stmp1 = stmp2 = 0;

  /* ensures n + 1 > 2 * nu */
  if ((n < 2 * nu) || (m < 2 * mu))
    return TKarMul_space (n, m, l);

  /* First strip unnecessary trailing coefficients of c: */

  l = MIN(l, n + m);

  /* Now the degenerate cases. We want 2 * nu < m.
   *
   */

  if (m <= 2 * nu)
    {
      stmp1 = TToomCookMul_space (nu - 1, m, l);
      if (l >= 2 * nu)
        stmp2 = TToomCookMul_space (n - 2 * nu, m, l - 2 * nu);
      else if (l >= nu)
        stmp2 = TToomCookMul_space (nu - 1, m, l - nu);
      return MAX(stmp1, stmp2);
    }

  /* Second degenerate case. We want 2 * mu < n. */

  if (n <= 2 * mu)
    {
      stmp1 += TToomCookMul_space (n, mu - 1, l);
      if (l >= 2 * mu)
        stmp2 = TToomCookMul_space (n, m - 2 * mu, l - 2 * mu) + n + 1;
      else if (l >= mu)
        stmp2 = TToomCookMul_space (n, mu - 1, l - mu) + n + 1;
      return MAX(stmp1, stmp2);
    }

  h = MAX(nu, mu);

  /* The four h-sized recursive calls use scratch offsets of at most
     7*h-2; the final call also starts at tmp + 7*h-2 */
  stmp1 = TToomCookMul_space (h - 1, h - 1, 2 * h - 2);
  stmp2 = stmp1 + 7 * h - 2;
  stmp1 = stmp1 + 6 * h - 2;
  stmp1 = MAX(stmp1, stmp2);
  stmp2 = TToomCookMul_space (n - 2 * h, m - 2 * h, 2 * h - 1) + 7*h-2;
  return MAX(stmp1, stmp2);
}

/* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients
   of degree m to n+m of rev(a)*c, i.e.
   b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l)
   ...
   b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k)
   ...
   b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n].
   Using auxiliary memory in tmp.
Assumes n <= l. Returns number of multiplications if known, 0 if not known, and -1 for error. */ int TMulGen (listz_t b, unsigned int n, listz_t a, unsigned int m, listz_t c, unsigned int l, listz_t tmp, mpz_t modulus) { ASSERT (n <= l); if (Fermat) { unsigned int i; for (i = l + 1; i > 1 && (i&1) == 0; i >>= 1); ASSERT(i == 1); ASSERT(n + 1 == (l + 1) / 2); ASSERT(m == l - n || m + 1 == l - n); return F_mul_trans (b, a, c, m + 1, l + 1, Fermat, tmp); } #ifdef KS_MULTIPLY if ((double) n * (double) mpz_sizeinbase (modulus, 2) >= KS_TMUL_THRESHOLD) { if (TMulKS (b, n, a, m, c, l, modulus, 1)) /* Non-zero means error */ return -1; return 0; /* We have no mul count so we return 0 */ } #endif return TToomCookMul (b, n, a, m, c, l, tmp); } unsigned int TMulGen_space (unsigned int n, unsigned int m, unsigned int l) { if (Fermat) return 2 * (l + 1); else return TToomCookMul_space (n, m, l); } ecm-6.4.4/resume.c0000644023561000001540000003460212106741273010667 00000000000000/* Functions for reading a writing resume file lines. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2010, 2011, 2012 Paul Zimmermann, Alexander Kruppa and Cyril Bouvier. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include #include #include #include #if !defined (_MSC_VER) #include #endif #include #include "ecm.h" #include "ecm-ecm.h" #ifdef HAVE_FCNTL_H #include #endif #if defined (_MSC_VER) || defined (__MINGW32__) /* needed to declare GetComputerName() for write_resumefile_line() */ #include #endif /* Reads a string of characters from fd while they match the string s. Returns the number of matching characters that were read. */ static int facceptstr (FILE *fd, char *s) { int c; unsigned i = 0; while (s[i] != 0 && (c = fgetc (fd)) != EOF) { if (c != s[i++]) { ungetc (c, fd); return i-1; } } return i; } /* Accepts "\n" or "\r\n" or "\r". Returns 1 if any of the three was read, 0 otherwise */ static int facceptnl (FILE *fd) { int c, r = 0; c = fgetc (fd); if (c == '\r') { c = fgetc (fd); r = 1; } if (c == '\n') r = 1; else if (c != EOF) ungetc (c, fd); return r; } /* Reads a string from fd until the character "delim" or newline is seen, or "len" characters have been written to "s" (including terminating null), or EOF is reached. The "delim" and newline characters are left on the stream. If s is NULL, characters are read from fd but not written anywhere. Returns the number of characters read. */ static int freadstrn (FILE *fd, char *s, char delim, unsigned int len) { unsigned int i = 0; int c; while (i + 1 < len && (c = fgetc (fd)) != EOF) if (c == delim || IS_NEWLINE(c)) { ungetc (c, fd); break; } else if (s != NULL) s[i++] = (char) c; if (i < len && s != NULL) s[i++] = 0; return i; } /* Reads an assignment from a save file. 
Return 1 if an assignment was successfully read, 0 if there are no more lines to read (at EOF) */ int read_resumefile_line (int *method, mpz_t x, mpcandi_t *n, mpz_t sigma, mpz_t A, mpz_t x0, double *b1, char *program, char *who, char *rtime, char *comment, FILE *fd) { int a, have_method, have_x, have_z, have_n, have_sigma, have_a, have_b1, have_checksum, have_qx; unsigned int saved_checksum; char tag[16]; mpz_t z; while (!feof (fd)) { /* Ignore empty lines */ if (facceptnl (fd)) { continue; } /* Ignore lines beginning with '#'*/ if (facceptstr (fd, "#")) { while (!facceptnl (fd) && !feof (fd)) fgetc (fd); continue; } if (feof (fd)) break; have_method = have_x = have_z = have_n = have_sigma = have_a = have_b1 = have_qx = have_checksum = 0; /* Set optional fields to zero */ mpz_set_ui (sigma, 0); mpz_set_ui (A, 0); if (program != NULL) program[0] = 0; if (who != NULL) who[0] = 0; if (rtime != NULL) rtime[0] = 0; if (comment != NULL) comment[0] = 0; while (!facceptnl (fd) && !feof (fd)) { freadstrn (fd, tag, '=', 16); if (!facceptstr (fd, "=")) { printf ("Save file line has no equal sign after: %s\n", tag); goto error; } if (strcmp (tag, "METHOD") == 0) { if (facceptstr (fd, "ECM") == 3) { *method = ECM_ECM; } else if (facceptstr (fd, "P")) { a = facceptstr (fd, "-1"); if (a == 2) { *method = ECM_PM1; } else if (a == 0 && facceptstr (fd, "+1") == 2) { *method = ECM_PP1; } else goto error; } else goto error; have_method = 1; } else if (strcmp (tag, "X") == 0) { mpz_inp_str (x, fd, 0); have_x = 1; } else if (strcmp (tag, "Z") == 0) { mpz_init (z); mpz_inp_str (z, fd, 0); have_z = 1; } else if (strcmp (tag, "QX") == 0) { mpz_inp_str (x, fd, 0); have_qx = 1; } else if (strcmp (tag, "X0") == 0) { mpz_inp_str (x0, fd, 0); } else if (strcmp (tag, "CHECKSUM") == 0) { if (fscanf (fd, "%u", &saved_checksum) != 1) goto error; have_checksum = 1; } else if (strcmp (tag, "COMMENT") == 0) { freadstrn (fd, comment, ';', 255); } else if (strcmp (tag, "N") == 0) { /*mpz_inp_str (n, 
fd, 0);*/ /* we want to "maintain" any expressions, which were possibly stored in the file for N */ have_n = read_number (n, fd, 0); } else if (strcmp (tag, "SIGMA") == 0) { mpz_inp_str (sigma, fd, 0); have_sigma = 1; } else if (strcmp (tag, "A") == 0) { mpz_inp_str (A, fd, 0); have_a = 1; } else if (strcmp (tag, "B1") == 0) { if (fscanf (fd, "%lf", b1) != 1) goto error; have_b1 = 1; } else if (strcmp (tag, "PROGRAM") == 0) { freadstrn (fd, program, ';', 255); } else if (strcmp (tag, "WHO") == 0) { freadstrn (fd, who, ';', 255); } else if (strcmp (tag, "TIME") == 0) { freadstrn (fd, rtime, ';', 255); } else /* Not a tag we know about */ { printf ("Save file line has unknown tag: %s\n", tag); goto error; } /* Prime95 lines have no semicolon after SIGMA */ if (!facceptstr (fd, ";") && ! (have_qx && have_n && have_sigma)) { printf ("%s field not followed by semicolon\n", tag); goto error; } while (facceptstr (fd, " ")); } /* Finished reading tags */ /* Handle Prime95 v22 lines. These have no METHOD=ECM field and QX= instead of X= */ if (have_qx) { if (have_n && have_sigma) { *method = ECM_ECM; /* *b1 = 1.0; */ strcpy (program, "Prime95"); mpz_mod (x, x, n->n); return 1; } goto error; } #ifdef DEBUG if (*method != ECM_ECM && (have_sigma || have_a || have_z)) { int count = have_sigma + have_a + have_z; printf ("Warning: Save file line has"); if (have_sigma) { printf (" SIGMA"); mpz_set_ui (sigma, 0); if (--count > 1) printf (","); else if (count > 0) printf (" and"); } if (have_a) { printf (" A"); mpz_set_ui (A, 0); if (--count > 0) printf (" and"); } if (have_z) { printf (" Z"); mpz_clear (Z); have_z = 0; } printf (" value for method other than ECM.\n"); } #endif if (!have_method || !have_x || !have_n || !have_b1 || (method == ECM_ECM && !have_sigma && !have_a)) { fprintf (stderr, "Save file line lacks fields\n"); continue; } if (have_checksum) { mpz_t checksum; mpz_init (checksum); mpz_set_d (checksum, *b1); if (have_sigma) mpz_mul_ui (checksum, checksum, mpz_fdiv_ui 
(sigma, CHKSUMMOD)); if (have_a) mpz_mul_ui (checksum, checksum, mpz_fdiv_ui (A, CHKSUMMOD)); mpz_mul_ui (checksum, checksum, mpz_fdiv_ui (n->n, CHKSUMMOD)); mpz_mul_ui (checksum, checksum, mpz_fdiv_ui (x, CHKSUMMOD)); if (have_z) mpz_mul_ui (checksum, checksum, mpz_fdiv_ui (z, CHKSUMMOD)); if (mpz_fdiv_ui (checksum, CHKSUMMOD) != saved_checksum) { fprintf (stderr, "Resume file line has bad checksum %u, expected %lu\n", saved_checksum, mpz_fdiv_ui (checksum, CHKSUMMOD)); mpz_clear (checksum); continue; } mpz_clear (checksum); } mpz_mod (x, x, n->n); if (have_z) /* Must normalize */ { if (!mpz_invert (z, z, n->n)) /* Factor found? */ { /* Oh great. What do we do with it now? */ /* mpres_gcd (f, z, n); */ printf ("Oops, factor found while reading from save file.\n"); } mpz_mul (z, z, x); mpz_mod (x, z, n->n); } return 1; error: /* In case of error, read rest of line and try next line */ while (!facceptnl (fd) && !feof (fd)) fgetc (fd); } /* We hit EOF without reading a proper save line */ return 0; } /* Append a residue to the savefile with name given in fn. 
Returns 1 on success, 0 on error */ int write_resumefile_line (char *fn, int method, double B1, mpz_t sigma, mpz_t A, mpz_t x, mpcandi_t *n, mpz_t x0, const char *comment) { FILE *file; mpz_t checksum; time_t t; char text[256]; char *uname, mname[32]; #if defined(HAVE_FCNTL) && defined(HAVE_FILENO) struct flock lock; int r, fd; #endif #ifdef DEBUG if (fn == NULL) { fprintf (stderr, "write_resumefile_line: fn == NULL\n"); exit (EXIT_FAILURE); } #endif file = fopen (fn, "a"); if (file == NULL) { fprintf (stderr, "Could not open file %s for writing\n", fn); return 0; } #if defined(HAVE_FCNTL) && defined(HAVE_FILENO) /* Try to get a lock on the file so several processes can append to the same file safely */ /* Supposedly some implementations of fcntl() can get confused over garbage in unused fields in a flock struct, so zero it */ memset (&lock, 0, sizeof (struct flock)); fd = fileno (file); lock.l_type = F_WRLCK; lock.l_whence = SEEK_SET; lock.l_start = 0; lock.l_len = 1; /* F_SETLKW: blocking exclusive lock request */ r = fcntl (fd, F_SETLKW, &lock); if (r != 0) { fclose (file); return 0; } fseek (file, 0, SEEK_END); #endif mpz_init (checksum); mpz_set_d (checksum, B1); fprintf (file, "METHOD="); if (method == ECM_PM1) fprintf (file, "P-1"); else if (method == ECM_PP1) fprintf (file, "P+1"); else { fprintf (file, "ECM"); if (mpz_sgn (sigma) != 0) { fprintf (file, "; SIGMA="); mpz_out_str (file, 10, sigma); mpz_mul_ui (checksum, checksum, mpz_fdiv_ui (sigma, CHKSUMMOD)); } else if (mpz_sgn (A) != 0) { fprintf (file, "; A="); mpz_out_str (file, 10, A); mpz_mul_ui (checksum, checksum, mpz_fdiv_ui (A, CHKSUMMOD)); } } fprintf (file, "; B1=%.0f; N=", B1); if (n->cpExpr) fprintf(file, "%s", n->cpExpr); else mpz_out_str (file, 10, n->n); fprintf (file, "; X=0x"); mpz_out_str (file, 16, x); mpz_mul_ui (checksum, checksum, mpz_fdiv_ui (n->n, CHKSUMMOD)); mpz_mul_ui (checksum, checksum, mpz_fdiv_ui (x, CHKSUMMOD)); #ifdef GPUECM fprintf (file, "; CHECKSUM=%lu; PROGRAM=GPU-ECM 
%s;", mpz_fdiv_ui (checksum, CHKSUMMOD), VERSION_GPU); #else fprintf (file, "; CHECKSUM=%lu; PROGRAM=GMP-ECM %s;", mpz_fdiv_ui (checksum, CHKSUMMOD), VERSION); #endif mpz_clear (checksum); if (mpz_sgn (x0) != 0) { fprintf (file, " X0=0x"); mpz_out_str (file, 16, x0); fprintf (file, ";"); } /* Try to get the users and his machines name */ /* TODO: how to make portable? */ uname = getenv ("LOGNAME"); if (uname == NULL) uname = getenv ("USERNAME"); if (uname == NULL) uname = ""; #if defined (_MSC_VER) || defined (__MINGW32__) /* dummy block, so that the vars needed here don't need to "spill" over to the rest of the function. */ { DWORD size, i; TCHAR T[MAX_COMPUTERNAME_LENGTH+2]; size=MAX_COMPUTERNAME_LENGTH+1; if (!GetComputerName(T, &size)) strcpy(mname, "localPC"); else { for (i = 0; i < sizeof(mname)-1; ++i) mname[i] = T[i]; mname[sizeof(mname)-1] = 0; } } #else if (gethostname (mname, 32) != 0) mname[0] = 0; mname[31] = 0; /* gethostname() may omit trailing 0 if hostname >31 chars */ #endif if (uname[0] != 0 || mname[0] != 0) { fprintf (file, " WHO=%.233s@%.32s;", uname, mname); } if (comment[0] != 0) fprintf (file, " COMMENT=%.255s;", comment); t = time (NULL); strncpy (text, ctime (&t), 255); text[255] = 0; text[strlen (text) - 1] = 0; /* Remove newline */ fprintf (file, " TIME=%s;", text); fprintf (file, "\n"); fflush (file); #if defined(HAVE_FCNTL) && defined(HAVE_FILENO) lock.l_type = F_UNLCK; lock.l_whence = SEEK_SET; lock.l_start = 0; lock.l_len = 1; fcntl (fd, F_SETLKW, &lock); /* F_SETLKW: blocking lock request */ #endif fclose (file); return 1; } ecm-6.4.4/bestd.c0000644023561000001540000002517412106741273010474 00000000000000/* Choice of best parameters for stage 2. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2010 Paul Zimmermann, Alexander Kruppa, Dave Newman. This file is part of the ECM Library. 
The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include "ecm-impl.h" /* Compute (d, d2, k) such that: (0) k >= k0 (1) d is a multiple of 6 (2) k * d * (eulerphi(d)/2) * d2 / eulerphi(d2) >= B2 - B2min (3) gcd(d, d2) == 1 (4) k is minimal, subject to previous conditions (5) if parameter po2 is != 0, rounds dF up to a power of 2 Return non-zero iff an error occurred (too large step 2 bound). */ /* How we test whether given d,d2,dF,k,i0 parameters cover the desired B2min-B2 range: In stage 2 we generate all values p = f(i * d) +- f(j * d2) with 1. gcd (i, d2) == 1, 2. gcd (j, d) == 1, 3. j == 1 (mod 6), 4. 6|d 5. 1 <= j <= d - 5, (it's -5, not just -1, because of 3. and 4.) 6. i0 <= i <= i1 7. gcd (d, d2) == 1 where f(x) is x^S or the S-th Dickson polynomial g_{S,-1}(x). Extra factors included by S>1 are not considered in this analysis, we assume S=1, f(x)=x so that p = i * d +- j * d2. (Note: i values greater than stated in 3. may be generated if we have to round up dF, for example to a power of 2. However, the root generation code can put anything it likes in those extra roots, so we make no assumption here that this will extend the range of the i values.) 
Hence the values at the high end of the stage 2 range that are not generated are p = (i1 + n) * d +- j * d2, n > 0 and the smallest one of those is p = (i1 + 1) * d - (d - 5) * d2 = d * (i1 - d2 + 1) + 5 * d2 At the low end of stage 2, values not generated are p = (i0 - n) * d +- j * d2, n > 0 the largest one being p = (i0 - 1) * d + (d - 5) * d2 = d * (i0 + d2 - 1) - 5*d2 Thus, values p that are coprime do d*d2 and d * (i0 + d2 - 1) - 5*d2 + 1 <= p <= d * (i1 - d2 + 1) + 5 * d2 - 1 are included in stage 2. The number of roots of G we compute is k * dF. For d2 == 1, this means i1 = i0 + k * dF - 1 (-1 because both i0 and i1 are included). For d2 > 1, values j not coprime to d2 are skipped (see condition 1). The number of values in [1, i0] that are not coprime to d2 (with d2 prime) is floor (i0 / d2); in [1, i1] it is floor (i1 / d2). So we require that k * dF >= i1 - i0 + 1 - (floor (i1 / d2) - floor (i0 / d2)) */ int bestD (root_params_t *root_params, unsigned long *finalk, unsigned long *finaldF, mpz_t B2min, mpz_t B2, int po2, int use_ntt, double maxmem, int treefile, mpmod_t modulus) { /* the following list contains successive values of b with increasing values of eulerphi(b). 
It was generated by the following Maple program: l := [[1,1]]: for b from 12 by 6 do d:=numtheory[phi](b)/2; while d <= l[nops(l)][2] do l:=subsop(nops(l)=NULL, l) od; n := nops(l); if b>1.1*l[n][1] then l := [op(l), [b,d]]; lprint(l) fi; od: */ #define N 109 static unsigned int l[N] = {12, 18, 30, 42, 60, 90, 120, 150, 210, 240, 270, 330, 420, 510, 630, 840, 1050, 1260, 1470, 1680, 1890, 2310, 2730, 3150, 3570, 3990, 4620, 5460, 6090, 6930, 8190, 9240, 10920, 12180, 13860, 16170, 18480, 20790, 23100, 30030, 34650, 39270, 43890, 48510, 60060, 66990, 78540, 90090, 99330, 120120, 133980, 150150, 180180, 210210, 240240, 270270, 300300, 334950, 371280, 420420, 510510, 570570, 600600, 630630, 746130, 870870, 1021020, 1141140, 1291290, 1531530, 1711710, 1891890, 2081310, 2312310, 2552550, 2852850, 3183180, 3573570, 3993990, 4594590, 5105100, 5705700, 6322470, 7147140, 7987980, 8978970, 10210200, 11741730, 13123110, 14804790, 16546530, 19399380, 21411390, 23993970, 26816790, 29609580, 33093060, 36606570, 40330290, 44414370, 49639590, 54624570, 60090030, 67897830, 77597520, 87297210, 96996900, 107056950, 118107990}; #define Npo2 23 static unsigned int lpo2[Npo2] = {12, 30, 60, 120, 240, 510, 1020, 2310, 4620, 9240, 19110, 39270, 79170, 158340, 324870, 690690, 1345890, 2852850, 5705700, 11741730, 23130030, 48498450, 96996900}; unsigned long i, d1 = 0, d2 = 0, dF = 0, phid, k, maxN; mpz_t j, t, i0, i1; int r = 0; if (mpz_cmp (B2, B2min) < 0) { /* No stage 2. Set relevant parameters to 0. Leave B2, B2min the same */ *finalk = 0; *finaldF = 0; return 0; } MPZ_INIT (i0); MPZ_INIT (i1); MPZ_INIT (j); MPZ_INIT (t); k = *finalk; /* User specified k value passed in via finalk */ /* Look for largest dF we can use while satisfying the maxmem parameter */ maxN = (po2) ? Npo2 : N; if (maxmem != 0.) { for (i = 0; i < maxN; i++) { int lg_dF, sp_num = 0; double memory; d1 = (po2) ? lpo2[i] : l[i]; phid = eulerphi (d1) / 2; dF = (po2) ? 
1U << ceil_log2 (phid) : phid; lg_dF = ceil_log2 (dF); if (use_ntt) sp_num = (2 * mpz_sizeinbase (modulus->orig_modulus, 2) + lg_dF) / SP_NUMB_BITS + 4; memory = memory_use (dF, sp_num, (treefile) ? 0 : lg_dF, modulus); outputf (OUTPUT_DEVVERBOSE, "Estimated mem for dF = %.0d, sp_num = %d: %.0f\n", dF, sp_num, memory); if (memory > maxmem) break; } maxN = i; } for (i = 0; i < maxN; i++) { d1 = (po2) ? lpo2[i] : l[i]; phid = eulerphi (d1) / 2; dF = (po2) ? 1U << ceil_log2 (phid) : phid; /* Look for smallest prime < 25 that does not divide d1 */ /* The caller can force d2 = 1 by setting root_params->d2 != 0 */ d2 = 1; if (root_params->d2 == 0) for (d2 = 5; d2 < 25; d2 += 2) { if (d2 % 3 == 0) continue; if (d1 % d2 > 0) break; } if (d2 >= 25 || d2 - 1 > dF) d2 = 1; #if 0 /* The code to init roots of G can handle negative i0 now. */ if (d2 > 1 && mpz_cmp_ui (B2min, (d1 - 1) * d2 - d1) <= 0) d2 = 1; /* Would make i0 < 0 */ #endif mpz_set_ui (i0, d1 - 1); mpz_mul_ui (i0, i0, d2); mpz_set (j, B2); mpz_add (i1, j, i0); /* i1 = B2 + (d1 - 1) * d2 */ mpz_set (j, B2min); mpz_sub (i0, j, i0); /* i0 = B2min - (d1 - 1) * d2 */ mpz_cdiv_q_ui (i0, i0, d1); /* i0 = ceil ((B2min - (d1 - 1) * d2) / d1) */ mpz_fdiv_q_ui (i1, i1, d1); /* i1 = floor ((B2 + (d1 - 1) * d2) / d1) */ /* How many roots of G will we need ? */ mpz_sub (j, i1, i0); mpz_add_ui (j, j, 1); /* Integer multiples of d2 are skipped (if d2 > 1) */ if (d2 > 1) { mpz_fdiv_q_ui (t, i1, d2); mpz_sub (j, j, t); mpz_fdiv_q_ui (t, i0, d2); mpz_add (j, j, t); /* j -= floor (i1 / d2) - floor (i0 / d2) */ } /* How many blocks will we need ? Divide lines by dF, rounding up */ mpz_cdiv_q_ui (j, j, dF); if ((k != ECM_DEFAULT_K && mpz_cmp_ui (j, k) <= 0) || (k == ECM_DEFAULT_K && mpz_cmp_ui (j, (po2) ? 6 : 2) <= 0)) break; } if (i == maxN) { if (k != ECM_DEFAULT_K) { /* The user asked for a specific k and we couldn't satisfy the condition. Nothing we can do ... 
*/ outputf (OUTPUT_ERROR, "Error: too large step 2 bound, increase -k\n"); r = ECM_ERROR; goto clear_and_exit; } else if (!mpz_fits_ulong_p (j)) { /* Can't fit the number of blocks in an unsigned long. Nothing we can do ... */ outputf (OUTPUT_ERROR, "Error: stage 2 interval too large, cannot " "generate suitable parameters.\nTry a smaller B2 value.\n"); r = ECM_ERROR; goto clear_and_exit; } if (maxN == 0) { /* We can't do a stage 2 at all with the memory the user allowed. Nothing we can do ... */ outputf (OUTPUT_ERROR, "Error: stage 2 not possible with memory " "allowed by -maxmem.\n"); r = ECM_ERROR; goto clear_and_exit; } /* else: We can fit the number of blocks into an unsigned int, so we use it. This may be a very large value for huge B2-B2min, the user is going to notice sooner or later */ } /* If the user specified a number of blocks, we'll use that no matter what. Since j may be smaller than k, this may increase the B2 limit */ if (k == ECM_DEFAULT_K) k = mpz_get_ui (j); /* Now that we have the number of blocks, compute real i1. There will be k * dF roots of G computed, starting at i0, skipping all that are not coprime to d2. While d2 is prime, that means: are not multiples of d2. 
Hence we want i1 so that i1 - floor(i1 / d2) - i0 + ceil((i0 / d2) == k * dF i1 - floor(i1 / d2) == k * dF + i0 - ceil((i0 / d2) */ mpz_set_ui (j, k); mpz_mul_ui (j, j, dF); if (d2 == 1) { mpz_add (i1, i0, j); mpz_sub_ui (i1, i1, 1); } else { mpz_add (j, j, i0); mpz_cdiv_q_ui (t, i0, d2); mpz_sub (j, j, t); /* j = k * dF + i0 - ceil((i0 / d2) */ mpz_fdiv_qr_ui (j, t, j, d2 - 1); mpz_mul_ui (j, j, d2); mpz_add (i1, j, t); } root_params->d1 = d1; root_params->d2 = d2; mpz_set (root_params->i0, i0); *finaldF = dF; *finalk = k; /* We want B2' the largest integer that satisfies i1 = floor ((B2' + (d1 - 1) * d2) / d1) = floor ((B2'-d2)/d1) + d2 i1 - d2 = floor ((B2'-d2)/d1) (B2'-d2)/d1 < i1-d2+1 B2'-d2 < (i1-d2+1) * d1 B2' < (i1-d2+1) * d1 + d2 B2' = (i1-d2+1) * d1 + d2 - 1 */ mpz_sub_ui (i1, i1, d2 - 1); mpz_mul_ui (B2, i1, d1); mpz_add_ui (B2, B2, d2 - 1); clear_and_exit: mpz_clear (t); mpz_clear (j); mpz_clear (i1); mpz_clear (i0); return r; } ecm-6.4.4/Fgw.c0000644023561000001540000003355612106741273010121 00000000000000/* Interface code for George Woltman's gwnum library Copyright 2004, 2005, 2006, 2008, 2011, 2012 Paul Zimmermann, Alexander Kruppa, David Cleaver. Contains code based on the GWNUM library, copyright 2002-2005 George Woltman, Just For Fun Software, Inc. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. 
If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include /* for rint */ #include #include "ecm-gmp.h" #include "ecm.h" #include "ecm-impl.h" #define ADD_UNDERSCORES #include "gwdbldbl.h" #include "gwnum.h" #include "cpuid.h" void __gxx_personality_v0() { exit (EXIT_FAILURE); } void __cxa_guard_acquire () { return; } void __cxa_guard_release () { return; } static int sgn (const int i) { if (i == 0) return 0; return i > 0 ? 1 : -1; } /* With the following 2 functions, we try to find a representation of an input number in the form of z = k*b^n+c. If such a representation was found, set the the appropriate values and return 1. Otherwise, set b to zero and return 0. */ /* This function searches for a representation of z of the form k*b^n+c */ int kbnc_z (double *k, unsigned long *b, unsigned long *n, signed long *c, mpz_t z) { int i = 0; int j = 0; int exp = 1; int check_it_out = 0; int ret = 0; mpz_t diff; mpz_t abs_diff; mpz_t b_n; /* this will = base^exp */ mpz_t k_b_n; /* this will = k*b^n */ mpz_t test_k; mpz_t max_k; mpz_t lhs; /* used for finding the k value */ mpz_t rhs; /* used for finding the k value */ mpz_t base; mpz_t base_min; mpz_t base_max; unsigned long test_k_ui = 0; /* this puts a bound on how large our C value can be */ int max_diff = 8388607; /* make sure we have a place to put our results */ if (k == NULL || b == NULL || n == NULL || c == NULL) return 0; /* make sure the input meets some sort of minimum size requirement. 
The gwnum library reports ES1_CANNOT_DO_QUICKLY for number < 2^350 */ if (mpz_sizeinbase(z, 2) < 350) { *b = 0; return 0; } mpz_init (diff); mpz_init (abs_diff); mpz_init (b_n); mpz_init (k_b_n); mpz_init (lhs); mpz_init (rhs); mpz_init (test_k); mpz_init (base); mpz_init_set_ui (base_min, 2); mpz_init_set_ui (base_max, 10000); /* this puts a bound on how large of a k value we want to find */ mpz_init_set_str (max_k, "562949953421312", 10); /* when dividing: z/(base^exp) this will give us a possible k value */ /* we want a quick test to see if this might be a viable k value */ /* so, we want this k value to be close to an integer */ /* ie, test_k = 13.99999, is pretty close to the integer 14 */ /* since it is "pretty close", we can test this k value. */ /* whereas test_k = 13.5689, is not "pretty close" to an integer */ /* so, we will not run extra tests with this k value */ /* should we change this based on the size of z? */ /* for now, the code checks to see whether test_k is with 1/1000 of an integer */ for (mpz_set (base, base_min); mpz_cmp (base, base_max) <= 0; mpz_add_ui (base, base, 1)) { exp = (mpz_sizeinbase (z, 2) - 1) / (mpz_sizeinbase (base, 2) - 1) + 1; mpz_pow_ui (b_n, base, exp); /* base^exp should be > z here */ while (1) { check_it_out = 0; /* 0 */ mpz_tdiv_q (test_k, z, b_n); if (mpz_cmp(test_k, max_k) > 0) break; /* check to see if test_k is "pretty close" to the next smallest integer: z/b_n - test_k <= 1/1000 # z/b_n should be > test_k here z/b_n <= 1/1000 + test_k 1000*z/b_n <= 1 + 1000*test_k if (1000*z <= b_n + 1000*b_n*test_k) */ mpz_mul_ui (lhs, z, 1000); mpz_mul (rhs, b_n, test_k); mpz_mul_ui (rhs, rhs, 1000); mpz_add (rhs, rhs, b_n); if (mpz_cmp (lhs, rhs) <= 0) check_it_out = 1; /* check to see if test_k is "pretty close" to the next largest integer */ if (!check_it_out) { mpz_add_ui (test_k, test_k, 1); /* test_k - z/b_n <= 1/1000 # test_k should be > z/b_n here */ /* test_k <= 1/1000 + z/b_n */ /* test_k - 1/1000 <= z/b_n */ /* 
1000*test_k - 1 <= 1000*z/b_n */ /* if (1000*b_n*test_k - b_n <= 1000*z) */ mpz_mul (lhs, b_n, test_k); mpz_mul_ui (lhs, lhs, 1000); mpz_sub (lhs, lhs, b_n); mpz_mul_ui (rhs, z, 1000); if (mpz_cmp (lhs, rhs) <= 0) check_it_out = 1; } if (check_it_out) { mpz_mul (k_b_n, b_n, test_k); mpz_sub (diff, z, k_b_n); mpz_abs (abs_diff, diff); if (mpz_cmp_ui (abs_diff, max_diff) <= 0) { /* make sure k and c are relatively prime */ if (mpz_gcd_ui (NULL, test_k, mpz_get_ui (diff)) == 1) { /* we are done!!! */ *k = mpz_get_d (test_k); *b = mpz_get_ui (base); *n = exp; *c = mpz_get_si (diff); ret = 1; goto end_kbnc; } else { *b = 0; ret = 0; goto end_kbnc; } } } mpz_divexact (b_n, b_n, base); exp--; } } /* if we get down here, then we couldn't find a representation k*b^n + c */ end_kbnc: mpz_clear (diff); mpz_clear (abs_diff); mpz_clear (b_n); mpz_clear (k_b_n); mpz_clear (lhs); mpz_clear (rhs); mpz_clear (test_k); mpz_clear (max_k); mpz_clear (base); mpz_clear (base_min); mpz_clear (base_max); return ret; } /* This function searches for a nice representation of z We are trying to see if z = k*b^n + c Some examples that we can find: "3^218+5123" "(199*3^218+5123)/(2*17*587*1187)" "(199*3^218 + 5123)/2/17/587/1187" */ int kbnc_str (double *k, unsigned long *b, unsigned long *n, signed long *c, char *z, mpz_t num) { int i = 0; int total = 0; char strk[11]; char strb[11]; char strn[11]; char strc[11]; mpz_t tmp; /* make sure we have a place to put our results */ if (k == NULL || b == NULL || n == NULL || c == NULL || z == NULL) return 0; *b = 0; for (i = 0; i < strlen(z); i++) { if (z[i] == '(' || z[i] == '{' || z[i] == '[') continue; /* check to see if the input is k*b^n+c */ total = sscanf (z+i, "%10[0-9]*%10[0-9]^%10[0-9]%*[ +]%10[0-9]", strk, strb, strn, strc); if (total == 4) { *k = (double) strtoul (strk, NULL, 10); *b = strtoul (strb, NULL, 10); *n = strtoul (strn, NULL, 10); *c = strtol (strc, NULL, 10); break; } /* check to see if the input is k*b^n-c */ total = sscanf 
(z+i, "%10[0-9]*%10[0-9]^%10[0-9]%*[ -]%10[0-9]", strk, strb, strn, strc); if (total == 4) { *k = (double) strtoul (strk, NULL, 10); *b = strtoul (strb, NULL, 10); *n = strtoul (strn, NULL, 10); *c = strtol (strc, NULL, 10); *c *= -1; break; } /* check to see if the input is b^n+c (k = 1) */ total = sscanf (z+i, "%10[0-9]^%10[0-9]%*[ +]%10[0-9]", strb, strn, strc); if (total == 3) { *k = 1.0; *b = strtoul (strb, NULL, 10); *n = strtoul (strn, NULL, 10); *c = strtol (strc, NULL, 10); break; } /* check to see if the input is b^n-c (k = 1) */ total = sscanf (z+i, "%10[0-9]^%10[0-9]%*[ -]%10[0-9]", strb, strn, strc); if (total == 3) { *k = 1.0; *b = strtoul (strb, NULL, 10); *n = strtoul (strn, NULL, 10); *c = strtol (strc, NULL, 10); *c *= -1; break; } break; } /* first, check to see if we found a k*b^n+c */ if (*b) { /* if we did, make sure that (k*b^n+c) is divisible by num */ mpz_init_set_ui (tmp, *b); mpz_pow_ui (tmp, tmp, *n); mpz_mul_ui (tmp, tmp, (unsigned long) *k); if (*c >= 0) mpz_add_ui (tmp, tmp, *c); else mpz_sub_ui (tmp, tmp, (*c * -1)); if (mpz_divisible_p (tmp, num)) return 1; } /* set b to zero so users have a second way to know we didn't find k,b,n,c */ *b = 0; /* if we get here, we didn't find a formula k*b^n+c for z */ return 0; } /* this method doesn't care if v is 32 or 64 bits... 
*/ unsigned long gw_log_2(unsigned long v) { unsigned long r = 0; /* r will be lg(v) */ while (v >>= 1) { r++; } return r; } int gw_ecm_stage1 (mpz_t f, curve *P, mpmod_t modulus, double B1, double *B1done, mpz_t go, double gw_k, unsigned long gw_b, unsigned long gw_n, signed long gw_c) { ecm_uint gw_B1done = *B1done; unsigned long siz_x, siz_z; /* Size of gw_x and gw_y as longs */ mpz_t gw_x, gw_z, gw_A; int youpi; if (mpz_cmp_ui (go, 1) > 0) { mpres_t b; mpres_init (b, modulus); mpres_add_ui (b, P->A, 2, modulus); mpres_div_2exp (b, b, 2, modulus); /* b == (A+2)/4 */ ecm_mul (P->x, P->y, go, modulus, b); mpres_clear (b, modulus); } outputf (OUTPUT_VERBOSE, "Using gwnum_ecmStage1(%.0f, %d, %d, %d, %.0f, %ld)\n", gw_k, gw_b, gw_n, gw_c, B1, gw_B1done); /* Copy x, z and A values from modular representation to plain integers */ /* Allocate enough memory for any residue (mod k*b^n+c) for x, z */ mpz_init2 (gw_x, (gw_n+1)*gw_log_2(gw_b)+64); mpz_init2 (gw_z, (gw_n+1)*gw_log_2(gw_b)+64); mpz_init (gw_A); /* mpres_get_z always produces non-negative integers */ mpres_get_z (gw_x, P->x, modulus); mpres_get_z (gw_z, P->y, modulus); mpres_get_z (gw_A, P->A, modulus); /* gwnum_ecmStage1() wants long int pointers for size_x, size_z, so copy them into long int vars */ siz_x = SIZ(gw_x); siz_z = SIZ(gw_z); /* George Woltman says that the gwnum library can handle k values up to 49 or 50 bits long, and the maximum c value is +/-8388607 */ ASSERT_ALWAYS (gw_k == rint (gw_k)); /* check that k is an integer */ ASSERT_ALWAYS (1.0 <= gw_k && gw_k <= 562949953421312.0); ASSERT_ALWAYS (-8388607 <= gw_c && gw_c <= 8388607); #if GMP_NUMB_BITS <= 32 youpi = gwnum_ecmStage1_u32 (gw_k, gw_b, gw_n, gw_c, PTR(modulus->orig_modulus), ABSIZ(modulus->orig_modulus), B1, &gw_B1done, PTR(gw_A), ABSIZ(gw_A), PTR(gw_x), &siz_x, PTR(gw_z), &siz_z, NULL, 0); #else /* contributed by David Cleaver */ youpi = gwnum_ecmStage1_u64 (gw_k, gw_b, gw_n, gw_c, PTR(modulus->orig_modulus), 
ABSIZ(modulus->orig_modulus), B1, &gw_B1done, PTR(gw_A), ABSIZ(gw_A), PTR(gw_x), &siz_x, PTR(gw_z), &siz_z, NULL, 0); #endif /* Test that not more was written to gw_x and gw_z than we had space for */ ASSERT_ALWAYS (siz_x <= (unsigned long) ALLOC(gw_x)); ASSERT_ALWAYS (siz_z <= (unsigned long) ALLOC(gw_z)); SIZ(gw_x) = siz_x; SIZ(gw_z) = siz_z; outputf (OUTPUT_DEVVERBOSE, "gw_ecm_stage1: after gwnum_ecmStage1, \n" "B1done = %lu, x = %Zd\nz = %Zd\n", gw_B1done, gw_x, gw_z); /* Copy x, z back to P and clean up the temp vars */ mpres_set_z (P->x, gw_x, modulus); mpres_set_z (P->y, gw_z, modulus); mpz_clear (gw_A); mpz_clear (gw_z); mpz_clear (gw_x); *B1done = gw_B1done; /* Here is a list of gwnum return codes. */ /* In the case of 2 or 5, we should continue on and let gmp-ecm */ /* do stage 1, instead of throwing an error and quitting */ /* #define ES1_SUCCESS 0 *//* Success, but no factor */ /* #define ES1_FACTOR_FOUND 1 *//* Success, factor found */ /* #define ES1_CANNOT_DO_IT 2 *//* This k,b,n,c cannot be handled */ /* #define ES1_MEMORY 3 *//* Out of memory */ /* #define ES1_INTERRUPT 4 *//* Execution interrupted */ /* #define ES1_CANNOT_DO_QUICKLY 5 *//* Requires 3-multiply reduction */ /* #define ES1_HARDWARE_ERROR 6 *//* An error was detected, most */ if (youpi == ES1_CANNOT_DO_IT || youpi == ES1_CANNOT_DO_QUICKLY) { outputf (OUTPUT_VERBOSE, "Notice: Did not use gwnum_ecmStage1(%.0f, %d, %d, %d, %.0f, %ld)\n", gw_k, gw_b, gw_n, gw_c, B1, gw_B1done); youpi = ECM_NO_FACTOR_FOUND; goto end_of_gwecm; } if (youpi > 1) { outputf (OUTPUT_ERROR, "GW stage 1 returned code %d\n", youpi); youpi = ECM_ERROR; goto end_of_gwecm; } if (youpi == 1) { /* How did that happen? Since we passed z, GWNUM should not do an extgcd and so not find factors... but if it did anyways, we deal with it. Who's going to turn down a factor? 
*/ outputf (OUTPUT_DEVVERBOSE, "gw_ecm_stage1: Strange, gwnum_ecmStage1 reports a factor\n"); mpres_get_z (f, P->x, modulus); youpi = ECM_FACTOR_FOUND_STEP1; goto end_of_gwecm; } /* Normalize z (in P->y) to 1 */ youpi = ECM_NO_FACTOR_FOUND; if (!mpres_invert (P->y, P->y, modulus)) /* Factor found? */ { mpres_gcd (f, P->y, modulus); youpi = ECM_FACTOR_FOUND_STEP1; } else { mpres_mul (P->x, P->x, P->y, modulus); mpres_set_ui (P->y, 1UL, modulus); } end_of_gwecm: return youpi; } ecm-6.4.4/test.pp10000755023561000001540000001043212106741273010622 00000000000000#!/bin/sh # test file for P+1 method # # Copyright 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2012 Jim Fougeron, # Alexander Kruppa, Dave Newman and Paul Zimmermann. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License # along with this program; see the file COPYING. If not, see # http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., # 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. PP1="$1 -pp1" # Call with "checkcode $? 
n" to check that return code is n # (see test.pm1 for the explanation of the different return codes) checkcode () { if [ $1 != $2 ] then echo "############### ERROR ###############" echo "Expected return code $2 but got $1" exit 1 fi } checkcode2 () { if [ $1 != $2 ] then if [ $1 != $3 ] then echo "############### ERROR ###############" echo "Expected return code $2 or $3 but got $1" exit 1 fi fi } # P+1 requires that sigma^2-4 is a quadratic non-residue mod p echo 328006342451 | $PP1 -x0 5 120 7043; checkcode $? 8 # check rational seed echo 328006342451 | $PP1 -x0 1/5 120 7043; checkcode $? 8 # try primes < d in stage 2 echo 2050449218179969792522461197 | $PP1 -x0 6 -k 1 20 0-1e6; checkcode $? 14 echo 6215074747201 | $PP1 -x0 5 630 199729; checkcode $? 8 # bug in 6.1.3 echo 6215074747201 | $PP1 -power 2 -x0 5 630 199729; checkcode $? 8 echo 6215074747201 | $PP1 -dickson 3 -x0 5 630 199729; checkcode $? 8 echo 8857714771093 | $PP1 -x0 3 23251 49207; checkcode $? 8 echo 236344687097 | $PP1 -x0 3 619 55001; checkcode $? 8 echo 87251820842149 | $PP1 -x0 5 3691 170249; checkcode $? 8 echo 719571227339189 | $PP1 -x0 4 41039 57679; checkcode $? 8 echo 5468575720021 | $PP1 -x0 6 1439 175759; checkcode $? 8 echo 49804972211 | $PP1 -x0 5 15443 268757; checkcode $? 8 echo 329573417220613 | $PP1 -x0 3 5279 101573; checkcode $? 8 echo 4866979762781 | $PP1 -x0 4 7309 97609; checkcode $? 8 echo 187333846633 | $PP1 -x0 3 2063 9851; checkcode $? 8 echo 332526664667473 | $PP1 -x0 3 65993 111919; checkcode $? 8 echo 265043186297 | $PP1 -x0 3 8761 152791; checkcode $? 8 echo 207734163253 | $PP1 -x0 3 1877 4211; checkcode $? 8 echo 225974065503889 | $PP1 -x0 5 -k 5 7867 8243; checkcode $? 8 echo 660198074631409 | $PP1 -x0 5 22541 115679; checkcode $? 8 echo 563215815517 | $PP1 -x0 3 3469 109849; checkcode $? 8 # test B2min-B2 echo 563215815517 | $PP1 -x0 3 3469 109849-109849; checkcode $? 8 echo 409100738617 | $PP1 -x0 3 19 19; checkcode $? 
8 # p37 from 45^123+1 found by Peter Montgomery with B1=30M echo 2277189375098448170118558775447117254551111605543304035536750762506158547102293199086726265869065639109 | $PP1 -x0 3 2337233 132554351 checkcode $? 14 # bug in ecm-5.0 (overflow in fin_diff_coeff) echo 630503947831861669 | $PP1 -x0 5 7 9007199254740000-9007199254741000; checkcode $? 8 # bug in ecm-6.0.1 on 64-bit machines. The error message "Error, maximal # step1 bound for P+1 is ..." on 32-bit machines is normal. echo "NOTE: NEXT TEST WILL FAIL ON 32BIT MACHINES, THIS IS EXPECTED." echo 8589934621 | $PP1 -x0 10 4294967310-4294967311 1; checkcode2 $? 1 8 # A test with a larger input number to test modular arithmetic routines not # in mulredc*.asm. This input has 1363 bits so it has 22 64 bit words # (43 32 bit words) and cannot use mulredc which handles only up to 20 limbs echo "6054018161*10^400+417727253109" | $PP1 -x0 4 2e3 2e6; checkcode $? 14 # Bug reported by Andreas Schickel: on 32 bit systems, the code in lucas.c # for generating Lucas chains is prone to causing integer overflows, giving # incorrect chains for some primes. This test exhibits the bug on 32 bit # systems but works on 64 bit echo 154618728587 | $PP1 -x0 3 -go 36 4294957296-4294967295 1; checkcode $? 8 echo 18446744073709551337 | $PP1 -pp1 -x0 2 70823 714487; checkcode $? 8 echo "All P+1 tests are ok." ecm-6.4.4/toomcook.c0000644023561000001540000003261312106741273011221 00000000000000/* Implementation of the Toom-Cook 3-way and 4-way algorithms for polynomial convolution products. Copyright 2001, 2002, 2003, 2004, 2005, 2006 Paul Zimmermann, Alexander Kruppa and Dave Newman. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "ecm-impl.h" #define A0 A[i] #define A1 A[l+i] #define A2 A[2*l+i] #define B0 B[i] #define B1 B[l+i] #define B2 B[2*l+i] #define C0 C[i] #define C1 C[l+i] #define C2 C[2*l+i] #define C3 C[3*l+i] #define C4 C[4*l+i] #define t0 t[i] #define t2 t[2*l+i-1] #define T t[4*l-2] /* Puts in C[0..2len-2] the product of A[0..len-1] and B[0..len-1]. This version works for all input sizes, but cannot handle input arrays overlapping with output. Assumes len >= 1. The auxiliary memory M(len) necessary in t satisfies: M(0) = 0, M(1) = 0, M(2) = 1, M(3) = 3, otherwise M(len) = 2*(2*l-1) + max(M(l), 1) with l = ceil(len/3). 
We prove M(len) <= 2*len + 2 * k with k = ceil(log[3](len)) by induction: 4*l-2 + max(M(l), 1) <= 4*l-2 + max(2*l + 2 * (k-1), 1) <= 6*l - 2 + 2 * (k-1) <= 2*(len+2) - 2 + 2 * (k-1) <= 2*len + 2 * k */ void toomcook3 (listz_t C, listz_t A, listz_t B, unsigned int len, listz_t t) { int i, l, k; if (len <= 2 || len == 4) { karatsuba (C, A, B, len, t); return; } l = (len + 2) / 3; /* ceil(len/3) */ k = len - 2 * l; /* smaller part */ for (i = 0; i < k; i++) /* uses t[0..3*l+k-1] */ { mpz_add (C0, A0, A2); mpz_sub (C2, C0, A1); /* C2 = A0 - A1 + A2 = A(-1) */ mpz_add (C0, C0, A1); /* C0 = A0 + A1 + A2 = A(1) */ mpz_add (C1, B0, B2); mpz_sub (C3, C1, B1); /* C3 = B0 - B1 + B2 = B(-1) */ mpz_add (C1, C1, B1); /* C1 = B0 + B1 + B2 = B(1) */ } for (; i < l; i++) /* uses t[0..4*l-1] */ { /* A2 and B2 are smaller than the rest */ mpz_add (C0, A0, A1); mpz_sub (C2, A0, A1); mpz_add (C1, B0, B1); mpz_sub (C3, B0, B1); } toomcook3 (t, C + 2 * l, C + 3 * l, l, &T); /* t0 = C2*C3 = A(-1)*B(-1) = C(-1), len(t0) = 2*l-1 */ for (i = 0; i < k; i++) { mpz_mul_2exp (C2, A2, 1); /* C2 = A(2), C3 = B(2) */ mpz_add (C2, C2, A1); mpz_mul_2exp (C2, C2, 1); mpz_add (C2, C2, A0); mpz_mul_2exp (C3, B2, 1); mpz_add (C3, C3, B1); mpz_mul_2exp (C3, C3, 1); mpz_add (C3, C3, B0); } for (; i < l; i++) { mpz_mul_2exp (C2, A1, 1); mpz_add (C2, C2, A0); mpz_mul_2exp (C3, B1, 1); mpz_add (C3, C3, B0); } toomcook3 (t + 2 * l - 1, C + 2 * l, C + 3 * l, l, &T); /* t2 = C2*C3 = A(2)*B(2) = C(2), len(t2) = 2*l-1 */ toomcook3 (C + 2 * l, C, C + l, l, &T); /* C2 = C0*C1 = A(1)*B(1) = C(1), len(C1) = 2*l-1 */ toomcook3 (C, A, B, l, &T); /* C0 = A(0)*B(0) = C(0), len(C0) = 2*l-1 */ toomcook3 (C + 4 * l, A + 2 * l, B + 2 * l, k, &T); /* C4 = A(inf)*B(inf) = C(inf), len(C4) = 2*k-1 */ /* C0: C_0 C2: C(1) C4: C_4 t0: C(-1) t2: C(2) */ /* C_3 = A_1 * B_2 + A_2 * B_1, len(C_3) = l+k-1 We need not bother to compute C_3[2l-1] if k= 0. For len=3, toomcook4 would use 6 multiplies, toomcook3 uses only 5. 
For len=6, toomcook4 would use 18 multiplies, toomcook3 only 15. For len=9, toomcook4 would use 30 multiplies, toomcook3 only 25. Further values where toomcook3 is faster are 17,18,26,27,77,78,79,80,81. */ if (len <= 2) { karatsuba (C, A, B, len, t); return; } if (len == 3 || len == 5 || len == 6 || len == 9 || len == 17 || len == 18 || (25 <= len && len <= 27) || (77 <= len && len <= 81)) { toomcook3 (C, A, B, len, t); return; } l = (len + 3) / 4; /* l = ceil(len/4) */ k = len - 3 * l; /* k = smaller part. len = 3*l + k, k <= l */ for (i = 0; i < l; i++) { /*** Evaluate A(2), A(-2), 8*A(1/2) ***/ mpz_mul_2exp (C0, A0, 1); mpz_add (C0, C0, A1); mpz_mul_2exp (C0, C0, 1); mpz_add (C0, C0, A2); mpz_mul_2exp (C0, C0, 1); if (i < k) { mpz_add (C0, C0, A3); /* C[0 .. l-1] = 8*A(1/2) */ mpz_mul_2exp (C2, A3, 2); mpz_add (C2, C2, A1); mpz_mul_2exp (C2, C2, 1); /* C[2l .. 3l-1] = 8*A_3 + 2*A_1 */ } else mpz_mul_2exp (C2, A1, 1); mpz_mul_2exp (T, A2, 2); mpz_add (T, T, A0); /* T = 4*A_2 + A0 */ mpz_sub (C4, T, C2); /* C[4l .. 5l-1] = A(-2) */ mpz_add (C2, C2, T); /* C[2l .. 3l-1] = A(2) */ #ifdef DEBUG gmp_fprintf (ECM_STDOUT, "8*A(1/2)[%d] = %Zd\n", i, C0); gmp_fprintf (ECM_STDOUT, "A(2)[%d] = %Zd\n", i, C2); gmp_fprintf (ECM_STDOUT, "A(-2)[%d] = %Zd\n", i, C4); #endif /*** Evaluate B(2), B(-2), 8*B(1/2) ***/ mpz_mul_2exp (C1, B0, 1); mpz_add (C1, C1, B1); mpz_mul_2exp (C1, C1, 1); mpz_add (C1, C1, B2); mpz_mul_2exp (C1, C1, 1); if (i < k) { mpz_add (C1, C1, B3); /* C[l .. 2l-1] = 8*B(1/2) */ mpz_mul_2exp (C3, B3, 2); mpz_add (C3, C3, B1); mpz_mul_2exp (C3, C3, 1); /* C[3l .. 3l+k-1] = 8*B_3 + 2*B_1 */ } else mpz_mul_2exp (C3, B1, 1); mpz_mul_2exp (T, B2, 2); mpz_add (T, T, B0); /* T = 4*B_2 + B0 */ mpz_sub (C5, T, C3); /* C[5l .. 5l+k-1] = B(-2) */ mpz_add (C3, C3, T); /* C[3l .. 
3l+k-1] = B(2) */ #ifdef DEBUG gmp_fprintf (ECM_STDOUT, "8*B(1/2)[%d] = %Zd\n", i, C1); gmp_fprintf (ECM_STDOUT, "B(2)[%d] = %Zd\n", i, C3); gmp_fprintf (ECM_STDOUT, "B(-2)[%d] = %Zd\n", i, C5); #endif } toomcook4 (t, C, C + l, l, &T); /* t0 = 8*A(1/2) * 8*B(1/2) = 64*C(1/2) */ toomcook4 (t + 2 * l - 1, C + 2 * l, C + 3 * l, l, &T); /* t2 = A(2) * B(2) = C(2) */ toomcook4 (t + 4 * l - 2, C + 4 * l, C + 5 * l, l, &T); /* t4 = A(-2) * B(-2) = C(-2) */ for (i = 0; i < l; i++) { mpz_add (C0, A0, A2); mpz_add (C1, B0, B2); if (i < k) { mpz_add (T, A1, A3); mpz_sub (C2, C0, T); /* C2 = A(-1) */ mpz_add (C0, C0, T); /* C0 = A(1) */ mpz_add (T, B1, B3); mpz_sub (C3, C1, T); /* C3 = B(-1) */ mpz_add (C1, C1, T); /* C1 = B(1) */ } else { mpz_sub (C2, C0, A1); mpz_add (C0, C0, A1); mpz_sub (C3, C1, B1); mpz_add (C1, C1, B1); } #ifdef DEBUG gmp_fprintf (ECM_STDOUT, "A(1)[%d] = %Zd\n", i, C0); gmp_fprintf (ECM_STDOUT, "A(-1)[%d] = %Zd\n", i, C2); gmp_fprintf (ECM_STDOUT, "B(1)[%d] = %Zd\n", i, C1); gmp_fprintf (ECM_STDOUT, "B(-1)[%d] = %Zd\n", i, C3); #endif } toomcook4 (C + 4 * l, C + 2 * l, C + 3 * l, l, &T); /* C4 = A(-1) * B(-1) = C(-1) */ toomcook4 (C + 2 * l, C, C + l, l, &T); /* C2 = A(1) * B(1) = C(1) */ toomcook4 (C, A, B, l, &T); /* C0 = A_0 * B_0 = C_0 */ toomcook4 (C + 6 * l, A + 3 * l, B + 3 * l, k, &T); /* C6 = A_3 * B_3 = C_6 */ for (i = 0; i < 2 * l - 1; i++) { #ifdef DEBUG gmp_fprintf (ECM_STDOUT, "C(0)[%d] = %Zd\n", i, C0); gmp_fprintf (ECM_STDOUT, "C(1)[%d] = %Zd\n", i, C2); gmp_fprintf (ECM_STDOUT, "C(-1)[%d] = %Zd\n", i, C4); gmp_fprintf (ECM_STDOUT, "C(2)[%d] = %Zd\n", i, t2); gmp_fprintf (ECM_STDOUT, "C(-2)[%d] = %Zd\n", i, t4); gmp_fprintf (ECM_STDOUT, "64*C(1/2)[%d] = %Zd\n", i, t0); if (i < 2 * k - 1) gmp_fprintf (ECM_STDOUT, "C(inf)[%d] = %Zd\n", i, C6); gmp_fprintf (ECM_STDOUT, "C_0[%d] = %Zd\n", i, C0); #endif mpz_add (t0, t0, t2); /* t0 = 65 34 20 16 20 34 65 */ mpz_sub (T, C2, C4); /* T = 2*C_odd(1) = 0 2 0 2 0 2 0 */ mpz_add (C2, C2, C4); /* C2 = 
2*C_even(1) */ mpz_fdiv_q_2exp (C2, C2, 1); /* C2 = C_even(1) */ mpz_add (C4, t2, t4); /* C4 = 2*C_even(2) */ mpz_fdiv_q_2exp (C4, C4, 1); /* C4 = C_even(2) */ mpz_sub (t4, t2, t4); /* t4 = 2*C_odd(2) */ mpz_fdiv_q_2exp (t4, t4, 2); /* t4 = C_odd(2)/2 = C_1 + 4*C_3 + 16*C_5 */ mpz_fdiv_q_2exp (t2, T, 1); /* t2 = C_odd(1) */ mpz_sub (t0, t0, T); /* t0 = 65 32 20 14 20 32 65 */ mpz_mul_2exp (T, T, 4); mpz_sub (t0, t0, T); /* t0 = 65 0 20 -18 20 0 65 */ if (i < 2 * k - 1) { mpz_add (T, C0, C6); /* T = C_0 + C_6 */ mpz_sub (C2, C2, T); /* C2 = C_2 + C_4 */ mpz_sub (t0, t0, T); /* t0 = 64 0 20 -18 20 0 64 */ mpz_mul_2exp (T, T, 5); } else { mpz_sub (C2, C2, C0); /* C2 = C_2 + C_4 */ mpz_sub (t0, t0, C0); /* t0 = 64 0 20 -18 20 0 */ mpz_mul_2exp (T, C0, 5); } mpz_fdiv_q_2exp (t0, t0, 1); /* t0 = 32 0 10 -9 10 0 32 */ mpz_sub (t0, t0, T); /* t0 = 0 0 10 -9 10 0 0 */ mpz_sub (t0, t0, C2); /* t0 = 0 0 9 -9 9 0 0 */ mpz_divexact_ui (t0, t0, 9); /* t0 = 0 0 1 -1 1 0 0 */ mpz_sub (t0, C2, t0); /* t0 = C_3 */ mpz_sub (t2, t2, t0); /* t2 = C_1 + C_5 */ mpz_mul_2exp (T, t0, 2); /* T = 4*C_3 */ mpz_sub (t4, t4, T); /* t4 = C_1 + 16*C_5 */ mpz_sub (t4, t4, t2); /* t4 = 15*C_5 */ mpz_divexact_ui (t4, t4, 15); /* t4 = C_5 */ mpz_sub (t2, t2, t4); /* t2 = C_1 */ mpz_sub (C4, C4, C0); /* C4 = 4*C_2 + 16*C_4 + 64*C_6 */ mpz_fdiv_q_2exp (C4, C4, 2); /* C4 = C_2 + 4*C_4 + 16*C_6 */ if (i < 2 * k - 1) { mpz_mul_2exp (T, C6, 4); mpz_sub (C4, C4, T); /* C4 = C_2 + 4*C_4 */ } mpz_sub (C4, C4, C2); /* C4 = 3*C_4 */ mpz_divby3_1op (C4); /* C4 = C_4 */ mpz_sub (C2, C2, C4); /* C2 = C_2 */ #ifdef DEBUG gmp_fprintf (ECM_STDOUT, "C_1[%d] = %Zd\n", i, t2); gmp_fprintf (ECM_STDOUT, "C_2[%d] = %Zd\n", i, C2); gmp_fprintf (ECM_STDOUT, "C_3[%d] = %Zd\n", i, t0); gmp_fprintf (ECM_STDOUT, "C_4[%d] = %Zd\n", i, C4); gmp_fprintf (ECM_STDOUT, "C_5[%d] = %Zd\n", i, t4); if (i < 2 * k - 1) gmp_fprintf (ECM_STDOUT, "C_6[%d] = %Zd\n", i, C6); #endif } for (i = 0; i < l - 1; i++) mpz_add (C1, C1, t2); mpz_set 
(C1, t2); for (i = l; i < 2 * l - 1; i++) mpz_add (C1, C1, t2); for (i = 0; i < l - 1; i++) mpz_add (C3, C3, t0); mpz_set (C3, t0); for (i = l; i < 2 * l - 1; i++) mpz_add (C3, C3, t0); for (i = 0; i < l - 1; i++) mpz_add (C5, C5, t4); mpz_set (C5, t4); for (i = l; i < l + k - 1; i++) mpz_add (C5, C5, t4); } ecm-6.4.4/sp.h0000644023561000001540000003404112106741273010013 00000000000000/* sp.h - header file for the sp library Copyright 2005, 2006, 2007, 2008, 2010, 2011, 2012 Dave Newman, Jason Papadopoulos, Paul Zimmermann, Brian Gladman, Alexander Kruppa. Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2010 Free Software Foundation, Inc. (for parts from gmp-impl.h). This file is part of the SP library. The SP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The SP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the SP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #ifndef _SP_H #define _SP_H #include "config.h" #include #ifdef HAVE_SYS_TYPES_H #include /* needed for size_t */ #endif #ifndef TUNE #include "ecm-params.h" #else extern size_t NTT_GFP_TWIDDLE_DIF_BREAKOVER; extern size_t NTT_GFP_TWIDDLE_DIT_BREAKOVER; extern size_t MUL_NTT_THRESHOLD; extern size_t PREREVERTDIVISION_NTT_THRESHOLD; extern size_t POLYINVERT_NTT_THRESHOLD; extern size_t POLYEVALT_NTT_THRESHOLD; extern size_t MPZSPV_NORMALISE_STRIDE; #endif #include #if defined( __GNUC__ ) && __GNUC__ >= 3 #define ATTRIBUTE_UNUSED __attribute__ ((unused)) #else #define ATTRIBUTE_UNUSED #endif /************** * GMP_IMPL.H * **************/ #ifdef WANT_ASSERT #include #define ASSERT(expr) assert (expr) #else #define ASSERT(expr) do {} while (0) #endif /* the following was inspired by longlong.h and gmp-impl.h; * note that a small prime must be the size of a GMP limb */ typedef mp_limb_t UWtype; typedef unsigned int UHWtype; #if (defined(_PA_RISC1_1) && defined(__GNUC__)) /* this seems to be needed, otherwise umul_ppmm() does not work properly */ typedef mp_limb_t USItype __attribute__ ((mode (SI))); typedef mp_limb_t UDItype __attribute__ ((mode (DI))); #else typedef mp_limb_t USItype; typedef mp_limb_t UDItype; #endif #ifndef W_TYPE_SIZE #define W_TYPE_SIZE GMP_LIMB_BITS #endif #ifndef ULONG_MAX #define ULONG_MAX __GMP_ULONG_MAX #endif #define LONGLONG_STANDALONE #include "longlong.h" /* we use the remainder tree for products of 2^I0_THRESHOLD moduli or more, and the naive method for fewer moduli. We must have I0_THRESHOLD >= 1. */ #define I0_THRESHOLD 7 /********* * TYPES * *********/ /* SP */ /* the type for both a small prime, and a residue modulo a small prime. 
* Small primes must be 1 bit smaller than the word size for 32-bit * systems (otherwise there may not be enough suitable primes), but * may be 2+ bits smaller when the word size exceeds 32 bits (and this * simplifies modular reductions) * * For a residue x modulo a sp p, we require 0 <= x < p */ typedef UWtype sp_t; #if W_TYPE_SIZE <= 32 #define SP_NUMB_BITS (W_TYPE_SIZE - 1) #else #define SP_NUMB_BITS (W_TYPE_SIZE - 2) #endif #define SP_MIN ((sp_t)1 << (SP_NUMB_BITS - 1)) #define SP_MAX ((sp_t)(-1) >> (W_TYPE_SIZE - SP_NUMB_BITS)) /* vector of residues modulo a common small prime */ typedef sp_t * spv_t; /* length of a spv */ typedef unsigned long spv_size_t; typedef struct { spv_t ntt_roots; spv_size_t twiddle_size; spv_t twiddle; } __sp_nttdata; typedef __sp_nttdata sp_nttdata_t[1]; #define MAX_NTT_BLOCK_SIZE 128 /* Which steps to perform in convolution product funtions: forward transform, pair-wise multiplication, inverse transform */ #define NTT_MUL_STEP_FFT1 1 #define NTT_MUL_STEP_FFT2 2 #define NTT_MUL_STEP_MUL 4 #define NTT_MUL_STEP_IFFT 8 /* SPM */ /* small prime modulus - this contains some precomputed constants to * calculate modulo a sp */ typedef struct { sp_t sp; /* value of the sp */ sp_t mul_c; /* constant used for reduction mod sp */ sp_t invm; /* -1/sp mod 2^GMP_NUMB_BITS */ sp_t Bpow; /* B^(n+1) mod sp where the input N has n limbs */ sp_t prim_root; sp_t inv_prim_root; sp_nttdata_t nttdata; sp_nttdata_t inttdata; spv_t scratch; } __spm_struct; typedef __spm_struct * spm_t; /* MPZSPM */ typedef mpz_t * mpzv_t; typedef struct { /* number of small primes needed to represent each coeff */ unsigned int sp_num; spv_size_t max_ntt_size; mpz_t modulus; /* spm data */ spm_t *spm; /* precomputed crt constants, see mpzspm.c */ mpzv_t crt1, crt2; sp_t *crt3, **crt4, *crt5; /* product tree to speed up conversion from mpz to sp */ mpzv_t *T; /* product tree */ unsigned int d; /* ceil(log(sp_num)/log(2)) */ } __mpzspm_struct; typedef __mpzspm_struct * 
mpzspm_t; /* MPZSPV */ /* sp representation of a mpz polynomial */ typedef spv_t * mpzspv_t; #define MAX(x,y) (((x)<(y))?(y):(x)) #define MIN(x,y) (((x)<(y))?(x):(y)) #define SIZ(x) ((x)->_mp_size) #define PTR(x) ((x)->_mp_d) /* expanding macros and then turning them into strings requires two levels of macro-izing */ #define _(x) #x #define STRING(x) _(x) /************* * FUNCTIONS * *************/ /* general */ /* Returns the bit length of x - 1, which equals ceil(log2(x)) for x >= 1. x is unsigned, so x = 0 wraps around in the decrement and the result is then the full bit width of spv_size_t. */ static inline unsigned int ceil_log_2 (spv_size_t x) { unsigned int a = 0; x--; while (x) { a++; x >>= 1; } return a; } /* Conversion functions sp_t <-> mpz_t. Using mpz_*_ui() functions is not portable as those take unsigned long's, but on some systems (e.g. 64 bit Windows with Visual C), unsigned long has 32 bits while sp_t should use 64 */ /* Sets m to the value of n. Handles sp_t no wider than unsigned long, and the 8-byte sp_t / 4-byte unsigned long combination (in two 32-bit halves); any other size combination calls abort(). */ static inline void mpz_set_sp (mpz_t m, const sp_t n) { /* Is sizeof() a safe way of determining whether the conversion is lossless? */ if (sizeof (sp_t) <= sizeof (unsigned long)) { mpz_set_ui (m, (unsigned long) n); } else if (sizeof (sp_t) == 8 && sizeof (unsigned long) == 4) { /* We want to right-shift by 32 bits on a 64 bit system here. Putting a shift amount of 32 as a constant causes a compiler warning on 32 bit systems. So we put sizeof (sp_t) * 4 which always evaluates to 32 in this branch of the code, and does not cause a compiler warning if sp_t is only 4 bytes wide. */ mpz_set_ui (m, (unsigned long) (n >> (sizeof (sp_t) * 4))); mpz_mul_2exp (m, m, 32UL); mpz_add_ui (m, m, (unsigned long int) (n & 4294967295UL)); } else { abort (); } } /* Returns the low-order part of n as an sp_t, read with mpz_get_ui() when sp_t and unsigned long have the same size and with mpz_getlimbn (n, 0) when sp_t has the size of a limb; calls abort() if neither holds. */ static inline sp_t mpz_get_sp (const mpz_t n) { if (sizeof (sp_t) == sizeof (unsigned long)) { return (sp_t) mpz_get_ui (n); } else if (sizeof (sp_t) == sizeof (mp_limb_t)) { /* mpz_get_ui() returns the least significant bits of the absolute value of its argument that fit in an unsigned long. In the current GMP implementation with sign/magnitude representation, mpz_getlimbn() also returns the least significant bits of the absolute value. 
To allow for a future change to 2's-complement representation in GMP, we should explicitly use mpz_abs() to a temp var here. */ return (sp_t) mpz_getlimbn (n, 0); } else { abort (); } } void * sp_aligned_malloc (size_t len); void sp_aligned_free (void *newptr); /* sp */ /* Routines for arithmetic on residues modulo a small prime * * All functions return values in the range 0 <= x < p. * * The variable name of the modulus is 'p' if the input must be prime, * 'm' if we also allow composites. */ static inline sp_t sp_sub(sp_t a, sp_t b, sp_t m) { #if (defined(__GNUC__) || defined(__ICL)) && \ (defined(__x86_64__) || defined(__i386__)) sp_t t = 0, tr = a; __asm__ ( "sub %2, %0 # sp_sub: tr -= b\n\t" "cmovc %3, %1 # sp_sub: if (a < b) t = m\n\t" : "+&r" (tr), "+r" (t) : "g" (b), "g" (m) : "cc" ); return tr + t; #elif defined(_MSC_VER) && !defined(_WIN64) __asm { mov eax, a xor edx, edx sub eax, b cmovb edx, m add eax, edx } #else if (a >= b) return a - b; else return a - b + m; #endif } static inline sp_t sp_add(sp_t a, sp_t b, sp_t m) { #if (defined(__GNUC__) || defined(__ICL)) && \ (defined(__x86_64__) || defined(__i386__)) sp_t t = a - m, tr = a + b; __asm__ ( "add %2, %1 # sp_add: t += b\n\t" "cmovc %1, %0 # sp_add: if (cy) tr = t \n\t" : "+r" (tr), "+&r" (t) : "g" (b) : "cc" ); return tr; #elif defined(_MSC_VER) && !defined(_WIN64) __asm { mov eax, a add eax, b mov edx, eax sub edx, m cmovnc eax, edx } #elif SP_NUMB_BITS <= W_TYPE_SIZE - 1 sp_t t = a + b; if (t >= m) t -= m; return t; #else return sp_sub(a, m - b, m); #endif } /* functions used for modular reduction */ #if SP_NUMB_BITS <= W_TYPE_SIZE - 2 /* having a small modulus allows the reciprocal * to be one bit larger, which guarantees that the * initial remainder fits in a word and also that at * most one correction is necessary */ #define sp_reciprocal(invxl,xl) \ do { \ ATTRIBUTE_UNUSED mp_limb_t dummy; \ udiv_qrnnd (invxl, dummy, \ (sp_t) 1 << (2 * SP_NUMB_BITS + 1 - \ W_TYPE_SIZE), 0, xl); \ } while (0) 
/* small-modulus variant: reduces the two-word value nh:nl modulo d, where di is the precomputed reduction constant for d (presumably obtained from sp_reciprocal -- confirm against the caller). The final sp_sub (r, d, d) yields r - d when r >= d and r otherwise, i.e. one conditional correction. */ static inline sp_t sp_udiv_rem(sp_t nh, sp_t nl, sp_t d, sp_t di) { sp_t r; mp_limb_t q1, q2; ATTRIBUTE_UNUSED mp_limb_t tmp; q1 = nh << (2*(W_TYPE_SIZE - SP_NUMB_BITS)) | nl >> (2*SP_NUMB_BITS - W_TYPE_SIZE); umul_ppmm (q2, tmp, q1, di); r = nl - d * (q2 >> 1); return sp_sub(r, d, d); } #else /* big modulus; no shortcuts allowed */ #define sp_reciprocal(invxl,xl) \ do { \ mp_limb_t dummy; \ udiv_qrnnd (invxl, dummy, \ (sp_t) 1 << (2 * SP_NUMB_BITS - \ W_TYPE_SIZE), 0, xl); \ } while (0) /* big-modulus variant: the two-word product q2 * d is subtracted from nh:nl with an explicit borrow, then up to three corrections by d follow (one when the high word is non-zero, two conditional ones through sp_sub) */ static inline sp_t sp_udiv_rem(sp_t nh, sp_t nl, sp_t d, sp_t di) { mp_limb_t q1, q2, tmp, dqh, dql; q1 = nh << (2*(W_TYPE_SIZE - SP_NUMB_BITS)) | nl >> (2*SP_NUMB_BITS - W_TYPE_SIZE); umul_ppmm (q2, tmp, q1, di); umul_ppmm (dqh, dql, q2, d); tmp = nl; nl = tmp - dql; nh = nh - dqh - (nl > tmp); if (nh) nl -= d; nl = sp_sub(nl, d, d); return sp_sub(nl, d, d); } #endif /* x*y mod m, where d is the reduction constant handed on to sp_udiv_rem */ static inline sp_t sp_mul (sp_t x, sp_t y, sp_t m, sp_t d) { sp_t u, v; umul_ppmm (u, v, x, y); return sp_udiv_rem (u, v, m, d); } /* x*x mod m, where d is the reduction constant handed on to sp_udiv_rem */ static inline sp_t sp_sqr (sp_t x, sp_t m, sp_t d) { sp_t u, v; umul_ppmm (u, v, x, x); return sp_udiv_rem (u, v, m, d); } /* -x mod m: 0 stays 0, any other x gives m - x */ #define sp_neg(x,m) ((x) == (sp_t) 0 ? (sp_t) 0 : (m) - (x)) /* Returns x^a % m, uses a right-to-left powering ladder; a = 0 returns 1 */ static inline sp_t sp_pow (sp_t x, sp_t a, sp_t m, sp_t d) { sp_t partial = 1; while (1) { if (a & 1) partial = sp_mul (x, partial, m, d); a >>= 1; if (!a) return partial; x = sp_sqr (x, m, d); } } /* 1/x mod p where d is p->mul_c; computed as x^(p-2) mod p */ #define sp_inv(x,p,d) sp_pow (x, (p) - 2, p, d) /* x / 2 mod m */ #define sp_div_2(x,m) (((x) & 1) ? 
(m) - (((m) - (x)) >> 1) : ((x) >> 1)) int sp_spp (sp_t, sp_t, sp_t); int sp_prime (sp_t); /* spm */ spm_t spm_init (spv_size_t, sp_t, mp_size_t); void spm_clear (spm_t); /* spv */ /* ASSIGNMENT */ void spv_set (spv_t, spv_t, spv_size_t); void spv_rev (spv_t, spv_t, spv_size_t); void spv_set_sp (spv_t, sp_t, spv_size_t); void spv_set_zero (spv_t, spv_size_t); /* ARITHMETIC */ /* add */ void spv_add (spv_t, spv_t, spv_t, spv_size_t, sp_t); void spv_add_sp (spv_t, spv_t, sp_t, spv_size_t, sp_t); /* subtract */ void spv_sub (spv_t, spv_t, spv_t, spv_size_t, sp_t); void spv_sub_sp (spv_t, spv_t, sp_t, spv_size_t, sp_t); void spv_neg (spv_t, spv_t, spv_size_t, sp_t); /* pointwise multiplication */ void spv_pwmul (spv_t, spv_t, spv_t, spv_size_t, sp_t, sp_t); void spv_pwmul_rev (spv_t, spv_t, spv_t, spv_size_t, sp_t, sp_t); void spv_mul_sp (spv_t, spv_t, sp_t, spv_size_t, sp_t, sp_t); void spv_random (spv_t, spv_size_t, sp_t); int spv_cmp (spv_t, spv_t, spv_size_t); /* ntt_gfp */ void spv_ntt_gfp_dif (spv_t, spv_size_t, spm_t); void spv_ntt_gfp_dit (spv_t, spv_size_t, spm_t); /* mpzspm */ spv_size_t mpzspm_max_len (mpz_t); mpzspm_t mpzspm_init (spv_size_t, mpz_t); void mpzspm_clear (mpzspm_t); /* mpzspv */ mpzspv_t mpzspv_init (spv_size_t, mpzspm_t); void mpzspv_clear (mpzspv_t, mpzspm_t); int mpzspv_verify (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_set (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_revcopy (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_set_sp (mpzspv_t, spv_size_t, sp_t, spv_size_t, mpzspm_t); void mpzspv_from_mpzv (mpzspv_t, const spv_size_t, const mpzv_t, const spv_size_t, mpzspm_t); void mpzspv_reverse (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_neg (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_add (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_to_mpzv (mpzspv_t, 
spv_size_t, mpzv_t, spv_size_t, mpzspm_t); void mpzspv_normalise (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_pwmul (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_to_ntt (mpzspv_t, spv_size_t, spv_size_t, spv_size_t, int, mpzspm_t); void mpzspv_from_ntt (mpzspv_t, spv_size_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_mul_ntt (mpzspv_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t, mpzspv_t, spv_size_t, spv_size_t, spv_size_t, int, spv_size_t, mpzspm_t, int); void mpzspv_random (mpzspv_t, spv_size_t, spv_size_t, mpzspm_t); void mpzspv_to_dct1 (mpzspv_t, mpzspv_t, spv_size_t, spv_size_t, mpzspv_t, mpzspm_t); void mpzspv_mul_by_dct (mpzspv_t, const mpzspv_t, spv_size_t, const mpzspm_t, int); void mpzspv_sqr_reciprocal (mpzspv_t, spv_size_t, const mpzspm_t); #endif /* _SP_H */ ecm-6.4.4/ecm-params.h.athlon0000644023561000001540000000113312106741273012676 00000000000000/* this is the parameter file for Opteron */ #define MPZMOD_THRESHOLD 170 #define REDC_THRESHOLD 294 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 0, 0, 0, 0, 1, 7, 8, 1, 1, 8, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 16, 16, 1, 1, 16, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 11 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 11 #define SPV_NTT_GFP_DIF_RECURSIVE_THRESHOLD 32768 #define SPV_NTT_GFP_DIT_RECURSIVE_THRESHOLD 32768 #define MUL_NTT_THRESHOLD 1024 #define PREREVERTDIVISION_NTT_THRESHOLD 64 #define POLYINVERT_NTT_THRESHOLD 512 #define POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 512 ecm-6.4.4/listz.c0000644023561000001540000006347212106741273010543 00000000000000/* Arithmetic on lists of residues modulo n. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2012 Paul Zimmermann and Alexander Kruppa. This file is part of the ECM Library. 
The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include "ecm-impl.h" #ifdef DEBUG #define ASSERTD(x) assert(x) #else #define ASSERTD(x) #endif #if (MULT == KS) #define LIST_MULT_N kronecker_schonhage #define WRAP /* use wrap-around multiplication for low short product */ #elif (MULT == TOOM4) #define LIST_MULT_N toomcook4 #elif (MULT == TOOM3) #define LIST_MULT_N toomcook3 #elif (MULT == KARA) #define LIST_MULT_N karatsuba #else #error "MULT is neither KS, TOOM4, nor TOOM3, nor KARA" #endif extern unsigned int Fermat; /* returns a bound on the auxiliary memory needed by LIST_MULT_N */ int list_mul_mem (unsigned int len) { unsigned int mem; mem = 2 * len; #if defined(TOOMCOOK3) || defined(TOOMCOOK4) while (len > 3) { mem += 2; len = (len + 2) / 3; /* ceil(len/3) */ } mem += 4; #endif return mem; } /* creates a list of n integers, return NULL if error */ listz_t init_list (unsigned int n) { listz_t p; unsigned int i; p = (mpz_t*) malloc (n * sizeof (mpz_t)); if (p == NULL) return NULL; for (i = 0; i < n; i++) mpz_init (p[i]); return p; } /* creates a list of n integers, return NULL if error. 
Allocates each mpz_t to the size of N bits */ listz_t init_list2 (unsigned int n, unsigned int N) { listz_t p; unsigned int i; p = (mpz_t*) malloc (n * sizeof (mpz_t)); if (p == NULL) return NULL; for (i = 0; i < n; i++) mpz_init2 (p[i], N); return p; } /* clears a list of n integers */ void clear_list (listz_t p, unsigned int n) { unsigned int i; if (p == NULL) return; for (i = 0; i < n; i++) mpz_clear (p[i]); free (p); } #ifdef DEBUG /* prints a list of n coefficients as a polynomial */ void print_list (listz_t p, unsigned int n) { unsigned int i; for (i = 0; i < n; i++) { if (i > 0 && mpz_cmp_ui (p[i], 0) >= 0) fprintf (ECM_STDOUT, "+"); mpz_out_str (ECM_STDOUT, 10, p[i]); fprintf (ECM_STDOUT, "*x^%u", i); } fprintf (ECM_STDOUT, "\n"); } static int list_check (listz_t a, unsigned int l, mpz_t n) { unsigned int i; for (i = 0; i < l; i++) if (mpz_cmp_ui (a[i], 0) < 0 || mpz_cmp (n, a[i]) <= 0) { fprintf (ECM_STDOUT, "l=%u i=%u\n", l, i); mpz_out_str (ECM_STDOUT, 10, a[i]); fprintf (ECM_STDOUT, "\n"); return 0; } return 1; } #endif /* DEBUG */ /* Read all entries in list from stream. Return 0 on success, ECM_ERROR on error */ int list_inp_raw (listz_t a, FILE *f, unsigned int n) { unsigned int i; for (i = 0; i < n; i++) if (mpz_inp_raw (a[i], f) == 0) return ECM_ERROR; return 0; } /* Write all entries in list to stream. Return 0 on success, ECM_ERROR on error */ int list_out_raw (FILE *f, listz_t a, unsigned int n) { unsigned int i; for (i = 0; i < n; i++) if (mpz_out_raw (f, a[i]) == 0) return ECM_ERROR; return 0; } /* p <- q */ void list_set (listz_t p, listz_t q, unsigned int n) { unsigned int i; for (i = 0; i < n; i++) mpz_set (p[i], q[i]); } /* p[0] <-> p[n-1], p[1] <-> p[n-2], ... 
*/ void list_revert (listz_t p, unsigned int n) { unsigned int i; for (i = 0; i < n - 1 - i; i++) mpz_swap (p[i], p[n - 1 - i]); } void list_swap (listz_t p, listz_t q, unsigned int n) { unsigned int i; for (i = 0; i < n; i++) mpz_swap (p[i], q[i]); } /* p <- -q, keeps residues normalized */ void list_neg (listz_t p, listz_t q, unsigned int l, mpz_t n) { unsigned int i; for (i = 0; i < l; i++) { if (mpz_sgn (q[i])) mpz_sub (p[i], n, q[i]); else mpz_set_ui (p[i], 0); } } /* p <- q modulo mod */ void list_mod (listz_t p, listz_t q, unsigned int n, mpz_t mod) { unsigned int i; for (i = 0; i < n; i++) mpz_mod (p[i], q[i], mod); } /* p <- q + r */ void list_add (listz_t p, listz_t q, listz_t r, unsigned int l) { unsigned int i; for (i = 0; i < l; i++) mpz_add (p[i], q[i], r[i]); } /* p <- q - r */ void list_sub (listz_t p, listz_t q, listz_t r, unsigned int l) { unsigned int i; for (i = 0; i < l; i++) mpz_sub (p[i], q[i], r[i]); } /* p[i] <- q[i] * r mod m */ void list_mul_z (listz_t p, listz_t q, mpz_t r, unsigned int n, mpz_t m) { unsigned int i; for (i = 0; i < n; i++) { mpz_mul (p[i], q[i], r); mpz_mod (p[i], p[i], m); } } /* p <- gcd(n, l[0]*l[1]*...*l[k-1], returns non-zero iff p is non trivial. Clobbers l[0] */ int list_gcd (mpz_t p, listz_t l, unsigned int k, mpz_t n) { unsigned int i; for (i = 1; i < k; i++) { mpz_mul (l[0], l[0], l[i]); mpz_mod (l[0], l[0], n); } mpz_gcd (p, l[0], n); return mpz_cmp_ui (p, 1); } /* Multiply up the integers in l, modulo n. Each entry becomes the product (mod n) of itself and all previous entries */ void list_mulup (listz_t l, unsigned int k, mpz_t n, mpz_t t) { unsigned int i; for (i = 1; i < k; i++) { mpz_mul (t, l[i - 1], l[i]); mpz_mod (l[i], t, n); } } /* p <- 0 */ void list_zero (listz_t p, unsigned int n) { unsigned int i; for (i = 0; i < n; i++) mpz_set_ui (p[i], 0); } #ifndef KS_MULTIPLY /* puts in a[0]..a[K-1] the K low terms of the product of b[0..K-1] and c[0..K-1]. Assumes K >= 1, and a[0..2K-2] exist. 
Needs space for list_mul_mem(K) in t. */ static void list_mul_low (listz_t a, listz_t b, listz_t c, unsigned int K, listz_t t, mpz_t n) { unsigned int p, q; ASSERT(K > 0); switch (K) { case 1: mpz_mul (a[0], b[0], c[0]); return; case 2: mpz_mul (a[0], b[0], c[0]); mpz_mul (a[1], b[0], c[1]); mpz_addmul (a[1], b[1], c[0]); return; case 3: karatsuba (a, b, c, 2, t); mpz_addmul (a[2], b[2], c[0]); mpz_addmul (a[2], b[0], c[2]); return; default: /* MULT is 2 for Karatsuba, 3 for Toom3, 4 for Toom4 */ for (p = 1; MULT * p <= K; p *= MULT); /* p = greatest power of MULT <=K */ p = (K / p) * p; ASSERTD(list_check(b,p,n) && list_check(c,p,n)); LIST_MULT_N (a, b, c, p, t); if ((q = K - p)) { list_mul_low (t, b + p, c, q, t + 2 * q - 1, n); list_add (a + p, a + p, t, q); list_mul_low (t, c + p, b, q, t + 2 * q - 1, n); list_add (a + p, a + p, t, q); } } } #endif /* puts in a[K-1]..a[2K-2] the K high terms of the product of b[0..K-1] and c[0..K-1]. Assumes K >= 1, and a[0..2K-2] exist. Needs space for list_mul_mem(K) in t. */ void list_mul_high (listz_t a, listz_t b, listz_t c, unsigned int K, listz_t t) { #ifdef KS_MULTIPLY /* ks is faster */ LIST_MULT_N (a, b, c, K, t); #else unsigned int p, q; ASSERT(K > 0); switch (K) { case 1: mpz_mul (a[0], b[0], c[0]); return; case 2: mpz_mul (a[2], b[1], c[1]); mpz_mul (a[1], b[1], c[0]); mpz_addmul (a[1], b[0], c[1]); return; case 3: karatsuba (a + 2, b + 1, c + 1, 2, t); mpz_addmul (a[2], b[0], c[2]); mpz_addmul (a[2], b[2], c[0]); return; default: /* MULT is 2 for Karatsuba, 3 for Toom3, 4 for Toom4 */ for (p = 1; MULT * p <= K; p *= MULT); p = (K / p) * p; q = K - p; LIST_MULT_N (a + 2 * q, b + q, c + q, p, t); if (q) { list_mul_high (t, b + p, c, q, t + 2 * q - 1); list_add (a + K - 1, a + K - 1, t + q - 1, q); list_mul_high (t, c + p, b, q, t + 2 * q - 1); list_add (a + K - 1, a + K - 1, t + q - 1, q); } } #endif } /* Puts in a[0..2K-2] the product of b[0..K-1] and c[0..K-1]. 
The auxiliary memory M(K) necessary in T satisfies: M(1)=0, M(K) = max(3*l-1,2*l-2+M(l)) <= 2*K-1 where l = ceil(K/2). Assumes K >= 1. */ void karatsuba (listz_t a, listz_t b, listz_t c, unsigned int K, listz_t t) { if (K == 1) { mpz_mul (a[0], b[0], c[0]); } else if (K == 2) /* basic Karatsuba scheme */ { mpz_add (t[0], b[0], b[1]); /* t0 = b_0 + b_1 */ mpz_add (a[1], c[0], c[1]); /* a1 = c_0 + c_1 */ mpz_mul (a[1], a[1], t[0]); /* a1 = b_0*c_0 + b_0*c_1 + b_1*c_0 + b_1*c_1 */ mpz_mul (a[0], b[0], c[0]); /* a0 = b_0 * c_0 */ mpz_mul (a[2], b[1], c[1]); /* a2 = b_1 * c_1 */ mpz_sub (a[1], a[1], a[0]); /* a1 = b_0*c_1 + b_1*c_0 + b_1*c_1 */ mpz_sub (a[1], a[1], a[2]); /* a1 = b_0*c_1 + b_1*c_0 */ } else if (K == 3) { /* implement Weimerskirch/Paar trick in 6 muls and 13 adds http://www.crypto.ruhr-uni-bochum.de/Publikationen/texte/kaweb.pdf */ /* diagonal terms */ mpz_mul (a[0], b[0], c[0]); mpz_mul (a[2], b[1], c[1]); mpz_mul (a[4], b[2], c[2]); /* (0,1) rectangular term */ mpz_add (t[0], b[0], b[1]); mpz_add (t[1], c[0], c[1]); mpz_mul (a[1], t[0], t[1]); mpz_sub (a[1], a[1], a[0]); mpz_sub (a[1], a[1], a[2]); /* (1,2) rectangular term */ mpz_add (t[0], b[1], b[2]); mpz_add (t[1], c[1], c[2]); mpz_mul (a[3], t[0], t[1]); mpz_sub (a[3], a[3], a[2]); mpz_sub (a[3], a[3], a[4]); /* (0,2) rectangular term */ mpz_add (t[0], b[0], b[2]); mpz_add (t[1], c[0], c[2]); mpz_mul (t[2], t[0], t[1]); mpz_sub (t[2], t[2], a[0]); mpz_sub (t[2], t[2], a[4]); mpz_add (a[2], a[2], t[2]); } else { unsigned int i, k, l; listz_t z; k = K / 2; l = K - k; z = t + 2 * l - 1; /* improved code with 7*k-3 additions, contributed by Philip McLaughlin */ for (i = 0; i < k; i++) { mpz_sub (z[i], b[i], b[l+i]); mpz_sub (a[i], c[i], c[l+i]); } if (l > k) /* case K odd */ { mpz_set (z[k], b[k]); mpz_set (a[k], c[k]); } /* as b[0..l-1] + b[l..K-1] is stored in t[2l-1..3l-2], we need here at least 3l-1 entries in t */ karatsuba (t, z, a, l, a + l); /* fills t[0..2l-2] */ /* trick: save t[2l-2] in 
a[2l-1] to enable M(K) <= 2*K-1 */ z = t + 2 * l - 2; mpz_set (a[2*l-1], t[2*l-2]); karatsuba (a, b, c, l, z); /* fill a[0..2l-2] */ karatsuba (a + 2 * l, b + l, c + l, k, z); /* fills a[2l..2K-2] */ mpz_set (t[2*l-2], a[2*l-1]); /* restore t[2*l-2] */ mpz_set_ui (a[2*l-1], 0); /* l l-1 1 l 2k-1-l _________________________________________________ | a0 | a1 |0| a2 | a3 | ------------------------------------------------- l l-1 ________________________ | t0 | t1 | ------------------------ We want to replace [a1, a2] by [a1 + a0 + a2 - t0, a2 + a1 + a3 - t1] i.e. [a12 + a0 - t0, a12 + a3 - t1] where a12 = a1 + a2. */ list_add (a + 2 * l, a + 2 * l, a + l, l-1); /* a[2l..3l-1] <- a1+a2 */ if (k > 1) { list_add (a + l, a + 2 * l, a, l); /* a[l..2l-1] <- a0 + a1 + a2 */ list_add (a + 2 * l, a + 2 * l, a + 3 * l, 2 * k - 1 - l); } else /* k=1, i.e. K=2 or K=3, and a2 has only one entry */ { mpz_add (a[l], a[2*l], a[0]); if (K == 3) mpz_set (a[l+1], a[1]); } list_sub (a + l, a + l, t, 2 * l - 1); } } /* multiplies b[0]+...+b[k-1]*x^(k-1)+x^k by c[0]+...+c[l-1]*x^(l-1)+x^l and puts the results in a[0]+...+a[k+l-1]*x^(k+l-1) [the leading monomial x^(k+l) is implicit]. If monic_b (resp. monic_c) is 0, don't consider x^k in b (resp. x^l in c). Assumes k = l or k = l+1. The auxiliary array t contains at least list_mul_mem(l) entries. a and t should not overlap. 
*/
void
list_mul (listz_t a, listz_t b, unsigned int k, int monic_b,
          listz_t c, unsigned int l, int monic_c, listz_t t)
{
  unsigned int i, po2;

  ASSERT(k == l || k == l + 1);

  /* po2 <- 1 iff l is a power of two: strip the trailing zero bits of l
     and check that only a single 1 bit is left */
  for (po2 = l; (po2 & 1) == 0; po2 >>= 1);
  po2 = (po2 == 1);

#ifdef DEBUG
  /* k and l are unsigned int, hence %u */
  if (Fermat && !(po2 && l == k))
    fprintf (ECM_STDOUT, "list_mul: Fermat number, but poly lengths %u and %u\n",
             k, l);
#endif

  if (po2 && Fermat)
    {
      if (monic_b && monic_c && l == k)
        {
          /* NOTE(review): F_mul is passed MONIC here and both flags are
             then cleared, so the monic fix-up below is skipped; presumably
             F_mul accounts for the leading monomials itself -- confirm */
          F_mul (a, b, c, l, MONIC, Fermat, t);
          monic_b = monic_c = 0;
        }
      else
        F_mul (a, b, c, l, DEFAULT, Fermat, t);
    }
  else
    LIST_MULT_N (a, b, c, l, t); /* set a[0]...a[2l-2] */

  if (k > l) /* multiply b[l]*x^l by c[0]+...+c[l-1]*x^(l-1) */
    {
      for (i = 0; i < l - 1; i++)
        mpz_addmul (a[l+i], b[l], c[i]);
      /* a[2l-1] was not set by the length-l product above, so it is
         assigned rather than accumulated */
      mpz_mul (a[2*l-1], b[l], c[l-1]);
    }

  /* deal with x^k and x^l */
  if (monic_b || monic_c)
    {
      mpz_set_ui (a[k + l - 1], 0);

      if (monic_b && monic_c) /* Single pass over a[] */
        {
          /* a += b * x^l + c * x^k, so a[i] += b[i-l]; a[i] += c[i-k]
             if 0 <= i-l < k  or  0 <= i-k < l, respectively */
          if (k > l)
            mpz_add (a[l], a[l], b[0]);
          for (i = k; i < k + l; i++)
            {
              mpz_add (a[i], a[i], b[i-l]); /* i-l < k */
              mpz_add (a[i], a[i], c[i-k]); /* i-k < l */
            }
        }
      else if (monic_c) /* add b * x^l */
        list_add (a + l, a + l, b, k);
      else /* only monic_b, add x^k * c */
        list_add (a + k, a + k, c, l);
    }
}

/* Multiplies b[0..k-1] by c[0..k-1], stores the result in a[0..2k-2], and stores the reduced product in a2[0..2k-2]. (Here, there is no implicit monic leading monomial.) Requires at least list_mul_mem(k) cells in t.
*/ void list_mulmod (listz_t a2, listz_t a, listz_t b, listz_t c, unsigned int k, listz_t t, mpz_t n) { int i; for (i = k; (i & 1) == 0; i >>= 1); ASSERTD(list_check(b,k,n)); ASSERTD(list_check(c,k,n)); if (i == 1 && Fermat) F_mul (a, b, c, k, DEFAULT, Fermat, t); else LIST_MULT_N (a, b, c, k, t); /* set a[0]...a[2l-2] */ list_mod (a2, a, 2 * k - 1, n); } /* puts in G[0]..G[k-1] the coefficients from (x+a[0])...(x+a[k-1]) Warning: doesn't fill the coefficient 1 of G[k], which is implicit. Needs k + list_mul_mem(k/2) cells in T. G == a is allowed. T must not overlap with anything else. */ void PolyFromRoots (listz_t G, listz_t a, unsigned int k, listz_t T, mpz_t n) { unsigned int l, m; ASSERT (T != G && T != a); ASSERT (k >= 1); if (k == 1) { /* we consider x + a[0], which mean we consider negated roots */ mpz_mod (G[0], a[0], n); return; } m = k / 2; /* m >= 1 */ l = k - m; /* l >= 1 */ PolyFromRoots (G, a, l, T, n); PolyFromRoots (G + l, a + l, m, T, n); list_mul (T, G, l, 1, G + l, m, 1, T + k); list_mod (G, T, k, n); } /* puts in G[0]..G[k-1] the coefficients from (x+a[0])...(x+a[k-1]) Warning: doesn't fill the coefficient 1 of G[k], which is implicit. Needs k + list_mul_mem(k/2) cells in T. The product tree is stored in: G[0..k-1] (degree k) Tree[0][0..k-1] (degree k/2) Tree[1][0..k-1] (degree k/4), ..., Tree[lgk-1][0..k-1] (degree 1) (then we should have initially Tree[lgk-1] = a). The parameter dolvl signals that only level 'dolvl' of the tree should be computed (dolvl < 0 means all levels). Either Tree <> NULL and TreeFile == NULL, and we write the tree to memory, or Tree == NULL and TreeFile <> NULL, and we write the tree to disk. 
*/ int PolyFromRoots_Tree (listz_t G, listz_t a, unsigned int k, listz_t T, int dolvl, mpz_t n, listz_t *Tree, FILE *TreeFile, unsigned int sh) { unsigned int l, m; listz_t H1, *NextTree; ASSERT (k >= 1); if (k == 1) { /* we consider x + a[0], which mean we consider negated roots */ mpz_mod (G[0], a[0], n); return 0; } if (Tree == NULL) /* -treefile case */ { H1 = G; NextTree = NULL; } else { H1 = Tree[0] + sh; NextTree = Tree + 1; } m = k / 2; l = k - m; if (dolvl != 0) /* either dolvl < 0 and we need to compute all levels, or dolvl > 0 and we need first to compute lower levels */ { PolyFromRoots_Tree (H1, a, l, T, dolvl - 1, n, NextTree, TreeFile, sh); PolyFromRoots_Tree (H1 + l, a + l, m, T, dolvl - 1, n, NextTree, TreeFile, sh + l); } if (dolvl <= 0) { /* Write this level to disk, if requested */ if (TreeFile != NULL) { if (list_out_raw (TreeFile, H1, l) == ECM_ERROR || list_out_raw (TreeFile, H1 + l, m) == ECM_ERROR) { outputf (OUTPUT_ERROR, "Error writing product tree of F\n"); return ECM_ERROR; } } list_mul (T, H1, l, 1, H1 + l, m, 1, T + k); list_mod (G, T, k, n); } return 0; } /* puts in q[0..K-1] the quotient of x^(2K-2) by B where B = b[0]+b[1]*x+...+b[K-1]*x^(K-1) with b[K-1]=1. */ void PolyInvert (listz_t q, listz_t b, unsigned int K, listz_t t, mpz_t n) { if (K == 1) { mpz_set_ui (q[0], 1); return; } else { int k, l, po2, use_middle_product = 0; #ifdef KS_MULTIPLY use_middle_product = 1; #endif k = K / 2; l = K - k; for (po2 = K; (po2 & 1) == 0; po2 >>= 1); po2 = (po2 == 1 && Fermat != 0); /* first determine l most-significant coeffs of Q */ PolyInvert (q + k, b + k, l, t, n); /* Q1 = {q+k, l} */ /* now Q1 * B = x^(2K-2) + O(x^(2K-2-l)) = x^(2K-2) + O(x^(K+k-2)). 
We need the coefficients of degree K-1 to K+k-2 of Q1*B */ ASSERTD(list_check(q+k,l,n) && list_check(b,l,n)); if (po2 == 0 && use_middle_product) { TMulKS (t, k - 1, q + k, l - 1, b, K - 1, n, 0); list_neg (t, t, k, n); } else if (po2) { list_revert (q + k, l); /* This expects the leading monomials explicitly in q[2k-1] and b[k+l-1] */ F_mul_trans (t, q + k, b, K / 2, K, Fermat, t + k); list_revert (q + k, l); list_neg (t, t, k, n); } else { LIST_MULT_N (t, q + k, b, l, t + 2 * l - 1); /* t[0..2l-1] = Q1 * B0 */ list_neg (t, t + l - 1, k, n); if (k > 1) { list_mul (t + k, q + k, l - 1, 1, b + l, k - 1, 1, t + k + K - 2); /* Q1 * B1 */ list_sub (t + 1, t + 1, t + k, k - 1); } } list_mod (t, t, k, n); /* high(1-B*Q1) */ ASSERTD(list_check(t,k,n) && list_check(q+l,k,n)); if (po2) F_mul (t + k, t, q + l, k, DEFAULT, Fermat, t + 3 * k); else LIST_MULT_N (t + k, t, q + l, k, t + 3 * k - 1); list_mod (q, t + 2 * k - 1, k, n); } } /* divides a[0]+a[1]*x+...+a[2K-1]*x^(2K-1) By b[0]+b[1]*x+...+b[K-1]*x^(K-1)+x^K i.e. a polynomial of 2K coefficients divided by a monic polynomial with K+1 coefficients (b[K]=1 is implicit). Puts the quotient in q[0]+q[1]*x+...+q[K-1]*x^(K-1) and the remainder in a[0]+a[1]*x+...+a[K-1]*x^(K-1) Needs space for list_mul_mem(K) coefficients in t. If top is non-zero, a[0]..a[K-1] are reduced mod n. 
*/ void RecursiveDivision (listz_t q, listz_t a, listz_t b, unsigned int K, listz_t t, mpz_t n, int top) { if (K == 1) /* a0+a1*x = a1*(b0+x) + a0-a1*b0 */ { mpz_mod (a[1], a[1], n); mpz_mul (q[0], a[1], b[0]); mpz_mod (q[0], q[0], n); mpz_sub (a[0], a[0], q[0]); if (top) mpz_mod (a[0], a[0], n); mpz_set (q[0], a[1]); } else { unsigned int k, l, i, po2; k = K / 2; l = K - k; for (po2 = K; (po2 && 1) == 0; po2 >>= 1); po2 = (po2 == 1); /* first perform a (2l) / l division */ RecursiveDivision (q + k, a + 2 * k, b + k, l, t, n, 0); /* subtract q[k..k+l-1] * b[0..k-1] */ ASSERTD(list_check(q+l,k,n) && list_check(b,k,n)); if (po2 && Fermat) F_mul (t, q + l, b, k, DEFAULT, Fermat, t + K); /* sets t[0..2*k-2]*/ else LIST_MULT_N (t, q + l, b, k, t + K - 1); /* sets t[0..2*k-2] */ list_sub (a + l, a + l, t, 2 * k - 1); if (k < l) /* don't forget to subtract q[k] * b[0..k-1] */ { for (i=0; i= 2. Requires 2K-1 + list_mul_mem(K) cells in t. Notations: R = r[0..K-1], A = a[0..2K-2], low(A) = a[0..K-1], high(A) = a[K..2K-2], Q = t[0..K-2] Return non-zero iff an error occurred. 
*/ int PrerevertDivision (listz_t a, listz_t b, listz_t invb, unsigned int K, listz_t t, mpz_t n) { int po2, wrap; listz_t t2 = NULL; #ifdef WRAP wrap = ks_wrapmul_m (K + 1, K + 1, n) <= 2 * K - 1 + list_mul_mem (K); #else wrap = 0; #endif /* Q <- high(high(A) * INVB) with a short product */ for (po2 = K; (po2 & 1) == 0; po2 >>= 1); po2 = (po2 == 1); if (Fermat && po2) { mpz_set_ui (a[2 * K - 1], 0); if (K <= 4 * Fermat) { F_mul (t, a + K, invb, K, DEFAULT, Fermat, t + 2 * K); /* Put Q in T, as we still need high(A) later on */ list_mod (t, t + K - 2, K, n); } else { F_mul (t, a + K, invb, K, DEFAULT, Fermat, t + 2 * K); list_mod (a + K, t + K - 2, K, n); } } else /* non-Fermat case */ { list_mul_high (t, a + K, invb, K - 1, t + 2 * K - 3); /* the high part of A * INVB is now in {t+K-2, K-1} */ if (wrap) { MEMORY_TAG; t2 = init_list2 (K - 1, mpz_sizeinbase (n, 2)); MEMORY_UNTAG; if (t2 == NULL) { fprintf (ECM_STDERR, "Error, not enough memory\n"); return ECM_ERROR; } list_mod (t2, t + K - 2, K - 1, n); } else /* we can store in high(A) which is no longer needed */ list_mod (a + K, t + K - 2, K - 1, n); } /* the quotient Q = trunc(A / B) has degree K-2, i.e. K-1 terms */ /* T <- low(Q * B) with a short product */ mpz_set_ui (a[2 * K - 1], 0); if (Fermat && po2) { if (K <= 4 * Fermat) { /* Multiply without zero padding, result is (mod x^K - 1) */ F_mul (t + K, t, b, K, NOPAD, Fermat, t + 2 * K); /* Take the leading monomial x^K of B into account */ list_add (t, t + K, t, K); /* Subtract high(A) */ list_sub(t, t, a + K, K); } else F_mul (t, a + K, b, K, DEFAULT, Fermat, t + 2 * K); } else /* non-Fermat case */ { #ifdef KS_MULTIPLY /* ks is faster */ if (wrap) /* Q = {t2, K-1}, B = {b, K+1} We know that Q*B vanishes with the coefficients of degree K to 2K-2 of {A, 2K-1} */ { unsigned int m; m = ks_wrapmul (t, K + 1, b, K + 1, t2, K - 1, n); clear_list (t2, K - 1); /* coefficients of degree m..2K-2 wrap around, i.e. 
were subtracted to 0..2K-2-m */ if (m < 2 * K - 1) /* otherwise product is exact */ list_add (t, t, a + m, 2 * K - 1 - m); } else LIST_MULT_N (t, a + K, b, K, t + 2 * K - 1); #else list_mul_low (t, a + K, b, K, t + 2 * K - 1, n); #endif } /* now {t, K} contains the low K terms from Q*B */ list_sub (a, a, t, K); list_mod (a, a, K, n); return 0; } /* Puts in inv[0..l-1] the inverses of a[0..l-1] (mod n), using 3*(l-1) multiplies and one gcdext. Returns 1 if a factor was found (stored in t), 0 otherwise. */ int list_invert (listz_t inv, listz_t a, unsigned long l, mpz_t t, mpmod_t modulus) { unsigned long i; if (l == 0) return 0; mpz_set (inv[0], a[0]); for (i = 1; i < l; i++) { mpz_mul (t, inv[i-1], a[i]); mpz_mod (inv[i], t, modulus->orig_modulus); /* inv[i] = a[0]*...*a[i] */ } mpz_gcdext (t, inv[l-1], NULL, inv[l-1], modulus->orig_modulus); if (mpz_cmp_ui (t, 1) != 0) return 1; for (i = l-1; i > 0; i--) { mpz_mul (t, inv[i], inv[i-1]); /* t = (a[0]*...*a[i])^(-1) * (a[0]*...*a[i-1]) = a[i]^(-1) */ mpz_mul (inv[i-1], inv[i], a[i]); /* inv[i-1] = (a[0]*...*a[i])^(-1) * a[i] = (a[0]*...*a[i-1])^(-1) */ mpz_mod (inv[i-1], inv[i-1], modulus->orig_modulus); mpz_mod (inv[i], t, modulus->orig_modulus); } return 0; } ecm-6.4.4/ecm-params.h.armv5tel0000644023561000001540000000116512106741274013156 00000000000000/* those parameters were obtained on gcc50.fsffrance.org with ecm-6.3-rc3, gmp-5.0.1, and gcc 4.3.2 -O2 -pedantic -fomit-frame-pointer (armv5tel-unknown-linux-gnueabi) */ #define MPZMOD_THRESHOLD 140 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 16 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 512 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 128 #define POLYEVALT_NTT_THRESHOLD 256 #define MPZSPV_NORMALISE_STRIDE 128 
ecm-6.4.4/build.vc10/0000755023561000001540000000000012113421641011135 500000000000000ecm-6.4.4/build.vc10/mul_fft-params.h.x64.amd0000644023561000001540000001121212106741270015325 00000000000000#define MUL_FFT_MODF_THRESHOLD 300 #define SQR_FFT_MODF_THRESHOLD 568 #define MUL_FFT_TABLE2 {{1, 4 /*66*/}, {401, 5 /*96*/}, {417, 4 /*98*/}, {433, 5 /*96*/}, {865, 6 /*96*/}, {897, 5 /*98*/}, {929, 6 /*96*/}, {2113, 7 /*97*/}, {2177, 6 /*98*/}, {2241, 7 /*97*/}, {2305, 6 /*98*/}, {2369, 7 /*97*/}, {3713, 8 /*93*/}, {3841, 7 /*98*/}, {4225, 8 /*94*/}, {4353, 7 /*98*/}, {4481, 8 /*94*/}, {4865, 7 /*98*/}, {4993, 8 /*95*/}, {6913, 9 /*87*/}, {7169, 8 /*96*/}, {7425, 9 /*93*/}, {7681, 8 /*96*/}, {8449, 9 /*94*/}, {8705, 8 /*97*/}, {8961, 9 /*90*/}, {9729, 8 /*97*/}, {9985, 9 /*90*/}, {11777, 8 /*97*/}, {12033, 9 /*92*/}, {13825, 10 /*87*/}, {14337, 9 /*96*/}, {17921, 10 /*90*/}, {19457, 9 /*97*/}, {20993, 10 /*87*/}, {21505, 9 /*97*/}, {22017, 10 /*91*/}, {23553, 9 /*97*/}, {26113, 10 /*92*/}, {31745, 9 /*98*/}, {32257, 10 /*88*/}, {44033, 11 /*91*/}, {47105, 10 /*97*/}, {56321, 11 /*87*/}, {63489, 10 /*98*/}, {70657, 11 /*87*/}, {71681, 10 /*98*/}, {72705, 11 /*90*/}, {79873, 10 /*98*/}, {80897, 11 /*83*/}, {81921, 10 /*96*/}, {82945, 11 /*85*/}, {96257, 10 /*98*/}, {97281, 12 /*75*/}, {98305, 10 /*97*/}, {101377, 12 /*78*/}, {102401, 11 /*91*/}, {110593, 12 /*87*/}, {126977, 11 /*98*/}, {161793, 12 /*83*/}, {192513, 11 /*98*/}, {194561, 13 /*75*/}, {253953, 12 /*98*/}, {258049, 11 /*99*/}, {276481, 12 /*85*/}, {282625, 11 /*96*/}, {284673, 12 /*87*/}, {389121, 11 /*99*/}, {391169, 13 /*75*/}, {434177, 12 /*95*/}, {438273, 13 /*84*/}, {516097, 12 /*99*/}, {585729, 11 /*99*/}, {620545, 13 /*79*/}, {630785, 12 /*96*/}, {651265, 13 /*83*/}, {778241, 12 /*99*/}, {782337, 11 /*99*/}, {817153, 12 /*96*/}, {819201, 14 /*79*/}, {1032193, 13 /*99*/}, {1040385, 11 /*99*/}, {1046529, 12 /*94*/}, {LONG_MAX, 0}} #define MUL_FFTM_TABLE2 {{1, 4 /*66*/}, {337, 5 /*95*/}, {353, 4 /*97*/}, 
{369, 5 /*96*/}, {385, 4 /*98*/}, {401, 5 /*96*/}, {801, 6 /*96*/}, {833, 5 /*98*/}, {865, 6 /*96*/}, {1729, 7 /*96*/}, {1793, 6 /*98*/}, {1857, 7 /*96*/}, {2049, 6 /*98*/}, {2113, 7 /*97*/}, {3841, 8 /*96*/}, {4097, 7 /*98*/}, {4225, 8 /*97*/}, {4609, 7 /*98*/}, {4737, 8 /*97*/}, {7169, 9 /*93*/}, {7681, 8 /*98*/}, {8449, 9 /*94*/}, {8705, 8 /*98*/}, {8961, 9 /*94*/}, {9217, 8 /*98*/}, {9473, 9 /*95*/}, {14849, 10 /*93*/}, {15361, 9 /*96*/}, {17921, 10 /*90*/}, {19457, 9 /*97*/}, {20481, 10 /*95*/}, {21505, 9 /*97*/}, {22017, 10 /*91*/}, {23553, 9 /*97*/}, {24065, 10 /*92*/}, {29697, 11 /*93*/}, {30721, 10 /*96*/}, {37889, 11 /*95*/}, {38913, 10 /*97*/}, {44033, 11 /*91*/}, {47105, 10 /*97*/}, {52225, 11 /*92*/}, {55297, 10 /*98*/}, {56321, 11 /*87*/}, {63489, 10 /*98*/}, {64513, 11 /*88*/}, {79873, 12 /*83*/}, {81921, 11 /*93*/}, {88065, 12 /*91*/}, {94209, 11 /*97*/}, {104449, 12 /*81*/}, {110593, 11 /*98*/}, {112641, 12 /*87*/}, {126977, 11 /*98*/}, {137217, 12 /*85*/}, {159745, 11 /*98*/}, {161793, 12 /*83*/}, {167937, 11 /*98*/}, {169985, 12 /*87*/}, {192513, 11 /*98*/}, {194561, 12 /*85*/}, {196609, 11 /*97*/}, {202753, 12 /*89*/}, {217089, 13 /*84*/}, {221185, 12 /*98*/}, {225281, 13 /*87*/}, {253953, 12 /*98*/}, {323585, 13 /*83*/}, {385025, 12 /*98*/}, {389121, 14 /*75*/}, {393217, 12 /*93*/}, {405505, 14 /*78*/}, {507905, 13 /*98*/}, {516097, 12 /*99*/}, {552961, 13 /*85*/}, {573441, 12 /*97*/}, {577537, 13 /*88*/}, {778241, 12 /*99*/}, {782337, 13 /*85*/}, {851969, 14 /*82*/}, {868353, 13 /*95*/}, {909313, 14 /*87*/}, {1032193, 13 /*99*/}, {LONG_MAX, 0}} #define MUL_FFT_FULL_TABLE2 {{16, 1}, {4224, 2}, {4416, 6}, {4480, 2}, {4608, 4}, {4640, 2}, {4800, 1}, {5120, 2}, {5184, 1}, {5632, 2}, {5760, 1}, {6656, 4}, {6720, 1}, {7168, 4}, {7360, 1}, {7936, 4}, {8000, 2}, {8064, 1}, {8704, 2}, {8832, 6}, {8960, 3}, {9216, 1}, {13312, 6}, {14336, 3}, {15360, 5}, {16896, 6}, {17920, 1}, {19968, 2}, {20736, 1}, {21504, 2}, {23808, 1}, {28672, 4}, {29440, 2}, 
{29952, 1}, {33792, 2}, {35328, 1}, {36864, 4}, {37120, 1}, {49152, 4}, {49920, 1}, {50176, 3}, {53248, 1}, {55296, 2}, {59904, 3}, {61440, 1}, {65536, 2}, {70656, 6}, {71680, 2}, {72192, 5}, {73728, 4}, {79360, 1}, {81920, 2}, {82944, 1}, {86016, 2}, {89088, 1}, {90112, 2}, {95232, 1}, {100352, 5}, {110592, 1}, {114688, 4}, {117760, 1}, {131072, 2}, {144384, 5}, {147456, 4}, {158720, 1}, {161792, 3}, {163840, 2}, {190464, 1}, {196608, 4}, {199680, 3}, {212992, 1}, {262144, 6}, {272384, 7}, {294912, 6}, {301056, 4}, {322560, 1}, {327680, 3}, {344064, 2}, {380928, 1}, {385024, 2}, {387072, 1}, {393216, 7}, {425984, 6}, {444416, 5}, {466944, 1}, {520192, 2}, {577536, 7}, {589824, 6}, {602112, 4}, {645120, 3}, {688128, 2}, {774144, 1}, {786432, 6}, {788480, 4}, {808960, 5}, {811008, 2}, {817152, 3}, {819200, 5}, {823296, 2}, {829440, 1}, {1048576, 2}, {1069056, 1}, {1073152, 5}, {1081344, 3}, {1089536, 2}, {LONG_MAX, 1}} ecm-6.4.4/build.vc10/ecm-params.h0000644023561000001540000000072412106741271013264 00000000000000#define MPZMOD_THRESHOLD 170 #define REDC_THRESHOLD 294 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 0, 0, 0, 0, 1, 7, 8, 1, 1, 8, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 16, 16, 1, 1, 16, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 11 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 11 #define MUL_NTT_THRESHOLD 1024 #define PREREVERTDIVISION_NTT_THRESHOLD 64 #define POLYINVERT_NTT_THRESHOLD 512 #define POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 512 ecm-6.4.4/build.vc10/ecm-params.h.x64.amd0000644023561000001540000000122212106741271014436 00000000000000/* updated 03 Jan 2012 on frite.loria.fr (AMD Phenom(tm) II X2 B55 Processor) for ecm-6.4 with GMP 5.0.2 */ #define TUNE_MULREDC_THRESH 10 #define TUNE_SQRREDC_THRESH 1 #define MPZMOD_THRESHOLD 103 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 0, 8, 10, 11, 11, 12, 12, 12, 13, 14, 15, 16, 17, 18, 19, 16, 18, 18, 18, 20} #define 
NTT_GFP_TWIDDLE_DIF_BREAKOVER 12 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 512 #define POLYEVALT_NTT_THRESHOLD 128 #define MPZSPV_NORMALISE_STRIDE 512 ecm-6.4.4/build.vc10/tune/0000755023561000001540000000000012113421641012110 500000000000000ecm-6.4.4/build.vc10/tune/tune.vcxproj.filters0000644023561000001540000000724312106741270016102 00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd {38f1a18f-40fc-4eed-a68e-e79b58327b6c} Source Files\Assembler Source Files\Assembler Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files ecm-6.4.4/build.vc10/tune/Makefile.in0000644023561000001540000002347212113353770014114 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = build.vc10/tune DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = 
@target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = tune.vcxproj tune.vcxproj.filters all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu build.vc10/tune/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu build.vc10/tune/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags: TAGS TAGS: ctags: CTAGS CTAGS: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; 
\ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: ecm-6.4.4/build.vc10/tune/Makefile.am0000644023561000001540000000005712106741270014073 00000000000000EXTRA_DIST = tune.vcxproj tune.vcxproj.filters ecm-6.4.4/build.vc10/tune/tune.vcxproj0000644023561000001540000001735712106741270014442 00000000000000 Release Win32 Release x64 {80E08750-5C6C-492E-BB1E-7200978AE125} tune Win32Proj Application Unicode true Application NotSet <_ProjectFileVersion>10.0.30128.1 $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ false $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ false MaxSpeed true ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;TUNE;%(PreprocessorDefinitions) MultiThreaded true Level3 ProgramDatabase ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);%(AdditionalDependencies) true Console true true MachineX86 X64 MaxSpeed true ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;_WIN64;NDEBUG;_CONSOLE;TUNE;%(PreprocessorDefinitions) MultiThreaded true Level3 ProgramDatabase ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);%(AdditionalDependencies) true Console true true MachineX64 _WIN64 TUNE_MULREDC_THRESH#0;TUNE_SQRREDC_THRESH#0;%(PreprocessorDefinitions) TUNE_MULREDC_THRESH#0;TUNE_SQRREDC_THRESH#0;%(PreprocessorDefinitions) {cd555681-d65b-4173-a29c-b8bf06a4871b} ecm-6.4.4/build.vc10/assembler/0000755023561000001540000000000012113421641013112 500000000000000ecm-6.4.4/build.vc10/assembler/test_mulredc.c0000644023561000001540000001441612106741270015703 00000000000000#include #include #include #include #include "asmredc.h" void mp_print(mp_limb_t *x, int N) { int i; for (i = 0; i < N-1; ++i) printf("%lu + W*(", x[i]); printf("%lu", x[N-1]); for (i = 0; i < N-1; ++i) printf(")"); printf("\n"); } static mp_limb_t call_mulredc (int N, mp_limb_t *z, mp_limb_t *x, mp_limb_t *y, mp_limb_t *m, mp_limb_t invm) { mp_limb_t cy; switch (N) { case 1: cy = mulredc1(z, x[0], 
y[0], m[0], invm); break; case 2: cy = mulredc2(z, x, y, m, invm); break; case 3: cy = mulredc3(z, x, y, m, invm); break; case 4: cy = mulredc4(z, x, y, m, invm); break; case 5: cy = mulredc5(z, x, y, m, invm); break; case 6: cy = mulredc6(z, x, y, m, invm); break; case 7: cy = mulredc7(z, x, y, m, invm); break; case 8: cy = mulredc8(z, x, y, m, invm); break; case 9: cy = mulredc9(z, x, y, m, invm); break; case 10: cy = mulredc10(z, x, y, m, invm); break; case 11: cy = mulredc11(z, x, y, m, invm); break; case 12: cy = mulredc12(z, x, y, m, invm); break; case 13: cy = mulredc13(z, x, y, m, invm); break; case 14: cy = mulredc14(z, x, y, m, invm); break; case 15: cy = mulredc15(z, x, y, m, invm); break; case 16: cy = mulredc16(z, x, y, m, invm); break; case 17: cy = mulredc17(z, x, y, m, invm); break; case 18: cy = mulredc18(z, x, y, m, invm); break; case 19: cy = mulredc19(z, x, y, m, invm); break; case 20: cy = mulredc20(z, x, y, m, invm); break; default: cy = mulredc20(z, x, y, m, invm); } return cy; } void test(mp_size_t N, int k) { mp_limb_t *x, *y, *yp, *z, *m, invm, cy, cy2, *tmp, *tmp2, *tmp3; int i, j; x = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); y = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); z = (mp_limb_t *) malloc((N+1)*sizeof(mp_limb_t)); m = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); tmp = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); tmp2 = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); tmp3 = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); if (x == NULL || y == NULL || z == NULL || m == NULL || tmp == NULL || tmp2 == NULL || tmp3 == NULL) { fprintf (stderr, "Cannot allocate memory in test_mulredc\n"); exit (1); } mpn_random2(m, N); m[0] |= 1UL; if (m[N-1] == 0) m[N-1] = 1UL; invm = 1UL; for (i = 0; i < 10; ++i) invm = (2*invm-m[0]*invm*invm); invm = -invm; assert( (invm*m[0] +1UL) == 0UL); yp = y; for (i=0; i < k; ++i) { /* Try a few special cases */ if (i == 0) { /* Try all 0, product should be 0 */ for (j = 0; j < N; j++) x[j] = y[j] = 0; } 
else if (i == 1) { /* Try all 1 */ for (j = 0; j < N; j++) x[j] = y[j] = 1; } else if (i == 2) { /* Try all 2^wordsize - 1 */ for (j = 0; j < N; j++) x[j] = y[j] = ~(0UL); } else { /* In the other cases, try random data */ if (i % 2 == 0) { /* Try squaring */ mpn_random2(x, N); yp = x; } else { /* Try multiplication */ mpn_random2(x, N); mpn_random2(y, N); } } // Mul followed by ecm_redc3 mpn_mul_n(tmp, x, yp, N); ecm_redc3(tmp, m, N, invm); cy2 = mpn_add_n (tmp2, tmp + N, tmp, N); // Mixed mul and redc cy = call_mulredc (N, z, x, yp, m, invm); if (cy != cy2) printf ("i = %d: mulredc cy = %ld, mpn_mul_n/ecm_redc3 cy = %ld\n", i, (long) cy, (long) cy2); assert (cy == cy2); if (mpn_cmp(z,tmp2, N) != 0) { printf ("i = %d\nmulredc = ", i); for (j = N - 1; j >= 0; j--) printf ("%lx ", z[j]); printf ("\nmpn_mul_n/ecm_redc3 = "); for (j = N - 1; j >= 0; j--) printf ("%lx ", tmp2[j]); printf ("\n"); assert (mpn_cmp(z,tmp2, N) == 0); } if (cy) printf("!"); z[N] = cy; // Check with pure gmp : multiply by 2^(N*GMP_NUMB_BITS) and compare. 
for (j=0; j < N; ++j) { tmp[j] = 0; tmp[j+N] = z[j]; } tmp[2*N] = z[N]; mpn_tdiv_qr(tmp2, tmp3, 0, tmp, 2*N+1, m, N); for (j=0; j < N; ++j) z[j] = tmp3[j]; mpn_mul_n(tmp, x, yp, N); mpn_tdiv_qr(tmp2, tmp3, 0, tmp, 2*N, m, N); assert(mpn_cmp(z, tmp3, N) == 0); } free(tmp); free(tmp2); free(tmp3); free(x); free(y); free(z); free(m); } int main(int argc, char** argv) { int i, len; if (argc > 1) /* Test a specific length */ { len = atoi (argv[1]); for (i = 0; i < 1; i++) test (len, 1000000); return 0; } for (;;) { for (i = 1; i <= 20; ++i) { test(i, 1000); } #if 0 test(1, 1000); test(2, 1000); test(3, 1000); test(4, 1000); test(5, 1000); test(6, 1000); test(7, 1000); test(8, 1000); test(9, 1000); test(10, 1000); test(11, 1000); test(12, 1000); test(13, 100); test(14, 100); test(15, 100); test(16, 100); test(17, 100); test(18, 100); test(44, 10); test(45, 10); test(46, 10); test(47, 10); test(48, 10); test(49, 10); #endif printf("."); fflush(stdout); } #if 0 x[0] = 12580274668139321508UL; x[1] = 9205793975152560417UL; x[2] = 7857372727033793057UL; y[0] = 13688385828267279103UL; y[1] = 10575011835742767258UL; y[2] = 8802048318027595690UL; m[0] = 2981542467342508025UL; m[1] = 5964669706257742025UL; m[2] = 18446744073678090270UL; invm = 9419286575570128311UL; carry = mulredc(z, x, y, m, 3, invm); printf("%lu + 2^64*(%lu + 2^64*%lu), carry=%lu\n", z[0], z[1], z[2], carry); #endif return 0; } #if 0 W := 2^64; x0:= 12580274668139321508; x1:= 9205793975152560417; x2:= 7857372727033793057; x := x0 + W*(x1 + W*x2); y0:= 13688385828267279103; y1:= 10575011835742767258; y2:= 8802048318027595690; y := y0 + W*(y1 + W*y2); m0:= 2981542467342508025; m1:= 5964669706257742025; m2:= 18446744073678090270; m := m0 + W*(m1 + W*m2); invm := 9419286575570128311; #endif ecm-6.4.4/build.vc10/assembler/a_win32a_mulredc.asm0000644023561000001540000000627712106741270016673 00000000000000 ; Part of GMP-ECM ; ; mp_limb_t mulredc1( 1 limb ; mp_limb_t *z, ; const mp_limb_t x, ; const mp_limb_t y, ; 
const mp_limb_t m, ; mp_limb_t inv_m ; ) ; ; mp_limb_t mulredc( > 1 limb ; mp_limb_t *z, ; const mp_limb_t *x, ; const mp_limb_t *y, ; const mp_limb_t *m, ; mp_limb_t inv_m ; ) %macro mseq_1 3 mul ebp add [edi+4*%3], %2 mov %2, 0 adc %1, eax mov eax, [esi+4*%3+8] adc %2, edx %endmacro %macro mseq_2 3 mul ebp add [edi+3*%3], %1 mov %1, 0 adc %1, eax mov eax, [esi+4*%3+8] adc %2, edx %endmacro %macro mulredc 1 %assign limbs %1 %define f_name(x) _mulredc %+ x global f_name(limbs) %ifdef DLL export f_name(limbs) %endif f_name(limbs): push ebp push edi push esi push ebx sub esp, 8*(limbs+1) mov edi, esp %assign i 0 %rep 2 * limbs + 1 mov dword [edi+4*i], 0 %assign i i + 1 %endrep mov dword [esp+8*limbs+4], limbs ; align 32 .1: mov eax, [esp+8*limbs+32] mov esi, [esp+8*limbs+36] mov eax, [eax] mul dword [esi] add eax, [edi] mul dword [esp+8*limbs+44] mov ebp, eax mov esi, [esp+8*limbs+40] mov eax, [esi] mul ebp mov ebx, eax mov ecx, edx mov eax, [esi+4] %assign i 0 %rep limbs - 2 %if (i & 1) mseq_1 ebx, ecx, i %else mseq_1 ecx, ebx, i %endif %assign i i + 1 %endrep mul ebp %if (limbs & 1) add [edi+4*limbs-8], ecx adc eax, ebx %else add [edi+4*limbs-8], ebx adc eax, ecx %endif adc edx, 0 add [edi+4*limbs-4], eax adc edx, 0 add [edi+4*limbs], edx adc dword [edi+4*limbs+4], 0 mov eax, [esp+8*limbs+32] mov ebp, [eax] mov esi, [esp+8*limbs+36] mov eax, [esi] mul ebp mov ebx, eax mov ecx, edx mov eax, [esi+4] %assign i 0 %rep limbs - 2 %if (i & 1) mseq_1 ebx, ecx, i %else mseq_1 ecx, ebx, i %endif %assign i i + 1 %endrep mul ebp %if (limbs & 1) add [edi+4*limbs-8], ecx adc eax, ebx %else add [edi+4*limbs-8], ebx adc eax, ecx %endif adc edx, 0 add [edi+4*limbs-4], eax adc edx, 0 add [edi+4*limbs],edx adc dword [edi+4*limbs+4], 0 add dword [esp+8*limbs+32], 4 add edi, 4 dec dword [esp+8*limbs+4] jnz .1 mov ebx, [esp+8*limbs+28] %assign i 0 %rep limbs mov eax, [edi+4*i] mov [ebx+4*i], eax %assign i i + 1 %endrep mov eax, [edi+4*limbs] add esp, 8*(limbs+1) pop ebx pop esi pop edi 
pop ebp ret %endmacro text global _mulredc1 _mulredc1: mov eax, [esp+12] mul dword [esp+8] mov [esp+12], edx mov [esp+8], eax mul dword [esp+20] mul dword [esp+16] add eax, [esp+8] adc edx, [esp+12] mov ecx, [esp+4] mov [ecx], edx adc eax,0 ret %assign i 2 %rep 19 ; 3..20 inclusive mulredc i %assign i i + 1 %endrep end ecm-6.4.4/build.vc10/assembler/mulredc.h0000644023561000001540000000506512106741270014651 00000000000000#ifndef __ASM_REDC_H__ #define __ASM_REDC_H__ #include extern void ecm_redc3(mp_limb_t *cp, const mp_limb_t *np, mp_size_t nn, mp_limb_t Nprim); /* WARNING: the size-1 version doesn't take pointers in input */ extern mp_limb_t mulredc1(mp_limb_t *z, mp_limb_t x, mp_limb_t y, mp_limb_t m, mp_limb_t inv_m); extern mp_limb_t mulredc2(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc3(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc4(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc5(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc6(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc7(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc8(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc9(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc10(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc11(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc12(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, 
const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc13(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc14(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc15(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc16(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc17(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc18(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc19(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); extern mp_limb_t mulredc20(mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, mp_limb_t inv_m); #endif ecm-6.4.4/build.vc10/assembler/mulredc.asm0000644023561000001540000000021712106741270015174 00000000000000 %ifdef _WIN64 %include "a_x64_mulredc.asm" %elif AMD_ASM %include "a_win32a_mulredc.asm" %else %include "a_win32p_mulredc.asm" %endif ecm-6.4.4/build.vc10/assembler/a_x64_mulredc.asm0000644023561000001540000001071112106741270016175 00000000000000; ; Part of GMP-ECM ; ; mp_limb_t mulredc1( MSVC 1 limb ; mp_limb_t *z, rcx ; const mp_limb_t x, rdx ; const mp_limb_t y, r8 ; const mp_limb_t m, r9 ; mp_limb_t inv_m [rsp+0x28] ; ) ; ; mp_limb_t mulredc( MSVC > 1 limb ; mp_limb_t *z, rcx ; const mp_limb_t *x, rdx ; const mp_limb_t *y, r8 ; const mp_limb_t *m, r9 ; mp_limb_t inv_m [rsp+0x28] ; ) %macro mseq_1 4 mov %2, rcx mul r14 add %1, rax mov rax, [r9+8*%3] adc %2, rdx mul r11 %if %3 < %4 - 1 add rax, %1 mov [rbp+8*(%3-1)], rax mov rax, [r8+8*(%3+1)] adc %2, rdx setc cl %else add %1, rax mov [rbp+8*(%3-1)], %1 adc %2, rdx mov [rbp+8*%3], %2 setc cl mov [rbp+8*(%3+1)], rcx %endif 
%endmacro %macro mseq_20 2 mov r14, [r13+r12*8] mov rax, [r8] mov %1, [rbp] mov %2, [rbp+8] mul r14 add r12, 1 add rax, %1 adc %2, rdx setc cl mov %1, rax imul rax, r10 mov r11, rax mul qword [r9] add %1, rax adc %2, rdx mov rax, [r8+8] %endmacro %macro mseq_2 4 mov %2, [rbp+8*(%3+1)] adc %2, rcx %if %3 < %4 - 1 setc cl %endif mul r14 add %1, rax mov rax, [r9+8*%3] adc %2, rdx %if %3 < %4 - 1 adc cl, 0 %else setc cl %endif mul r11 %if %3 < %4 - 1 add rax, %1 mov [rbp+8*(%3-1)], rax adc %2, rdx mov rax, [r8+8*(%3+1)] %else add %1, rax mov [rbp+8*(%3-1)], %1 adc %2, rdx mov [rbp+8*%3],%2 adc cl, 0 mov [rbp+8*(%3+1)], rcx %endif %endmacro %macro store 1 %assign i 0 %rep %1 %if i == %1 - 1 && (%1 & 1) mov rax, [rbp+8*i] mov [rdi+8*i], rax %elif (i & 1) mov [rdi+8*(i-1)], rax mov [rdi+8*i], rdx %else mov rax, [rbp+8*i] mov rdx, [rbp+8*(i+1)] %endif %assign i i + 1 %endrep %endmacro %macro mulredc 1 %assign limbs %1 %define f_name(x) mulredc %+ x %define stack_space 8 * (limbs + 1 + (limbs & 1)) global f_name(limbs) %ifdef DLL export f_name(limbs) %endif align 64 PROC_FRAME f_name(limbs) ; SEH Frame push_reg rbp push_reg rbx push_reg rsi push_reg rdi push_reg r12 push_reg r13 push_reg r14 alloc_stack stack_space END_PROLOGUE ; *y in r8 mov rdi, rcx ; *z -> rdi mov r13, rdx ; *x -> r13 mov r10, [rsp+8*12+stack_space] ; invm -> r10 ; *m in r9 mov r14, [r13] mov rax, [r8] xor rcx, rcx lea rbp, [rsp] mov r12, rcx mul qword r14 add r12, 1 mov rsi, rax mov rbx, rdx imul rax, r10 mov r11, rax mul qword [r9] add rsi, rax mov rax, [r8+8] adc rbx, rdx setc cl %assign j 1 %rep limbs - 1 %if (j & 1) mseq_1 rbx, rsi, j, limbs %else mseq_1 rsi, rbx, j, limbs %endif %assign j j + 1 %endrep align 32 .1: %assign j 1 %if (limbs & 1) mseq_20 rsi, rbx %rep limbs - 1 %if (j & 1) mseq_2 rbx, rsi, j, limbs %else mseq_2 rsi, rbx, j, limbs %endif %assign j j + 1 %endrep %else mseq_20 rbx, rsi %rep limbs - 1 %if (j & 1) mseq_2 rsi, rbx, j, limbs %else mseq_2 rbx, rsi, j, limbs %endif %assign j j 
+ 1 %endrep %endif cmp r12, limbs jb .1 store limbs mov rax, rcx add rsp, stack_space pop r14 pop r13 pop r12 pop rdi pop rsi pop rbx pop rbp ret ENDPROC_FRAME %endmacro bits 64 section .text global mulredc1 %ifdef DLL export mulredc1 %endif align 64 mulredc1: mov rax, r8 mul rdx mov r10, rax mov r11, rdx mul qword [rsp+0x28] mul r9 add rax, r10 adc rdx, r11 mov [rcx], rdx adc rax, 0 ret %assign i 2 %rep 19 ; 2..20 inclusive mulredc i %assign i i + 1 %endrep end ecm-6.4.4/build.vc10/assembler/a_win32p_redc.asm0000644023561000001540000000523512106741270016165 00000000000000; ; Part of GMP-ECM ; ; void ecm_redc3( ; mp_limb_t *z, rdi r8 <- rcx ; const mp_limb_t *x, rsi r9 <- rdx ; size_t n, rdx r10 <- r8 ; mp_limb_t m rcx r11 <- r9 ; ) %macro rloop 3 mov eax, [byte esi+4*%3] mul ebp add [byte edi+4*%3], %2 adc %1, eax mov %2, edx adc %2, 0 %endmacro bits 32 section .text global _ecm_redc3 %ifdef DLL export _ecm_redc3 %endif _ecm_redc3: push ebp push edi push esi push ebx sub esp, 16 mov ecx, [esp+44] mov edi, [esp+36] mov [esp], ecx cmp ecx, 5 jae .unroll .1: mov ebp, [esp+48] mov esi, [esp+40] imul ebp, [edi] mov [esp+36], edi mov ecx, [esp+44] xor ebx, ebx .2: mov eax, [esi] add edi, 4 mul ebp add esi, 4 add eax, ebx adc edx, 0 add [edi-4], eax adc edx, 0 dec ecx mov ebx, edx jnz .2 mov edi, [esp+36] mov [edi], ebx dec dword [esp] lea edi, [edi+4] jnz .1 add esp, 16 pop ebx pop esi pop edi pop ebp ret .unroll: mov edx, ecx dec ecx sub edx, 2 neg ecx shr edx, 4 and ecx, 15 mov [esp+8], edx mov edx, ecx shl edx, 4 neg ecx lea edx, [edx+ecx*1+.loop_base] mov [esp+44], ecx mov [esp+12], edx .4: mov ebp, [esp+48] mov esi, [esp+40] imul ebp, [edi] mov [esp+36], edi mov ecx, [esp+44] mov edx, [esp+8] mov [esp+4], edx mov eax, [esi] lea esi, [esi+ecx*4+4] mul ebp lea edi, [edi+ecx*4] mov ebx, edx mov edx, [esp+12] test ecx, 1 mov ecx, eax cmovnz ecx, ebx cmovnz ebx, eax jmp edx align 32 .5: add edi, 64 .loop_base: rloop ebx, ecx, 0 rloop ecx, ebx, 1 rloop ebx, ecx, 2 rloop 
ecx, ebx, 3 rloop ebx, ecx, 4 rloop ecx, ebx, 5 rloop ebx, ecx, 6 rloop ecx, ebx, 7 rloop ebx, ecx, 8 rloop ecx, ebx, 9 rloop ebx, ecx, 10 rloop ecx, ebx, 11 rloop ebx, ecx, 12 rloop ecx, ebx, 13 rloop ebx, ecx, 14 rloop ecx, ebx, 15 dec dword [esp+4] lea esi, [esi+64] jns .5 add [edi+64], ecx mov edi, [esp+36] adc ebx, 0 mov [edi], ebx dec dword [esp] lea edi, [edi+4] jnz .4 add esp, 16 pop ebx pop esi pop edi pop ebp ret end ecm-6.4.4/build.vc10/assembler/redc.asm0000644023561000001540000000020412106741270014452 00000000000000%ifdef _WIN64 %include "a_x64_redc.asm" %elif AMD_ASM %include "a_win32a_redc.asm" %else %include "a_win32p_redc.asm" %endif ecm-6.4.4/build.vc10/assembler/Makefile.in0000644023561000001540000002374312113353770015117 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = build.vc10/assembler DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = 
@INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = 
@sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = a_win32a_mulredc.asm a_win32a_redc.asm a_win32p_mulredc.asm \ a_win32p_redc.asm a_x64_mulredc.asm a_x64_redc.asm \ test_mulredc.c mulredc.h mulredc.asm redc.asm all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu build.vc10/assembler/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu build.vc10/assembler/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags: TAGS TAGS: ctags: CTAGS CTAGS: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed 
'/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: ecm-6.4.4/build.vc10/assembler/a_win32p_mulredc.asm0000644023561000001540000000472712106741270016710 00000000000000 ; Part of GMP-ECM ; ; mp_limb_t mulredc1( 1 limb ; mp_limb_t *z, ; const mp_limb_t x, ; const mp_limb_t y, ; const mp_limb_t m, ; mp_limb_t inv_m ; ) ; ; mp_limb_t mulredc( > 1 limb ; mp_limb_t *z, ; const mp_limb_t *x, ; const mp_limb_t *y, ; const mp_limb_t *m, ; mp_limb_t inv_m ; ) %macro mseq 1 movd mm1, [esi+4*%1] movd mm2, [edi+4*%1] pmuludq mm1, mm7 paddq mm2, mm1 paddq mm0, mm2 movd [edi+4*%1], mm0 psrlq mm0, 32 %endmacro %macro mulredc 1 %assign limbs %1 %define f_name(x) _mulredc %+ x global f_name(limbs) %ifdef DLL export f_name(limbs) %endif f_name(limbs): push ebp push edi push esi push ebx sub esp, 8*(limbs+1) mov edi, esp %assign i 0 %rep 2 * limbs + 1 mov dword [edi+4*i], 0 %assign i i + 1 %endrep mov dword [esp+8*limbs+4], limbs align 32 .1: mov eax, [esp+8*limbs+32] mov esi, [esp+8*limbs+36] mov eax, [eax] mul dword [esi] add eax, [edi] mul dword [esp+8*limbs+44] mov ebp, eax mov esi, [esp+8*limbs+40] pxor mm0, mm0 movd mm7, ebp %assign i 0 %rep limbs mseq i %assign i i + 1 %endrep movd ecx, mm0 add [edi+4*limbs], ecx adc dword [edi+4*limbs+4], 0 mov eax, [esp+8*limbs+32] mov ebp, [eax] mov esi, [esp+8*limbs+36] pxor mm0, mm0 movd mm7, ebp %assign i 0 %rep limbs mseq i %assign i i + 1 %endrep movd ecx, mm0 add [edi+4*limbs], ecx adc dword [edi+4*limbs+4], 0 add dword [esp+8*limbs+32], 4 add edi, 4 dec dword [esp+8*limbs+4] jnz .1 mov ebx, [esp+8*limbs+28] %assign i 0 %rep limbs mov eax, [edi+4*i] mov [ebx+4*i], eax %assign i i + 1 %endrep mov eax, [edi+4*limbs] add esp, 8*(limbs+1) pop ebx pop esi pop edi pop ebp emms ret %endmacro bits 32 section .text global _mulredc1 %ifdef DLL export _mulredc1 %endif _mulredc1: mov eax, [esp+12] mul dword [esp+8] mov [esp+12], edx mov [esp+8], eax mul dword [esp+20] mul dword [esp+16] add eax, [esp+8] adc edx, [esp+12] mov ecx, [esp+4] mov [ecx], edx adc eax, 0 ret %assign i 2 %rep 19 
; 3..20 inclusive mulredc i %assign i i + 1 %endrep end ecm-6.4.4/build.vc10/assembler/Makefile.am0000644023561000001540000000031012106741270015065 00000000000000EXTRA_DIST = a_win32a_mulredc.asm a_win32a_redc.asm a_win32p_mulredc.asm \ a_win32p_redc.asm a_x64_mulredc.asm a_x64_redc.asm \ test_mulredc.c mulredc.h mulredc.asm redc.asm ecm-6.4.4/build.vc10/assembler/a_x64_redc.asm0000644023561000001540000000557612106741270015474 00000000000000; ; Part of GMP-ECM ; ; void ecm_redc3( ; mp_limb_t *z, rdi r8 <- rcx ; const mp_limb_t *x, rsi r9 <- rdx ; size_t n, rdx r10 <- r8 ; mp_limb_t m rcx r11 <- r9 ; ) %macro rloop 3 mov rax,[byte rsi+8*%3] mul rbp add [byte rdi+8*%3], %1 adc %2, rax mov %1, rdx adc %1, 0 %endmacro bits 64 section .text global ecm_redc3 %ifdef DLL export ecm_redc3 %endif PROC_FRAME ecm_redc3 push_reg rbp push_reg rbx push_reg rsi push_reg rdi alloc_stack 5*8 END_PROLOGUE mov rdi, rcx mov rsi, rdx mov rdx, r8 mov rcx, r9 mov r8, rdi mov r9, rsi mov r10, rdx mov r11, rcx mov rcx, r10 mov [rsp], rcx cmp rcx, 3 jae .unroll .1: mov rbp, r11 mov rsi, r9 imul rbp, [rdi] mov r8, rdi mov rcx, r10 xor rbx, rbx .2: mov rax, [rsi] add rdi, 8 mul rbp add rsi, 8 add rax, rbx adc rdx, 0 add [rdi-8], rax adc rdx, 0 dec rcx mov rbx, rdx jnz .2 mov rdi, r8 mov [rdi], rbx dec qword [rsp] lea rdi, [rdi+8] jnz .1 add rsp, 5*8 pop rdi pop rsi pop rbx pop rbp ret .unroll: mov rdx, rcx dec rcx sub rdx, 2 neg rcx shr rdx, 4 and rcx, 15 mov [rsp+16], rdx mov rdx, rcx shl rdx, 4 lea r10, [.loop_base wrt rip] add rdx, r10 lea rdx, [rdx+rcx*4] add rdx, rcx neg rcx mov r10, rcx mov [rsp+24], rdx .4: mov rbp, r11 mov rsi, r9 imul rbp, [rdi] mov r8, rdi mov rcx, r10 mov rdx, [rsp+16] mov [rsp+8], rdx mov rax, [rsi] lea rsi, [rsi+rcx*8+8] mul rbp lea rdi, [rdi+rcx*8] mov rbx, rdx mov rdx, [rsp+24] test rcx, 1 mov rcx, rax cmovnz rcx, rbx cmovnz rbx, rax jmp rdx align 64 .5: add rdi, 128 .loop_base: rloop rcx, rbx, 0 rloop rbx, rcx, 1 rloop rcx, rbx, 2 rloop rbx, rcx, 3 rloop rcx, 
rbx, 4 rloop rbx, rcx, 5 rloop rcx, rbx, 6 rloop rbx, rcx, 7 rloop rcx, rbx, 8 rloop rbx, rcx, 9 rloop rcx, rbx, 10 rloop rbx, rcx, 11 rloop rcx, rbx, 12 rloop rbx, rcx, 13 rloop rcx, rbx, 14 rloop rbx, rcx, 15 dec qword [rsp+8] lea rsi, [rsi+128] jns .5 add [rdi+128], rcx mov rdi, r8 adc rbx, 0 mov [rdi], rbx dec qword [rsp] lea rdi, [rdi+8] jnz .4 add rsp, 5*8 pop rdi pop rsi pop rbx pop rbp ret ENDPROC_FRAME end ecm-6.4.4/build.vc10/assembler/a_win32a_redc.asm0000644023561000001540000000503512106741270016144 00000000000000; ; Part of GMP-ECM ; ; void ecm_redc3( ; mp_limb_t *z, rdi r8 <- rcx ; const mp_limb_t *x, rsi r9 <- rdx ; size_t n, rdx r10 <- r8 ; mp_limb_t m rcx r11 <- r9 ; ) %macro seq 3 mov eax, [byte esi+4*%3] mul ebp add [byte edi+4*%3], %2 adc %1, eax mov %2, edx adc %2, 0 %endmacro text global _ecm_redc3 _ecm_redc3: push ebp push edi push esi push ebx sub esp, 16 mov ecx, [esp+44] mov edi, [esp+36] mov [esp], ecx cmp ecx, 5 jae .3 .1: mov ebp, [esp+48] mov esi, [esp+40] imul ebp, [edi] mov [esp+36], edi mov ecx, [esp+44] xor ebx, ebx .2: mov eax, [esi] add edi, 4 mul ebp add esi, 4 add eax, ebx adc edx, 0 add [edi-4], eax adc edx, 0 dec ecx mov ebx, edx jnz .2 mov edi, [esp+36] mov [edi], ebx dec dword [esp] lea edi, [edi+4] jnz .1 add esp, 16 pop ebx pop esi pop edi pop ebp ret .3: mov edx, ecx dec ecx sub edx, 2 neg ecx shr edx, 4 and ecx, 15 mov [esp+8], edx mov edx, ecx shl edx, 4 neg ecx lea edx, [edx+ecx+.6] mov [esp+44], ecx mov [esp+12], edx .4: mov ebp, [esp+48] mov esi, [esp+40] imul ebp, [edi] mov [esp+36], edi mov ecx, [esp+44] mov edx, [esp+8] mov [esp+4], edx mov eax, [esi] lea esi, [esi+ecx*4+4] mul ebp lea edi, [edi+ecx*4] mov ebx, edx mov edx, [esp+12] test ecx, 1 mov ecx, eax cmovnz ecx, ebx cmovnz ebx, eax jmp edx align 32 .5: add edi, 64 .6: %assign i 0 %rep 16 %if (i & 1) seq ecx, ebx, i %else seq ebx, ecx, i %endif %assign i i + 1 %endrep dec dword [esp+4] lea esi, [esi+64] jns .5 add [edi+64], ecx mov edi, [esp+36] adc ebx, 0 
mov [edi], ebx dec dword [esp] lea edi, [edi+4] jnz .4 add esp, 16 pop ebx pop esi pop edi pop ebp ret end ecm-6.4.4/build.vc10/mul_fft-params.h.win32.amd0000644023561000001540000000017512106741271015655 00000000000000/* Empty file so that #include won't produce an error message. With no parameters defined, mul_fft.c will use defaults. */ecm-6.4.4/build.vc10/ecm/0000755023561000001540000000000012113421641011701 500000000000000ecm-6.4.4/build.vc10/ecm/ecm.vcxproj.filters0000644023561000001540000000371012106741270015457 00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Header Files Header Files Header Files Header Files Header Files Header Files ecm-6.4.4/build.vc10/ecm/Makefile.in0000644023561000001540000002346512113353770013707 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = build.vc10/ecm DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ 
LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = 
@target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = ecm.vcxproj ecm.vcxproj.filters all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu build.vc10/ecm/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu build.vc10/ecm/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags: TAGS TAGS: ctags: CTAGS CTAGS: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if 
test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: ecm-6.4.4/build.vc10/ecm/ecm.vcxproj0000644023561000001540000003056712106741270014022 00000000000000 Debug Win32 Debug x64 Release Win32 Release x64 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187} ecm Win32Proj Application MultiByte Application MultiByte Application MultiByte Application MultiByte <_ProjectFileVersion>10.0.30128.1 $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ false $(SolutionDir)..\bin\$(Platform)\Release\ $(Platform)\Release\ false $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ true $(SolutionDir)..\bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ true Full true Speed ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) MultiThreaded Level3 ProgramDatabase Default true ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);advapi32.lib;ws2_32.lib true Console true true false MachineX86 X64 Full true Speed ..\..\..\$(mp_dir)lib\$(IntDir);%(AdditionalIncludeDirectories) WIN32;_WIN64;NDEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) MultiThreaded Level3 ProgramDatabase Default true ws2_32.lib;..\..\..\$(mp_dir)lib\$(Platform)\release\$(mp_lib);%(AdditionalDependencies) true Console true true false MachineX64 8388608 65536 Disabled ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) true EnableFastChecks MultiThreadedDebug Level3 EditAndContinue Default true ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);advapi32.lib;ws2_32.lib true Console false MachineX86 X64 Disabled ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;_WIN64;_DEBUG;_CONSOLE;OUTSIDE_LIBECM;%(PreprocessorDefinitions) true EnableFastChecks MultiThreadedDebug Level3 ProgramDatabase Default true ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);advapi32.lib;ws2_32.lib true Console false 
MachineX64 8388608 65536 ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) ..\..\..\$(mp_dir)lib\$(IntDir)$(mp_lib);advapi32.lib;ws2_32.lib {cd555681-d65b-4173-a29c-b8bf06a4871b} false ecm-6.4.4/build.vc10/ecm/Makefile.am0000644023561000001540000000005512106741270013662 00000000000000EXTRA_DIST = ecm.vcxproj ecm.vcxproj.filters ecm-6.4.4/build.vc10/mul_fft-params.h.win32.intel0000644023561000001540000001073112106741271016226 00000000000000#define MUL_FFT_MODF_THRESHOLD 480 #define SQR_FFT_MODF_THRESHOLD 480 #define MUL_FFT_TABLE2 {{1, 4 /*66*/}, {305, 5 /*95*/}, {321, 4 /*97*/}, {337, 5 /*95*/}, {353, 4 /*97*/}, {369, 5 /*96*/}, {801, 6 /*96*/}, {1281, 7 /*91*/}, {1409, 6 /*97*/}, {1601, 7 /*92*/}, {1921, 6 /*98*/}, {1985, 7 /*94*/}, {2689, 8 /*91*/}, {2817, 7 /*95*/}, {3201, 8 /*92*/}, {3329, 7 /*96*/}, {3457, 8 /*87*/}, {3841, 7 /*96*/}, {3969, 8 /*88*/}, {4865, 7 /*97*/}, {4993, 8 /*90*/}, {6913, 9 /*87*/}, {7681, 8 /*96*/}, {8961, 9 /*90*/}, {9729, 8 /*97*/}, {9985, 9 /*83*/}, {11777, 8 /*97*/}, {12033, 9 /*85*/}, {13825, 10 /*87*/}, {15361, 9 /*96*/}, {15873, 8 /*98*/}, {16129, 9 /*88*/}, {19969, 10 /*83*/}, {23553, 9 /*97*/}, {26113, 10 /*81*/}, {31745, 9 /*98*/}, {34305, 10 /*85*/}, {39937, 9 /*98*/}, {40449, 10 /*83*/}, {48129, 11 /*75*/}, {63489, 10 /*98*/}, {80897, 11 /*83*/}, {96257, 12 /*75*/}, {126977, 11 /*98*/}, {129025, 9 /*98*/}, {130561, 11 /*80*/}, {194561, 12 /*75*/}, {258049, 10 /*98*/}, {261121, 9 /*99*/}, {261633, 10 /*94*/}, {277505, 9 /*99*/}, {278017, 10 /*94*/}, {293889, 9 /*99*/}, {294401, 7 /*99*/}, {294529, 8 /*99*/}, {294657, 10 /*94*/}, {310273, 9 /*99*/}, {310785, 10 /*95*/}, {326657, 12 /*83*/}, {389121, 13 /*75*/}, {516097, 11 /*98*/}, {522241, 10 /*99*/}, {523265, 11 /*94*/}, {587777, 10 /*99*/}, {588801, 11 /*94*/}, {620545, 10 /*99*/}, {621569, 9 /*99*/}, {622081, 11 /*95*/}, {653313, 10 /*99*/}, {662529, 11 /*96*/}, {686081, 10 /*99*/}, {687105, 9 /*99*/}, {687617, 11 /*95*/}, {718849, 10 
/*99*/}, {752641, 9 /*99*/}, {753153, 11 /*95*/}, {784385, 10 /*99*/}, {818177, 9 /*99*/}, {818689, 11 /*96*/}, {849921, 10 /*99*/}, {850945, 11 /*96*/}, {882689, 10 /*99*/}, {883713, 9 /*99*/}, {884225, 11 /*96*/}, {980993, 10 /*99*/}, {982017, 12 /*93*/}, {LONG_MAX, 0}} #define MUL_FFTM_TABLE2 {{1, 4 /*66*/}, {273, 5 /*94*/}, {289, 4 /*97*/}, {305, 5 /*95*/}, {609, 6 /*95*/}, {641, 5 /*97*/}, {673, 6 /*95*/}, {705, 5 /*97*/}, {737, 6 /*96*/}, {1473, 7 /*96*/}, {1537, 6 /*98*/}, {1601, 7 /*96*/}, {1665, 6 /*98*/}, {1729, 7 /*96*/}, {2689, 8 /*91*/}, {2817, 7 /*97*/}, {2945, 8 /*92*/}, {3329, 7 /*98*/}, {3457, 8 /*93*/}, {5377, 9 /*91*/}, {5633, 8 /*95*/}, {6401, 9 /*92*/}, {6657, 8 /*96*/}, {6913, 9 /*87*/}, {7681, 8 /*96*/}, {7937, 9 /*88*/}, {8705, 8 /*97*/}, {8961, 9 /*90*/}, {13825, 10 /*87*/}, {15361, 9 /*96*/}, {17921, 10 /*90*/}, {19457, 9 /*97*/}, {19969, 10 /*83*/}, {23553, 9 /*97*/}, {24065, 10 /*85*/}, {27649, 11 /*87*/}, {30721, 10 /*96*/}, {31745, 9 /*98*/}, {32257, 10 /*88*/}, {39937, 11 /*83*/}, {47105, 10 /*97*/}, {48129, 12 /*75*/}, {61441, 11 /*96*/}, {63489, 10 /*98*/}, {68609, 11 /*85*/}, {79873, 10 /*98*/}, {80897, 11 /*83*/}, {96257, 12 /*75*/}, {126977, 11 /*98*/}, {161793, 12 /*83*/}, {192513, 13 /*75*/}, {253953, 12 /*98*/}, {258049, 10 /*98*/}, {261121, 9 /*99*/}, {261633, 10 /*94*/}, {277505, 12 /*85*/}, {323585, 10 /*99*/}, {326657, 9 /*99*/}, {327169, 10 /*95*/}, {330753, 12 /*84*/}, {389121, 10 /*99*/}, {392193, 9 /*99*/}, {392705, 10 /*96*/}, {408577, 9 /*99*/}, {409089, 8 /*99*/}, {409345, 10 /*96*/}, {412673, 12 /*90*/}, {454657, 13 /*87*/}, {516097, 11 /*98*/}, {522241, 10 /*99*/}, {523265, 11 /*94*/}, {555009, 10 /*99*/}, {556033, 9 /*99*/}, {556545, 11 /*94*/}, {587777, 10 /*99*/}, {588801, 11 /*94*/}, {620545, 10 /*99*/}, {621569, 9 /*99*/}, {622081, 11 /*95*/}, {653313, 10 /*99*/}, {654337, 11 /*95*/}, {686081, 13 /*87*/}, {778241, 11 /*99*/}, {817153, 10 /*99*/}, {818177, 9 /*99*/}, {818689, 11 /*96*/}, {849921, 10 /*99*/}, 
{850945, 11 /*96*/}, {882689, 10 /*99*/}, {883713, 9 /*99*/}, {884225, 11 /*96*/}, {915457, 12 /*93*/}, {978945, 14 /*93*/}, {LONG_MAX, 0}} #define MUL_FFT_FULL_TABLE2 {{100, 2}, {216, 1}, {256, 2}, {264, 1}, {304, 2}, {312, 1}, {544, 4}, {560, 1}, {704, 2}, {720, 1}, {896, 2}, {960, 7}, {40960, 2}, {47616, 1}, {49152, 6}, {53760, 4}, {56320, 1}, {64512, 4}, {71680, 5}, {86016, 2}, {96768, 4}, {99840, 1}, {131072, 6}, {136192, 7}, {147456, 6}, {150528, 4}, {161280, 1}, {161792, 3}, {172032, 2}, {193536, 1}, {259072, 6}, {286720, 7}, {294912, 6}, {301056, 4}, {322560, 3}, {344064, 2}, {387072, 1}, {393216, 4}, {404480, 3}, {409600, 1}, {417792, 3}, {425984, 1}, {524288, 6}, {530432, 7}, {557056, 6}, {566272, 5}, {577536, 4}, {593920, 6}, {602112, 5}, {614400, 4}, {645120, 3}, {647168, 4}, {652800, 1}, {654336, 6}, {673792, 3}, {688128, 2}, {724992, 4}, {727040, 1}, {753664, 2}, {783360, 4}, {816640, 6}, {831488, 1}, {851968, 2}, {860160, 3}, {868352, 2}, {881664, 7}, {884736, 1}, {921600, 7}, {950272, 1}, {LONG_MAX, 1}} ecm-6.4.4/build.vc10/vsyasm.targets0000644023561000001540000001046312106741270014003 00000000000000 _YASM $(MSBuildThisFileDirectory)$(MSBuildThisFileName).xml @(YASM, '|') $(ComputeLinkInputsTargets); ComputeYASMOutput; $(ComputeLibInputsTargets); ComputeYASMOutput; ecm-6.4.4/build.vc10/ecm-params.h.win32.amd0000644023561000001540000000115012106741271014757 00000000000000/* this is the parameter file for Opteron */ #define MPZMOD_THRESHOLD 170 #define REDC_THRESHOLD 294 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 0, 0, 0, 0, 1, 7, 8, 1, 1, 8, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 16, 16, 1, 1, 16, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 11 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 11 #define SPV_NTT_GFP_DIF_RECURSIVE_THRESHOLD 32768 #define SPV_NTT_GFP_DIT_RECURSIVE_THRESHOLD 32768 #define MUL_NTT_THRESHOLD 1024 #define PREREVERTDIVISION_NTT_THRESHOLD 64 #define POLYINVERT_NTT_THRESHOLD 512 #define POLYEVALT_NTT_THRESHOLD 512 #define 
MPZSPV_NORMALISE_STRIDE 512 ecm-6.4.4/build.vc10/libecm/0000755023561000001540000000000012113421641012370 500000000000000ecm-6.4.4/build.vc10/libecm/libecm.vcxproj0000644023561000001540000002754712106741270015204 00000000000000 Debug Win32 Debug x64 Release Win32 Release x64 {CD555681-D65B-4173-A29C-B8BF06A4871B} libecm Win32Proj StaticLibrary MultiByte StaticLibrary MultiByte StaticLibrary MultiByte StaticLibrary MultiByte Static <_ProjectFileVersion>10.0.30128.1 $(SolutionDir)..\lib\$(Platform)\$(Configuration)\ $(SolutionDir)..\lib\$(Platform)\$(Configuration)\ $(SolutionDir)..\lib\$(Platform)\$(Configuration)\ $(SolutionDir)..\lib\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ ecmlib $(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ cd $(SolutionDir) call file_copy ecm-params.h.win32.intel ..\ecm-params.h call file_copy mul_fft-params.h.win32.intel ..\mul_fft-params.h call file_copy config.h ..\config.h Full true ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;NDEBUG;_LIB;SSE2;USE_ASM_REDC;%(PreprocessorDefinitions) MultiThreaded Level3 Default cd $(SolutionDir) call file_copy ecm-params.h.x64.intel ..\ecm-params.h call file_copy mul_fft-params.h.x64.intel ..\mul_fft-params.h call file_copy config.h ..\config.h X64 Full true ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;_WIN64;NDEBUG;_LIB;USE_ASM_REDC;%(PreprocessorDefinitions) MultiThreaded Level3 Default _WIN64 cd $(SolutionDir) call file_copy ecm-params.h.win32.intel ..\ecm-params.h call file_copy mul_fft-params.h.win32.intel ..\mul_fft-params.h call file_copy config.h ..\config.h Disabled ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;_DEBUG;_LIB;SSE2;USE_ASM_REDC;%(PreprocessorDefinitions) true EnableFastChecks MultiThreadedDebug Level3 Default cd $(SolutionDir) call file_copy ecm-params.h.x64.intel ..\ecm-params.h 
call file_copy mul_fft-params.h.x64.intel ..\mul_fft-params.h call file_copy config.h ..\config.h X64 Disabled ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\;%(AdditionalIncludeDirectories) WIN32;_WIN64;_DEBUG;_LIB;USE_ASM_REDC;%(PreprocessorDefinitions) true EnableFastChecks MultiThreadedDebug Level3 Default _WIN64 MaxSpeed Full Full Full Full ecm-6.4.4/build.vc10/libecm/libecm.vcxproj.filters0000644023561000001540000001175712106741270016647 00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd {2f18179f-5dba-420c-8dc7-bc7f8228a1b2} Source Files\Assembler Source Files\Assembler Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files ecm-6.4.4/build.vc10/libecm/Makefile.in0000644023561000001540000002350412113353770014370 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
# This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = build.vc10/libecm DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB 
= @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = 
@oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = libecm.vcxproj libecm.vcxproj.filters all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu build.vc10/libecm/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu build.vc10/libecm/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags: TAGS TAGS: ctags: CTAGS CTAGS: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e 
"s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: ecm-6.4.4/build.vc10/libecm/Makefile.am0000644023561000001540000000006512106741270014352 00000000000000EXTRA_DIST = libecm.vcxproj libecm.vcxproj.filters ecm-6.4.4/build.vc10/readme.txt0000644023561000001540000001074412106741271013067 00000000000000 Building GMP-ECM with Microsoft Visual C++ 2010 (version 10) =========================================================== If you wish to build the assembler code support you will need to install the YASM assembler that is available at: http://www.tortall.net/projects/yasm/ The version you need is vsyasm, which should be put in the same directory as your Visual C++ compiler, which is typically: C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin The Multi-Precision Library - GMP and MPIR ========================================== GMP-ECM works with either GMP or MPIR, a fork of GMP. To build and run GMP-ECM using Visual Studio you first need to obtain and build either GMP or MPIR. MPIR has a fully integrated Visual Studio build system for Windows but GMP does not. The VC++ build of GMP-ECM now defaults to MPIR but the property sheet mp_lib.props can be edited to set the macro mp_lib to 'gmp' instead of 'mpir' to build ECM using GMP. GMP === GMP can be built from the GMP source code available here: http://gmplib.org/ using the Visual Studio build files I provide here: http://www.gladman.me.uk/computing/gmp4win.php But these are based on GMP 4.2.x and are no longer being maintained. GMP 4.3.x can be built using cygwin or mingw for win32 and it is reported that the resulting libraries work with Visual Studio when appropriately renamed. It may also be possible to build the generic C version of GMP for 64-bit Windows systems using mingw64. But this version will be fairly slow because it cannot use the fast assembler normally used by GMP because this is not available in Windows format. 
MPIR ==== MPIR is available here: http://www.mpir.org It has full support for building MPIR for 32 and 64 bit Windows systems with x86 assembler support using the YASM assembler. In particular it includes fast assembler code for modern AMD and Intel architectures running in 64-bit mode on Windows (not available in GMP). Building GMP-ECM ================ The build files for GMP-ECM assume that the GMP and ECM build directories are in a common parent directory as follows: Parent Directory MPIR (or GMP) build.vc10 -- MPIR (or GMP) build files ... GMP-ECM build.vc10 -- ECM build files The root directories for GMP and GMP-ECM are assumed to have these names irrespective of which version is being used (they used to be followed by version numbers but this meant that the build projects had to be updated too frequently). There are three build projects in build.vc10: ecm - the ECM application ecmlib - the ECM library tune - a program for tuning Before starting a build, these two files ecm-params.h mul_fft-params.h must be edited to set the tuning parameters that should be used in the build. Select the tuning include files by changing the appropriate '#elif 0' to '#elif 1'. If you wish to use the win32 AMD assembler files, you also have to use the Visual Studio property page to define AMD_ASM (alternatively you can edit mulredc.asm and redc.asm in the build.vc10\assembler\ directory to include the AMD assembler). 
When a version of ecm and ecmlib is built, the library and the application are put in the directory matching the configuration that has been built: GMP-ECM build.vc10 -- ECM build files lib -- ECM static library files dll -- ECM dynamic library files bin -- ECM executable files Within these lib, dll and bin directories, the outputs are located in sub-directories determined by the platform and configuration: win32\release win32\debug x64\release x64\debug If you don't want assembler support you need to change the define: #define NATIVE_REDC 1 in config.h (in the build.vc10 subdirectory) to: #undef NATIVE_REDC Tune ==== If tune is compiled and run for a particular configuration it will output a file with appropriate parameters for this configuration with a name such as: ecm-params.h.win32.amd.new To use this file when building ecm and ecmlib, remove the '.new' extension and add a reference to it in the ecm-params.h file in the build.vc10 directory. Tests ===== The file tests.py is a Python script that runs the ECM tests. It runs the x64/release-amd version by default but can be edited to test other builds. 
Brian Gladman, 3rd January 2012 ecm-6.4.4/build.vc10/getrusage.h0000644023561000001540000000271412106741271013226 00000000000000 #ifndef _GETRUSAGE_H #define _GETRUSAGE_H #if defined(__cplusplus) extern "C" { #endif #define ENODATA 61 #define RUSAGE_SELF 0 #define RUSAGE_CHILDREN -1 typedef struct { long tv_sec; long tv_usec; } tval; typedef struct rusage { tval ru_utime; /* user time used */ tval ru_stime; /* system time used */ long ru_maxrss; /* integral max resident set size */ long ru_ixrss; /* integral shared text memory size */ long ru_idrss; /* integral unshared data size */ long ru_isrss; /* integral unshared stack size */ long ru_minflt; /* page reclaims */ long ru_majflt; /* page faults */ long ru_nswap; /* swaps */ long ru_inblock; /* block input operations */ long ru_oublock; /* block output operations */ long ru_msgsnd; /* messages sent */ long ru_msgrcv; /* messages received */ long ru_nsignals;/* signals received */ long ru_nvcsw; /* voluntary context switches */ long ru_nivcsw; /* involuntary context switches */ } rusage; int getrusage(int who, rusage *usage); #if defined(__cplusplus) } #endif #endif ecm-6.4.4/build.vc10/vsyasm.props0000644023561000001540000000231712106741270013474 00000000000000 Midl CustomBuild _SelectedFiles;$(YASMDependsOn) C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\bin\ False $(IntDir) 0 0 "$(YasmPath)"vsyasm.exe -Xvc -f $(Platform) [AllOptions] [AdditionalOptions] [Inputs] %(ObjectFile) Assembling %(Filename)%(Extension) false ecm-6.4.4/build.vc10/Makefile.in0000644023561000001540000004244212113353767013145 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. 
# This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = build.vc10 SUBDIRS = DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ html-recursive info-recursive install-data-recursive \ install-dvi-recursive install-exec-recursive \ install-html-recursive install-info-recursive \ install-pdf-recursive install-ps-recursive install-recursive \ installcheck-recursive installdirs-recursive pdf-recursive \ ps-recursive uninstall-recursive RECURSIVE_CLEAN_TARGETS = 
mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ distdir ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM 
= @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = config.h ecm-params.h ecm-params.h.win32.amd \ ecm-params.h.win32.intel ecm-params.h.x64.amd \ 
ecm-params.h.x64.intel ecm.sln file_copy.bat \ mp_lib.props mul_fft-params.h.win32.amd \ mul_fft-params.h.win32.intel mul_fft-params.h.x64.amd \ mul_fft-params.h.x64.intel readme.txt tests.py \ vsyasm.props vsyasm.targets vsyasm.xml getrusage.h DIST_SUBDIRS = assembler ecm libecm tune bench_mulredc all: all-recursive .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu build.vc10/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu build.vc10/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs # This directory's subdirectories are mostly independent; you can cd # into them and run `make' without going through this Makefile. # To change the values of `make' variables: instead of editing Makefiles, # (1) if the variable is set in `config.status', edit `config.status' # (which will cause the Makefiles to be regenerated when you run `make'); # (2) otherwise, pass the desired values on the `make' command line. 
$(RECURSIVE_TARGETS): @fail= failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ list='$(SUBDIRS)'; for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" $(RECURSIVE_CLEAN_TARGETS): @fail= failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ rev=''; for subdir in $$list; do \ if test "$$subdir" = "."; then :; else \ rev="$$subdir $$rev"; \ fi; \ done; \ rev="$$rev ."; \ target=`echo $@ | sed s/-recursive//`; \ for subdir in $$rev; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done && test -z "$$fail" tags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ done ctags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . 
|| ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ done ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" 
distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ 
distdir) \ || exit 1; \ fi; \ done check-am: all-am check: check-recursive all-am: Makefile installdirs: installdirs-recursive installdirs-am: install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: .MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \ install-am install-strip tags-recursive .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ all all-am check check-am clean clean-generic clean-libtool \ ctags ctags-recursive distclean distclean-generic \ distclean-libtool distclean-tags distdir dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-man install-pdf install-pdf-am \ install-ps install-ps-am install-strip installcheck \ installcheck-am installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: ecm-6.4.4/build.vc10/bench_mulredc/0000755023561000001540000000000012113421642013730 500000000000000ecm-6.4.4/build.vc10/bench_mulredc/Makefile.in0000644023561000001540000002354712113353770015736 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = build.vc10/bench_mulredc DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = 
CONFIG_CLEAN_VPATH_FILES = SOURCES = DIST_SOURCES = DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = 
@am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = bench_mulredc.vcxproj bench_mulredc.vcxproj.filters all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu build.vc10/bench_mulredc/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu build.vc10/bench_mulredc/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags: TAGS TAGS: ctags: CTAGS CTAGS: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: ecm-6.4.4/build.vc10/bench_mulredc/bench_mulredc.vcxproj0000644023561000001540000002150712106741270020070 00000000000000 Debug Win32 Debug x64 Release Win32 Release x64 {4727DE12-787D-432D-B166-BF103B0C3C87} Win32Proj bench_mulredc Application true Unicode Application true Unicode Application false true Unicode Application false true Unicode true $(SolutionDir)..bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ true $(SolutionDir)..bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ false $(SolutionDir)..bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ false $(SolutionDir)..bin\$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ Level3 Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\ MultiThreadedDebug Console true psapi.lib;..\..\..\$(mp_dir)lib\$(IntDir)\mpir.lib;..\..\lib\$(IntDir)\libecm.lib;%(AdditionalDependencies) Level3 Disabled _WIN64;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\ MultiThreadedDebug Console true psapi.lib;..\..\..\$(mp_dir)lib\$(IntDir)\mpir.lib;..\..\lib\$(IntDir)\libecm.lib;%(AdditionalDependencies) Level3 MaxSpeed true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\ MultiThreaded Console true true true psapi.lib;..\..\..\$(mp_dir)lib\$(IntDir)\mpir.lib;..\..\lib\$(IntDir)\libecm.lib;%(AdditionalDependencies) Level3 MaxSpeed true true _WIN64;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) ..\..\..\$(mp_dir)lib\$(IntDir);..\..\;..\assembler;..\ MultiThreaded Console true true true psapi.lib;..\..\..\$(mp_dir)lib\$(IntDir)\mpir.lib;..\..\lib\$(IntDir)\libecm.lib;%(AdditionalDependencies) ecm-6.4.4/build.vc10/bench_mulredc/bench_mulredc.vcxproj.filters0000644023561000001540000000130612106741270021532 00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 
{93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd Source Files ecm-6.4.4/build.vc10/bench_mulredc/Makefile.am0000644023561000001540000000010112106741270015700 00000000000000EXTRA_DIST = bench_mulredc.vcxproj bench_mulredc.vcxproj.filters ecm-6.4.4/build.vc10/mul_fft-params.h.x64.intel0000644023561000001540000000017512106741271015706 00000000000000/* Empty file so that #include won't produce an error message. With no parameters defined, mul_fft.c will use defaults. */ecm-6.4.4/build.vc10/tests.py0000644023561000001540000003035712106741270012606 00000000000000 from __future__ import print_function import os import sys import string import platform from re import match from subprocess import Popen, PIPE, STDOUT from tempfile import * from time import clock class Timer() : def __enter__(self): self.start = clock() def __exit__(self, *args): print(' time {:.3f} milliseconds'.format(1000 * (clock() - self.start))) test_dir = '..\\bin\\x64\\Release\\' # test_dir = '..\\bin\\win32\\Release\\' ecm = [ ("2050449353925555290706354283", "-sigma 7 -k 1 30 0-1e6", 14), ("137703491", "-sigma 6 84 1000", 8), ("3533000986701102061387017352606588294716061", "-sigma 1621 191 225", 14), ("145152979917007299777325725119", "-sigma 711387948 924 117751", 14), ("2^919-1", "-sigma 262763035 937 1", 6), ("2^919-1", "-sigma 1691973485 283 1709", 6), ("(2^1033+1)/3", "-sigma 2301432245 521 1", 6), ("(2^1033+1)/3", "-sigma 2301432245 223 1847", 6), ("(2^1063+1)/3/26210488518118323164267329859", "-sigma 2399424618 383 1", 6), ("(2^1063+1)/3/26210488518118323164267329859", "-sigma 2399424618 71 500", 6), ("242668358425701966181147598421249782519178289604307455138484425562807899", "-sigma 1417477358 28560 8e7-85507063", 14), ("3533000986701102061387017352606588294716061", "-sigma 291310394389387 191 225", 14), ("121279606270805899614487548491773862357", "-sigma 1931630101 120", 14), ("291310394389387", "-power 3 -sigma 40 2000", 8), ("3533000986701102061387017352606588294716061", 
"-sigma 3547 167 211", 14), ("449590253344339769860648131841615148645295989319968106906219761704350259884936939123964073775456979170209297434164627098624602597663490109944575251386017", "-sigma 63844855 -go 172969 61843 20658299", 14), ("17061648125571273329563156588435816942778260706938821014533", "-sigma 585928442 174000", 14), ("89101594496537524661600025466303491594098940711325290746374420963129505171895306244425914080753573576861992127359576789001", "-sigma 877655087 -go 325001 157721 1032299", 14), ("5394204444759808120647321820789847518754252780933425517607611172590240019087317088600360602042567541009369753816111824690753627535877960715703346991252857", "-sigma 805816989 -go 345551 149827", 6), ("3923385745693995079670229419275984584311007321932374190635656246740175165573932140787529348954892963218868359081838772941945556717", "-sigma 876329474 141667 150814537", 14), ("124539923134619429718018353168641490719788526741873602224103589351798060075728544650990190016536810151633233676972068237330360238752628542584228856301923448951", "-sigma 1604840403 -go 983591971839332299 96097 24289207", 14), ("5735013127104523546495917836490637235369", "-power 60 -k 2 -A 3848610099745584498259560038340842096471 -x0 2527419713481530878734189429997880136878 330000 500000000", 8), ("17833653493084084667826559287841287911473", "-power 6 -k 2 -A 7423036368129288563912180723909655170075 -x0 9011819881065862648414808987718432766274 389797 16e8", 8), ("212252637915375215854013140804296246361", "-power 15 -k 2 -sigma 781683988 1000000", 8), ("4983070578699621345648758795946786489699447158923341167929707152021191319057138908604417894224244096909460401007237133698775496719078793168004317119431646035122982915288481052088094940158965731422616671", "-sigma 909010734 122861 176711", 6), ("1408323592065265621229603282020508687", "-sigma 1549542516 -go 2169539 531571 29973883000-29973884000", 8), ("3213162276640339413566047915418064969550383692549981333701", "-sigma 2735675386 -go 1615843 
408997 33631583", 8), ("39614081257132168796771975177", "-sigma 480 1e6", 8), ("10000286586958753753", "-sigma 3956738175 1e6", 8), ("49672383630046506169472128421", "-sigma 2687434659 166669 86778487", 8), ("216259730493575791390589173296092767511", "-sigma 214659179 1124423 20477641", 8), ("49367108402201032092269771894422156977426293789852367266303146912244441959559870316184237", "-sigma 6 5000", 0), ("(2^1063+1)/3/26210488518118323164267329859", "-sigma 2399424618 383 1", 6), ("10090030271*10^400+696212088699", "-sigma 3923937547 1e3 1e6", 14), ("458903930815802071188998938170281707063809443792768383215233", "-batch -A 103699173453039012668349162616750601868936199904547322268878 10000", 14), ("458903930815802071188998938170281707063809443792768383215233", "-batch=2 -A 103699173453039012668349162616750601868936199904547322268878 10000", 14), ("2^349-1", "-batch -A 13883915733485915535567641090102088744917579395318243004655770450844428217574163575149253565087742 587 29383", 6), ("2^349-1", "-batch=2 -A 13883915733485915535567641090102088744917579395318243004655770450844428217574163575149253565087742 587 29383", 6), ("2^347-1", "-batch -A 292897222300654795048417351458499833714895857628156011078988080472621879897670335421898676171177982 3301 229939", 14), ("2^347-1", "-batch=2 -A 292897222300654795048417351458499833714895857628156011078988080472621879897670335421898676171177982 3301 229939", 14), ("911962091", "-batch=2 -A 440688534 50000", 8), ("31622776601683791911", "-batch=1 -A 27063318473587686303 11000", 0), ("18446744073709551557", "-batch -A 312656731337392125 11000", 8), ("4294967291", "-batch -A 17 1000", 8), ("((173^173+1)/174)/471462511391940575680645418941", "-sigma 12345 20000", 0), ("((173^173+1)/174)/471462511391940575680645418941+122", "-sigma 77 20000", 6), ("10000000000000000000000000000000000000121", "-sigma 61 -go 1195504287780095287 2950307", 8), ("10000000000000000000000000000000000000121", "-sigma 266 -go 218187387944803649 9405629", 8), 
("10000000000000000000000000000000000000121", "-sigma 291 -go 5994496018878137 4372759", 8) ] pm1 = [ ("441995541378330835457", "-pm1 -x0 3 157080 7e9-72e8", 8 ), ("335203548019575991076297", "-pm1 -x0 2 23 31", 8 ), ("335203548019575991076297", "-pm1 -x0 3 31 58766400424189339249-58766400424189339249", 8 ), ("2050449353925555290706354283", "-pm1 -k 1 20 0-1e6", 14 ), ("67872792749091946529", "-pm1 -x0 3 8467 11004397", 8 ), ("5735039483399104015346944564789", "-pm1 1277209 9247741", 8 ), ("620224739362954187513", "-pm1 -x0 3 668093 65087177", 8 ), ("1405929742229533753", "-pm1 1123483 75240667", 8 ), ("16811052664235873", "-pm1 -x0 3 19110 178253039", 8 ), ("9110965748024759967611", "-pm1 1193119 316014211", 8 ), ("563796628294674772855559264041716715663", "-pm1 4031563 14334623", 8 ), ("188879386195169498836498369376071664143", "-pm1 3026227 99836987", 8 ), ("474476178924594486566271953891", "-pm1 9594209 519569569", 8 ), ("2124306045220073929294177", "-pm1 290021 1193749003", 8 ), ("504403158265489337", "-pm1 -k 4 8 9007199254740700-9007199254740900", 8 ), ("6857", "-pm1 840 857", 8 ), ("10090030271*10^400+696212088699", "-pm1 2e3 2e6", 14), ("2^(64*2)-1", "-pm1 -redc -x0 -1 2 1", 8), ("234^997+997^234", "-pm1 -ntt 100 324", 0) # Try saving and resuming # ("25591172394760497166702530699464321", "-pm1 -save test.pm1.save 100000 # checkcode $? 0 # $PM1 -resume test.pm1.save 120557 2007301 # C=$? 
# /bin/rm -f test.pm1.save # checkcode $C 8 ), ] pp1 = [ ("574535754974673735383001137423881", "-pp1 -x0 5 11046559 34059214979", 8 ), ("1212493270942550395500491620526329", "-pp1 -x0 9 1322743 15132776749", 8 ), ("12949162694219360835802307", "-pp1 -x0 5 3090877 362336209", 8 ), ("2224933405617843870480157177909", "-pp1 -x0 6 568751 573379", 8 ), ("6588443517876550825940165572081", "-pp1 -x0 5 308141 4213589", 8 ), ("951513164333845779921357796547797", "-pp1 -x0 5 991961 1927816573", 8 ), ("30273798812158206865862514296968537", "-pp1 -x0 5 24039443 5071284641", 8 ), ("4745647757936790297247194404494391", "-pp1 -x0 9 34652707 4267610467", 8 ), ("1267992248510159742851354500921987", "-pp1 -x0 5 205435127 3011959669", 8 ), ("3376019969685846629149599470807382641", "-pp1 -x0 5 16221563 125604601", 8 ), ("14783171388883747638481280920502006539", "-pp1 -x0 5 5963933 549138481", 8 ), ("884764954216571039925598516362554326397028807829", "-pp1 -x0 6 80105797 2080952771", 8 ), ("5703989257175782343045829011448227", "-pp1 -x0 6 2737661 581697661", 8 ), ("36542278409946587188439197532609203387", "-pp1 -x0 5 75484441 721860287", 8 ), ("23737785720181567451870298309457943", "-pp1 -x0 7 138563 9639649", 8 ), ("9535226150337134522266549694936148673", "-pp1 -x0 7 3037709 84506953", 8 ), ("68095768294557635629913837615365499", "-pp1 -x0 5 36936017 167452427", 8 ), ("3180944478436233980230464769757467081", "-pp1 -x0 5 7373719 764097571", 8 ), ("2879563791172315088654652145680902993", "-pp1 -x0 7 29850409 34290301", 8 ), ("79382035150980920346405340690307261392830949801", "-pp1 -x0 5 12073627 32945877451", 8 ), ("514102379852404115560097604967948090456409", "-pp1 -x0 8 223061 61500567937", 8 ), ("173357946863134423299822098041421951472072119", "-pp1 -x0 5 992599901 1401995848117", 8 ), ("183707757246801094558768264908628886377124291177", "-pp1 -x0 5 382807709 1052258680511", 8 ), ("16795982678646459679787538694991838379", "-pp1 -x0 6 2957579 26509499", 8 ), # 
("7986478866035822988220162978874631335274957495008401", "-pp1 -x0 17 1632221953 843497917739, 8), # ("725516237739635905037132916171116034279215026146021770250523", "-pp1 -x0 5 51245344783 483576618980159", 8 ), ("1809864641442542950172698003347770061601055783363", "-pp1 -x0 6 21480101 12037458077389", 8 ), ("435326731374486648601801668751442584963", "-pp1 -x0 6 12002513 27231121", 8 ), ("3960666914072777038869829205072430197479", "-pp1 -x0 5 16534249 21802223243", 8) ] pp1_2 = [ ("328006342451", "-pp1 -x0 5 120 7043", 8 ), ("328006342451", "-pp1 -x0 1/5 120 7043", 8 ), ("2050449218179969792522461197", "-pp1 -x0 6 -k 1 20 0-1e6", 14), ("6215074747201", "-pp1 -power 2 -x0 5 630 199729", 8 ), ("6215074747201", "-pp1 -dickson 3 -x0 5 630 199729", 8 ), ("8857714771093", "-pp1 -x0 3 23251 49207", 8 ), ("236344687097", "-pp1 -x0 3 619 55001", 8 ), ("87251820842149", "-pp1 -x0 5 3691 170249", 8 ), ("719571227339189", "-pp1 -x0 4 41039 57679", 8 ), ("5468575720021", "-pp1 -x0 6 1439 175759", 8 ), ("49804972211", "-pp1 -x0 5 15443 268757", 8 ), ("329573417220613", "-pp1 -x0 3 5279 101573", 8 ), ("4866979762781", "-pp1 -x0 4 7309 97609", 8 ), ("187333846633", "-pp1 -x0 3 2063 9851", 8 ), ("332526664667473", "-pp1 -x0 3 65993 111919", 8 ), ("265043186297", "-pp1 -x0 3 8761 152791", 8 ), ("207734163253", "-pp1 -x0 3 1877 4211", 8 ), ("225974065503889", "-pp1 -x0 5 -k 5 7867 8243", 8 ), ("660198074631409", "-pp1 -x0 5 22541 115679", 8 ), ("563215815517", "-pp1 -x0 3 3469 109849", 8 ), ("563215815517", "-pp1 -x0 3 3469 109849-109849", 8 ), ("409100738617", "-pp1 -x0 3 19 19", 8 ), ("2277189375098448170118558775447117254551111605543304035536750762506158547102293199086726265869065639109", "-pp1 -x0 3 2337233 132554351", 14), ("630503947831861669", "-pp1 -x0 5 7 9007199254740000-9007199254741000", 8 ), ("8589934621", "-pp1 -x0 10 4294967310-4294967311 1", (1, 8) ), ("6054018161*10^400+417727253109", "-pp1 -x0 4 2e3 2e6", 14), ("154618728587", "-pp1 -x0 3 -go 36 
4294957296-4294967295 1", 8) ] c200 = [ ("29799904256775982671863388319999573561548825027149399972531599612392671227006866151136667908641695103422986028076864929902803267437351318167549013218980573566942647077444419419003164546362008247462049", "-pm1 2 1e10", 0) ] test = [ ("173357946863134423299822098041421951472072119", "-pp1 -x0 5 992599901 1401995848117", 8 ), ] def run_exe(exe, args, inp) : al = {'stdin' : PIPE, 'stdout' : PIPE, 'stderr' : STDOUT } if sys.platform.startswith('win') : al['creationflags'] = 0x08000000 p = Popen([exe] + args.split(' '), **al) res = p.communicate(inp.encode())[0].decode() ret = p.poll() return (ret, res) def do_tests(tests) : global out exe = test_dir + "ecm.exe" for tt in tests : rv = run_exe(exe, tt[1], tt[0]) if type(tt[2]) == int and rv[0] != tt[2] : print("*** ERROR ***", rv[0], tt[2]) elif type(tt[2]) == tuple and \ rv[0] != tt[2][0] and rv[0] != tt[2][1] : print("*** ERROR ***", rv[0], tt[2]) if out : op = rv[1].rsplit('\r\n') for i in op : print(i) with Timer(): out = True do_tests(ecm) do_tests(pm1) do_tests(pp1) do_tests(pp1_2) do_tests(c200) do_tests(test) ecm-6.4.4/build.vc10/ecm-params.h.x64.intel0000644023561000001540000000122112106741271015007 00000000000000/* created 06 Feb 2012 on confit.loria.fr (Intel(R) Core(TM) i5-2500 CPU) for svn revision 1705 with GMP 5.0.3 and gcc 4.6.1 */ #define TUNE_MULREDC_THRESH 21 #define TUNE_SQRREDC_THRESH 9 #define MPZMOD_THRESHOLD 77 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 11, 12, 12, 13, 14, 13, 14, 14, 16, 16, 16, 16, 16, 16} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 8 #define POLYINVERT_NTT_THRESHOLD 128 #define POLYEVALT_NTT_THRESHOLD 64 #define MPZSPV_NORMALISE_STRIDE 256 ecm-6.4.4/build.vc10/vsyasm.xml0000644023561000001540000002064312106741270013133 00000000000000 General Symbols Files 
Command Line Execute Before Specifies the targets for the build customization to run before. Execute After Specifies the targets for the build customization to run after. Additional Options Additional Options ecm-6.4.4/build.vc10/Makefile.am0000644023561000001540000000075012106741271013121 00000000000000EXTRA_DIST = config.h ecm-params.h ecm-params.h.win32.amd \ ecm-params.h.win32.intel ecm-params.h.x64.amd \ ecm-params.h.x64.intel ecm.sln file_copy.bat \ mp_lib.props mul_fft-params.h.win32.amd \ mul_fft-params.h.win32.intel mul_fft-params.h.x64.amd \ mul_fft-params.h.x64.intel readme.txt tests.py \ vsyasm.props vsyasm.targets vsyasm.xml getrusage.h DIST_SUBDIRS = assembler ecm libecm tune bench_mulredc ecm-6.4.4/build.vc10/file_copy.bat0000644023561000001540000000027112106741271013524 00000000000000if not exist %1 ( echo file_copy failure: %1 not found && goto exit ) if exist %2 ( fc %1 %2 > nul && if not %errorlevel 1 goto exit ) echo copying %1 to %2 && copy %1 %2 :exit ecm-6.4.4/build.vc10/ecm-params.h.win32.intel0000644023561000001540000000127112106741270015334 00000000000000/* those parameters were obtained on toto.loria.fr with ecm-6.3-rc3 gmp-5.0.1, and gcc 4.0.2 -m32 -O2 -pedantic -fomit-frame-pointer -mtune=pentium3 -march=pentium3 */ #define TUNE_MULREDC_THRESH 1 #define TUNE_SQRREDC_THRESH 1 #define MPZMOD_THRESHOLD 98 #define REDC_THRESHOLD 398 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 12, 12, 1, 14, 12, 13, 1, 15, 16, 15, 16, 19, 20, 22} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 1024 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 256 #define POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 1024 ecm-6.4.4/build.vc10/config.h0000644023561000001540000001430512106753344012510 00000000000000/* config.h.in. Generated from configure.in by autoheader. 
*/ #define VERSION "6.4.4" #define VERSION_GPU "gpu_ecm-win" #define PACKAGE_BUGREPORT "ecm-discuss@lists.gforge.inria.fr" /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP systems. This function is required for `alloca.c' support on those systems. */ #undef CRAY_STACKSEG_END /* Define to 1 if using `alloca.c'. */ #define C_ALLOCA 1 /* Define to 1 if you have the `access' function. */ #undef HAVE_ACCESS /* Define to 1 if you have `alloca', as a function or macro. */ #define HAVE_ALLOCA 1 /* Define to 1 if you have and it should be used (not on Ultrix). */ #undef HAVE_ALLOCA_H /* Define to 1 if you have the `ctime' function. */ #define HAVE_CTIME 1 /* Define to 1 if you have the header file. */ #define HAVE_CTYPE_H 1 /* Define to 1 if you have the `floor' function. */ #define HAVE_FLOOR 1 /* Define to 1 if you have the `fmod' function. */ #define HAVE_FMOD 1 /* Define to 1 if you have the `gethostname' function. */ #define HAVE_GETHOSTNAME 1 /* Define to 1 if you have the `getrusage' function. */ #define HAVE_GETRUSAGE 1 /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY /* Define to 1 if you have the header file. */ #define HAVE_GMP_H 1 /* Define to 1 if gwnum.a or gwnum.lib exist */ #undef HAVE_GWNUM /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H /* Define to 1 if you have the header file. */ #undef HAVE_IO_H /* Define to 1 if you have the `isascii' function. */ #undef HAVE_ISASCII /* Define to 1 if you have the `isdigit' function. */ #define HAVE_ISDIGIT 1 /* Define to 1 if you have the `isspace' function. */ #define HAVE_ISSPACE 1 /* Define to 1 if you have the `isxdigit' function. */ #define HAVE_ISXDIGIT 1 /* Define to 1 if you have the `m' library (-lm). */ #undef HAVE_LIBM /* Define to 1 if you have the header file. */ #define HAVE_LIMITS_H 1 /* Define to 1 if you have the header file. */ #define HAVE_MALLOC_H 1 /* Define to 1 if you have the `malloc_usable_size' function. 
*/ #undef HAVE_MALLOC_USABLE_SIZE /* Define to 1 if you have the header file. */ #define HAVE_MATH_H 1 /* Define to 1 if you have the `memmove' function. */ #define HAVE_MEMMOVE 1 /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 /* Define to 1 if you have the `memset' function. */ #define HAVE_MEMSET 1 /* Define to 1 if you have the `nice' function. */ #undef HAVE_NICE /* Define to 1 if you have the `pow' function. */ #define HAVE_POW 1 /* Define to 1 if you have the `signal' function. */ #define HAVE_SIGNAL 1 /* Define to 1 if you have the header file. */ #define HAVE_SIGNAL_H 1 /* Define to 1 if you have the `sqrt' function. */ #define HAVE_SQRT 1 /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 /* Define to 1 if you have the `strchr' function. */ #define HAVE_STRCHR 1 /* Define to 1 if you have the header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the header file. */ #define HAVE_STRING_H 1 /* Define to 1 if you have the `strlen' function. */ #define HAVE_STRLEN 1 /* Define to 1 if you have the `strncasecmp' function. */ #undef HAVE_STRNCASECMP /* Define to 1 if you have the `strstr' function. */ #undef HAVE_STRSTR /* Define to 1 if you have the header file. */ #undef HAVE_SYS_RESOURCE_H /* Define to 1 if you have the header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the header file. */ #undef HAVE_SYS_TIME_H /* Define to 1 if you have the header file. */ #define HAVE_SYS_TYPES_H 1 /* Define to 1 if you have the `time' function. */ #undef HAVE_TIME /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H /* Define to 1 if you have the `unlink' function. */ #define HAVE_UNLINK 1 /* Define to 1 if you have the header file. */ #define HAVE_WINDOWS_H 1 /* Define to 1 if you have the `__gmpn_add_nc' function. 
*/ #if defined( _WIN64 ) # define HAVE___GMPN_ADD_NC 1 #endif /* Define to 1 if you have the `__gmpn_mod_34lsub1' function. */ #define HAVE___GMPN_MOD_34LSUB1 1 /* Define to 1 if you have the `__gmpn_mul_fft' function. */ #define HAVE___GMPN_MUL_FFT 1 /* Define to 1 if you want memory debugging */ #undef MEMORY_DEBUG /* Define if the system has the type `long long'. */ #define HAVE_LONG_LONG 1 #define HAVE_LONG_LONG_INT 1 /* Define to 1 to use asm redc on x86 or x86_64 */ # define NATIVE_REDC 1 /* Define to 1 if your C compiler doesn't accept -c and -o together. */ #undef NO_MINUS_C_MINUS_O /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be automatically deduced at runtime. STACK_DIRECTION > 0 => grows toward higher addresses STACK_DIRECTION < 0 => grows toward lower addresses STACK_DIRECTION = 0 => direction of growth unknown */ #undef STACK_DIRECTION /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 /* Define to 1 if you can safely include both and . */ #undef TIME_WITH_SYS_TIME /* Define to 1 if you want assertions enabled */ #undef WANT_ASSERT /* Define to 1 if you want shell command execution */ #undef WANT_SHELLCMD /* Define to empty if `const' does not conform to ANSI C. */ #undef const /* How to specify hot-spot attribute, if available */ #define ATTRIBUTE_HOT #define HAVE___GMPN_REDC_1 1 #define HAVE___GMPN_REDC_2 1 #define HAVE_ASM_REDC3 1 #define WINDOWS64_ABI 1 /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus #define inline __inline #endif /* Define to `unsigned int' if does not define. 
*/ #undef size_t #ifdef _MSC_VER # if _MSC_VER < 1600 # define int64_t __int64 # define uint64_t unsigned __int64 # endif # define strncasecmp strnicmp # define alloca _alloca # define fseek64 _fseek64 # define ftell64 _ftell64 #endif ecm-6.4.4/build.vc10/ecm.sln0000644023561000001540000000713512106741271012353 00000000000000Microsoft Visual Studio Solution File, Format Version 11.00 # Visual Studio 2010 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libecm", "libecm\libecm.vcxproj", "{CD555681-D65B-4173-A29C-B8BF06A4871B}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ecm", "ecm\ecm.vcxproj", "{C0E2EA85-996A-4B5F-AD30-590FAF5B7187}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tune", "tune\tune.vcxproj", "{80E08750-5C6C-492E-BB1E-7200978AE125}" ProjectSection(ProjectDependencies) = postProject {CD555681-D65B-4173-A29C-B8BF06A4871B} = {CD555681-D65B-4173-A29C-B8BF06A4871B} {C0E2EA85-996A-4B5F-AD30-590FAF5B7187} = {C0E2EA85-996A-4B5F-AD30-590FAF5B7187} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bench_mulredc", "bench_mulredc\bench_mulredc.vcxproj", "{4727DE12-787D-432D-B166-BF103B0C3C87}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 Debug|x64 = Debug|x64 Release|Win32 = Release|Win32 Release|x64 = Release|x64 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {CD555681-D65B-4173-A29C-B8BF06A4871B}.Debug|Win32.ActiveCfg = Debug|Win32 {CD555681-D65B-4173-A29C-B8BF06A4871B}.Debug|Win32.Build.0 = Debug|Win32 {CD555681-D65B-4173-A29C-B8BF06A4871B}.Debug|x64.ActiveCfg = Debug|x64 {CD555681-D65B-4173-A29C-B8BF06A4871B}.Debug|x64.Build.0 = Debug|x64 {CD555681-D65B-4173-A29C-B8BF06A4871B}.Release|Win32.ActiveCfg = Release|Win32 {CD555681-D65B-4173-A29C-B8BF06A4871B}.Release|Win32.Build.0 = Release|Win32 {CD555681-D65B-4173-A29C-B8BF06A4871B}.Release|x64.ActiveCfg = Release|x64 
{CD555681-D65B-4173-A29C-B8BF06A4871B}.Release|x64.Build.0 = Release|x64 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187}.Debug|Win32.ActiveCfg = Debug|Win32 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187}.Debug|Win32.Build.0 = Debug|Win32 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187}.Debug|x64.ActiveCfg = Debug|x64 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187}.Debug|x64.Build.0 = Debug|x64 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187}.Release|Win32.ActiveCfg = Release|Win32 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187}.Release|Win32.Build.0 = Release|Win32 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187}.Release|x64.ActiveCfg = Release|x64 {C0E2EA85-996A-4B5F-AD30-590FAF5B7187}.Release|x64.Build.0 = Release|x64 {80E08750-5C6C-492E-BB1E-7200978AE125}.Debug|Win32.ActiveCfg = Release|x64 {80E08750-5C6C-492E-BB1E-7200978AE125}.Debug|x64.ActiveCfg = Release|x64 {80E08750-5C6C-492E-BB1E-7200978AE125}.Debug|x64.Build.0 = Release|x64 {80E08750-5C6C-492E-BB1E-7200978AE125}.Release|Win32.ActiveCfg = Release|Win32 {80E08750-5C6C-492E-BB1E-7200978AE125}.Release|Win32.Build.0 = Release|Win32 {80E08750-5C6C-492E-BB1E-7200978AE125}.Release|x64.ActiveCfg = Release|x64 {80E08750-5C6C-492E-BB1E-7200978AE125}.Release|x64.Build.0 = Release|x64 {4727DE12-787D-432D-B166-BF103B0C3C87}.Debug|Win32.ActiveCfg = Debug|Win32 {4727DE12-787D-432D-B166-BF103B0C3C87}.Debug|Win32.Build.0 = Debug|Win32 {4727DE12-787D-432D-B166-BF103B0C3C87}.Debug|x64.ActiveCfg = Debug|x64 {4727DE12-787D-432D-B166-BF103B0C3C87}.Debug|x64.Build.0 = Debug|x64 {4727DE12-787D-432D-B166-BF103B0C3C87}.Release|Win32.ActiveCfg = Release|Win32 {4727DE12-787D-432D-B166-BF103B0C3C87}.Release|Win32.Build.0 = Release|Win32 {4727DE12-787D-432D-B166-BF103B0C3C87}.Release|x64.ActiveCfg = Release|x64 {4727DE12-787D-432D-B166-BF103B0C3C87}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection EndGlobal ecm-6.4.4/build.vc10/mp_lib.props0000644023561000001540000000127012106741271013412 00000000000000 
mpir\ mpir.lib <_ProjectFileVersion>10.0.30128.1 $(mp_dir) true $(mp_lib) true ecm-6.4.4/COPYING.LIB0000644023561000001540000001672712106741274010674 00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. "The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. 
You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. 
b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. 
You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. 
ecm-6.4.4/test_mulredc.c0000644023561000001540000001750112106741273012060 00000000000000#include "config.h" #include #include #include #include #include "mulredc.h" void mp_print(const mp_limb_t *x, const int N) { int i; for (i = 0; i < N; ++i) { if (i>0) printf (" + "); printf("%lu", x[i]); if (i>0) printf ("*2^%d", i*GMP_NUMB_BITS); } printf("\n"); } static mp_limb_t call_mulredc (const int N, mp_limb_t *z, const mp_limb_t *x, const mp_limb_t *y, const mp_limb_t *m, const mp_limb_t invm) { mp_limb_t cy; switch (N) { case 1: cy = mulredc1(z, x[0], y[0], m[0], invm); break; case 2: cy = mulredc2(z, x, y, m, invm); break; case 3: cy = mulredc3(z, x, y, m, invm); break; case 4: cy = mulredc4(z, x, y, m, invm); break; case 5: cy = mulredc5(z, x, y, m, invm); break; case 6: cy = mulredc6(z, x, y, m, invm); break; case 7: cy = mulredc7(z, x, y, m, invm); break; case 8: cy = mulredc8(z, x, y, m, invm); break; case 9: cy = mulredc9(z, x, y, m, invm); break; case 10: cy = mulredc10(z, x, y, m, invm); break; case 11: cy = mulredc11(z, x, y, m, invm); break; case 12: cy = mulredc12(z, x, y, m, invm); break; case 13: cy = mulredc13(z, x, y, m, invm); break; case 14: cy = mulredc14(z, x, y, m, invm); break; case 15: cy = mulredc15(z, x, y, m, invm); break; case 16: cy = mulredc16(z, x, y, m, invm); break; case 17: cy = mulredc17(z, x, y, m, invm); break; case 18: cy = mulredc18(z, x, y, m, invm); break; case 19: cy = mulredc19(z, x, y, m, invm); break; case 20: cy = mulredc20(z, x, y, m, invm); break; default: cy = mulredc20(z, x, y, m, invm); } return cy; } #if defined(HAVE_NATIVE_MULREDC1_N) static mp_limb_t call_mulredc1 (const int N, mp_limb_t *z, const mp_limb_t x, const mp_limb_t *y, const mp_limb_t *m, const mp_limb_t invm) { mp_limb_t cy; switch (N) { case 1: cy = mulredc1(z, x, y[0], m[0], invm); break; case 2: cy = mulredc1_2(z, x, y, m, invm); break; case 3: cy = mulredc1_3(z, x, y, m, invm); break; case 4: cy = mulredc1_4(z, x, y, m, invm); break; case 5: cy = 
mulredc1_5(z, x, y, m, invm); break; case 6: cy = mulredc1_6(z, x, y, m, invm); break; case 7: cy = mulredc1_7(z, x, y, m, invm); break; case 8: cy = mulredc1_8(z, x, y, m, invm); break; case 9: cy = mulredc1_9(z, x, y, m, invm); break; case 10: cy = mulredc1_10(z, x, y, m, invm); break; case 11: cy = mulredc1_11(z, x, y, m, invm); break; case 12: cy = mulredc1_12(z, x, y, m, invm); break; case 13: cy = mulredc1_13(z, x, y, m, invm); break; case 14: cy = mulredc1_14(z, x, y, m, invm); break; case 15: cy = mulredc1_15(z, x, y, m, invm); break; case 16: cy = mulredc1_16(z, x, y, m, invm); break; case 17: cy = mulredc1_17(z, x, y, m, invm); break; case 18: cy = mulredc1_18(z, x, y, m, invm); break; case 19: cy = mulredc1_19(z, x, y, m, invm); break; case 20: cy = mulredc1_20(z, x, y, m, invm); break; default: cy = mulredc1_20(z, x, y, m, invm); } return cy; } #endif void test(mp_size_t N, int k) { mp_limb_t *x, *y, *yp, *z, *m, invm, cy, cy2, *tmp, *tmp2, *tmp3; int i, j; x = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); y = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); z = (mp_limb_t *) malloc((N+1)*sizeof(mp_limb_t)); m = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); tmp = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); tmp2 = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); tmp3 = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); if (x == NULL || y == NULL || z == NULL || m == NULL || tmp == NULL || tmp2 == NULL || tmp3 == NULL) { fprintf (stderr, "Cannot allocate memory in test_mulredc\n"); exit (1); } mpn_random2(m, N); m[0] |= 1UL; if (m[N-1] == 0) m[N-1] = 1UL; invm = 1UL; for (i = 0; i < 10; ++i) invm = (2*invm-m[0]*invm*invm); invm = -invm; assert( (invm*m[0] +1UL) == 0UL); yp = y; for (i=0; i < k; ++i) { /* Try a few special cases */ if (i == 0) { /* Try all 0, product should be 0 */ for (j = 0; j < N; j++) x[j] = y[j] = 0; } else if (i == 1) { /* Try all 1 */ for (j = 0; j < N; j++) x[j] = y[j] = 1; } else if (i == 2) { /* Try all 2^wordsize - 1 */ for (j = 0; j < 
N; j++) x[j] = y[j] = ~(0UL); } else { /* In the other cases, try random data */ if (i % 2 == 0) { /* Try squaring */ mpn_random2(x, N); yp = x; } else { /* Try multiplication */ mpn_random2(x, N); mpn_random2(y, N); } } /* Mixed mul and redc */ cy = call_mulredc (N, z, x, yp, m, invm); if (cy) printf("!"); z[N] = cy; /* Check with pure gmp : multiply by 2^(N*GMP_NUMB_BITS) and compare. */ for (j=0; j < N; ++j) { tmp[j] = 0; tmp[j+N] = z[j]; } tmp[2*N] = z[N]; mpn_tdiv_qr(tmp2, tmp3, 0, tmp, 2*N+1, m, N); for (j=0; j < N; ++j) z[j] = tmp3[j]; mpn_mul_n(tmp, x, yp, N); mpn_tdiv_qr(tmp2, tmp3, 0, tmp, 2*N, m, N); assert(mpn_cmp(z, tmp3, N) == 0); #if defined(HAVE_NATIVE_MULREDC1_N) /* Test mulredc1_n() */ z[N] = call_mulredc1 (N, z, x[0], yp, m, invm); tmp[0] = 0; for (j=0; j <= N; ++j) /* Multiply by 2^GMP_NUMB_BITS */ tmp[j+1] = z[j]; mpn_tdiv_qr(tmp2, tmp3, 0, tmp, N+2, m, N); for (j=0; j < N; ++j) z[j] = tmp3[j]; tmp[N] = mpn_mul_1 (tmp, yp, N, x[0]); mpn_tdiv_qr(tmp2, tmp3, 0, tmp, N+1, m, N); assert(mpn_cmp(z, tmp3, N) == 0); #endif } free(tmp); free(tmp2); free(tmp3); free(x); free(y); free(z); free(m); } int main(int argc, char** argv) { int i, len; if (argc > 1) /* Test a specific length */ { len = atoi (argv[1]); for (i = 0; i < 1; i++) test (len, 1000000); return 0; } for (;;) { for (i = 1; i <= 20; ++i) { test(i, 1000); } #if 0 test(1, 1000); test(2, 1000); test(3, 1000); test(4, 1000); test(5, 1000); test(6, 1000); test(7, 1000); test(8, 1000); test(9, 1000); test(10, 1000); test(11, 1000); test(12, 1000); test(13, 100); test(14, 100); test(15, 100); test(16, 100); test(17, 100); test(18, 100); test(44, 10); test(45, 10); test(46, 10); test(47, 10); test(48, 10); test(49, 10); #endif printf("."); fflush(stdout); } #if 0 x[0] = 12580274668139321508UL; x[1] = 9205793975152560417UL; x[2] = 7857372727033793057UL; y[0] = 13688385828267279103UL; y[1] = 10575011835742767258UL; y[2] = 8802048318027595690UL; m[0] = 2981542467342508025UL; m[1] = 
5964669706257742025UL; m[2] = 18446744073678090270UL; invm = 9419286575570128311UL; carry = mulredc(z, x, y, m, 3, invm); printf("%lu + 2^64*(%lu + 2^64*%lu), carry=%lu\n", z[0], z[1], z[2], carry); #endif return 0; } #if 0 W := 2^64; x0:= 12580274668139321508; x1:= 9205793975152560417; x2:= 7857372727033793057; x := x0 + W*(x1 + W*x2); y0:= 13688385828267279103; y1:= 10575011835742767258; y2:= 8802048318027595690; y := y0 + W*(y1 + W*y2); m0:= 2981542467342508025; m1:= 5964669706257742025; m2:= 18446744073678090270; m := m0 + W*(m1 + W*m2); invm := 9419286575570128311; #endif ecm-6.4.4/pm1.c0000644023561000001540000010422212106741274010061 00000000000000/* Pollard 'P-1' algorithm. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Paul Zimmermann and Alexander Kruppa. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include #include #include "ecm-impl.h" #define CASCADE_THRES 3 #define CASCADE_MAX 50000000.0 #ifndef POWM_THRESHOLD #define POWM_THRESHOLD 100 #endif typedef struct { unsigned int size; mpz_t *val; } mul_casc; /****************************************************************************** * * * Stage 1 * * * ******************************************************************************/ /* prime powers are accumulated up to about n^L1 */ #define L1 16 /*** Cascaded multiply ***/ /* return NULL if an error occurred */ static mul_casc * mulcascade_init (void) { mul_casc *t; t = (mul_casc *) malloc (sizeof (mul_casc)); if (t == NULL) { outputf (OUTPUT_ERROR, "mulcascade_init: could not allocate memory\n"); return NULL; } t->val = (mpz_t*) malloc (sizeof (mpz_t)); if (t->val == NULL) { outputf (OUTPUT_ERROR, "mulcascade_init: could not allocate memory\n"); free (t); return NULL; } mpz_init (t->val[0]); t->size = 1; return t; } static void mulcascade_free (mul_casc *c) { unsigned int i; for (i = 0; i < c->size; i++) mpz_clear (c->val[i]); free (c->val); free (c); } static mul_casc * mulcascade_mul_d (mul_casc *c, const double n, ATTRIBUTE_UNUSED mpz_t t) { unsigned int i; if (mpz_sgn (c->val[0]) == 0) { mpz_set_d (c->val[0], n); return c; } mpz_mul_d (c->val[0], c->val[0], n, t); if (mpz_size (c->val[0]) <= CASCADE_THRES) return c; for (i = 1; i < c->size; i++) { if (mpz_sgn (c->val[i]) == 0) { mpz_set (c->val[i], c->val[i-1]); mpz_set_ui (c->val[i-1], 0); return c; } else { mpz_mul (c->val[i], c->val[i], c->val[i-1]); mpz_set_ui (c->val[i-1], 0); } } /* Allocate more space for cascade */ i = c->size++; c->val = (mpz_t*) realloc (c->val, c->size * sizeof (mpz_t)); if (c->val == NULL) { fprintf (stderr, "Cannot allocate memory in mulcascade_mul_d\n"); exit (1); } mpz_init (c->val[i]); mpz_swap (c->val[i], c->val[i-1]); return c; } static mul_casc * mulcascade_mul (mul_casc *c, mpz_t n) { unsigned int i; if (mpz_sgn (c->val[0]) == 0) { mpz_set (c->val[0], n); return 
c; } mpz_mul (c->val[0], c->val[0], n); if (mpz_size (c->val[0]) <= CASCADE_THRES) return c; for (i = 1; i < c->size; i++) { if (mpz_sgn (c->val[i]) == 0) { mpz_set (c->val[i], c->val[i-1]); mpz_set_ui (c->val[i-1], 0); return c; } else { mpz_mul (c->val[i], c->val[i], c->val[i-1]); mpz_set_ui (c->val[i-1], 0); } } /* Allocate more space for cascade */ i = c->size++; c->val = (mpz_t*) realloc (c->val, c->size * sizeof (mpz_t)); if (c->val == NULL) { fprintf (stderr, "Cannot allocate memory in mulcascade_mul\n"); exit (1); } mpz_init (c->val[i]); mpz_swap (c->val[i], c->val[i-1]); return c; } static void mulcascade_get_z (mpz_t r, mul_casc *c) { unsigned int i; if (c->size == 0) { mpz_set_ui (r, 1); /* Empty product */ return; } mpz_set_ui (r, 1); for (i = 0; i < c->size; i++) if (mpz_sgn (c->val[i]) != 0) mpz_mul (r, r, c->val[i]); } /* Input: a is the generator (sigma) n is the number to factor B1 is the stage 1 bound B1done: stage 1 was already done up to that limit go is the group order to preload Output: f is the factor found, a is the value at end of stage 1 B1done is set to B1 if stage 1 completed normally, or to the largest prime processed if interrupted, but never to a smaller value than B1done was upon function entry. Return value: non-zero iff a factor was found (or an error occurred). 
*/

static int
pm1_stage1 (mpz_t f, mpres_t a, mpmod_t n, double B1, double *B1done,
            mpz_t go, int (*stop_asap)(void), char *chkfilename)
{
  double p, q, r, cascade_limit, last_chkpnt_p;
  mpz_t g, d;
  int youpi = ECM_NO_FACTOR_FOUND;
  unsigned int size_n, max_size;
  unsigned int smallbase = 0;
  mul_casc *cascade;
  long last_chkpnt_time;
  const double B0 = sqrt (B1);

  mpz_init (g);
  mpz_init (d);

  size_n = mpz_sizeinbase (n->orig_modulus, 2);
  max_size = L1 * size_n;

  /* Remember the base if it fits in an unsigned int, so that the first big
     exponentiation can use mpres_ui_pow */
  mpres_get_z (g, a, n);
  if (mpz_fits_uint_p (g))
    smallbase = mpz_get_ui (g);

  /* suggestion from Peter Montgomery: start with exponent n-1,
     since any prime divisor of b^m-1 which does not divide any
     algebraic factor of b^m-1 must be of the form km+1 [Williams82].
     Do this only when n is composite, otherwise all tests with prime
     n factor of a Cunningham number will succeed in stage 1.

     Since mpz_probab_prime_p and a^(n-1) mod n require about lg(n) modular
     multiplications, and P-1 perform about B1 modular multiplications,
     to ensure small overhead, use that trick only when lg(n) <= sqrt(B1).
  */
  /* For now, this p^N-1 is left in.  We might want it out at a later time */
  if ((double) size_n <= B0 &&
      mpz_probab_prime_p (n->orig_modulus, PROBAB_PRIME_TESTS) == 0)
    {
      mpz_sub_ui (g, n->orig_modulus, 1);
      mpres_pow (a, a, g, n);
    }
  else
    mpz_set_ui (g, 1);

  /* Set a limit of roughly 10000 * log_10(N) for the primes that are
     multiplied up in the exponent, i.e. 1M for a 100 digit number,
     but limit to CASCADE_MAX to avoid problems with stack allocation */

  cascade_limit = 3000.0 * (double) size_n;

  if (cascade_limit > CASCADE_MAX)
    cascade_limit = CASCADE_MAX;

  if (cascade_limit > B1)
    cascade_limit = B1;

  cascade = mulcascade_init ();
  if (cascade == NULL)
    {
      youpi = ECM_ERROR;
      goto clear_pm1_stage1;
    }

  /* since B0 = sqrt(B1), we can have B0 > cascade_limit only when
     B1 > cascade_limit^2. This cannot happen when cascade_limit=B1,
     thus we need B1 > min(CASCADE_MAX, 3000*sizeinbase(n,2))^2.
     For sizeinbase(n,2) <= CASCADE_MAX/3000 (less than 5017 digits
     for CASCADE_MAX=5e7) this means B1 > 9e6*sizeinbase(n,2)^2.
     For sizeinbase(n,2) > CASCADE_MAX/3000, this means B1 > CASCADE_MAX^2,
     i.e. B1 > 25e14 for CASCADE_MAX=5e7.
  */

  /* if the user knows that P-1 has a given divisor, he can supply it */
  if (mpz_cmp_ui (go, 1) > 0)
    cascade = mulcascade_mul (cascade, go);

  last_chkpnt_time = cputime ();
  last_chkpnt_p = 2.;

  /* Fill the multiplication cascade with the product of small stage 1
     primes */
  /* Add small primes <= MIN(sqrt(B1), cascade_limit) in the appropriate
     power to the cascade */
  for (p = 2.; p <= MIN(B0, cascade_limit); p = getprime ())
    {
      for (q = 1., r = p; r <= B1; r *= p)
        if (r > *B1done)
          q *= p;
      cascade = mulcascade_mul_d (cascade, q, d);
    }

  /* If B0 < cascade_limit, we can add some primes > sqrt(B1) with
     exponent 1 to the cascade */
  for ( ; p <= cascade_limit; p = getprime ())
    if (p > *B1done)
      cascade = mulcascade_mul_d (cascade, p, d);

  /* Now p > cascade_limit, flush cascade and exponentiate */
  mulcascade_get_z (g, cascade);
  mulcascade_free (cascade);
  /* mpz_sizeinbase returns a size_t; convert it explicitly so the argument
     matches the conversion specifier */
  outputf (OUTPUT_DEVVERBOSE, "Exponent has %lu bits\n",
           (unsigned long) mpz_sizeinbase (g, 2));
  if (smallbase)
    {
      outputf (OUTPUT_DEVVERBOSE, "Using mpres_ui_pow, base %u\n", smallbase);
      mpres_ui_pow (a, smallbase, g, n);
    }
  else
    {
      mpres_pow (a, a, g, n);
    }
  mpz_set_ui (g, 1);

  /* If B0 > cascade_limit, we need to process the primes
     cascade_limit < p < B0 in the appropriate exponent yet */
  for ( ; p <= B0; p = getprime ())
    {
      for (q = 1, r = p; r <= B1; r *= p)
        if (r > *B1done)
          q *= p;
      mpz_mul_d (g, g, q, d);
      if (mpz_sizeinbase (g, 2) >= max_size)
        {
          mpres_pow (a, a, g, n);
          mpz_set_ui (g, 1);
          if (stop_asap != NULL && (*stop_asap) ())
            {
              outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p);
              if (p > *B1done)
                *B1done = p;
              goto clear_pm1_stage1;
            }
        }
    }

  /* All primes sqrt(B1) < p <= B1 appear in exponent 1. All primes <= B1done
     are already included in exponent of at least 1, so it's safe to skip
     ahead to B1done+1 */

  if (*B1done > p)
    {
      getprime_seek ((*B1done) + 1.);
      p = getprime ();
    }

  /* then remaining primes > max(sqrt(B1), cascade_limit) and taken
     with exponent 1 */
  for (; p <= B1; p = getprime ())
    {
      mpz_mul_d (g, g, p, d);
      if (mpz_sizeinbase (g, 2) >= max_size)
        {
          mpres_pow (a, a, g, n);
          mpz_set_ui (g, 1);
          if (stop_asap != NULL && (*stop_asap) ())
            {
              outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p);
              if (p > *B1done)
                *B1done = p;
              goto clear_pm1_stage1;
            }
          if (chkfilename != NULL && p > last_chkpnt_p + 10000. &&
              elltime (last_chkpnt_time, cputime ()) > CHKPNT_PERIOD)
            {
              writechkfile (chkfilename, ECM_PM1, p, n, NULL, a, NULL);
              last_chkpnt_p = p;
              last_chkpnt_time = cputime ();
            }
        }
    }

  mpres_pow (a, a, g, n);

  /* If stage 1 finished normally, p is the smallest prime >B1 here.
     In that case, set to B1 */
  if (p > B1)
    p = B1;

  if (p > *B1done)
    *B1done = p;

  /* A factor shows up as gcd(a - 1, n); a itself is restored afterwards */
  mpres_sub_ui (a, a, 1, n);
  mpres_gcd (f, a, n);
  if (mpz_cmp_ui (f, 1) > 0)
    youpi = ECM_FACTOR_FOUND_STEP1;
  mpres_add_ui (a, a, 1, n);

 clear_pm1_stage1:
  if (chkfilename != NULL)
    writechkfile (chkfilename, ECM_PM1, *B1done, n, NULL, a, NULL);
  getprime_clear (); /* free the prime tables, and reinitialize */
  mpz_clear (d);
  mpz_clear (g);

  return youpi;
}

/******************************************************************************
*                                                                             *
*                                  Stage 2                                    *
*                                                                             *
******************************************************************************/

/* For each of the nr progressions each of S+1 entries in fd[],
   performs the update fd[k] *= fd[k+1], 0 <= k < S+1.
*/ static void update_fd (mpres_t *fd, unsigned int nr, unsigned int S, mpmod_t modulus, unsigned long *muls) { unsigned int j, k; for (j = 0; j < nr * (S + 1); j += S + 1) for (k = 0; k < S; k++) mpres_mul (fd[j + k], fd[j + k], fd[j + k + 1], modulus); if (muls != NULL) *muls += (unsigned long) nr * S; } /* Puts in F[0..dF-1] the successive values of x^(Dickson_{S, a}(j * d2)) for j == 1 mod 6 , j and d1 coprime, where Dickson_{S, a} is the degree S Dickson polynomial with parameter a. For a == 0, Dickson_{S, a} (x) = x^S. Uses the x+1/x trick whenever S > 6 and even, then the Dickson parameter a must be 0. Requires (dF+1) cells in t for the x+1/x trick. Returns non-zero iff a factor was found (then stored in f), or an error occurred. */ int pm1_rootsF (mpz_t f, listz_t F, root_params_t *root_params, unsigned long dF, mpres_t *x, listz_t t, mpmod_t modulus) { unsigned long i; unsigned long muls = 0, gcds = 0; long st, st1; pm1_roots_state_t state; progression_params_t *params = &state.params; /* for less typing */ listz_t coeffs; mpz_t ts; if (dF == 0) return 0; st = cputime (); /* Relative cost of point add during init and computing roots assumed =1 */ init_roots_params (&state.params, root_params->S, root_params->d1, root_params->d2, 1.0); /* The invtrick is profitable for x^S, S even and > 6. Does not work for Dickson polynomials (root_params->S < 0)! 
*/ if (root_params->S > 6 && (root_params->S & 1) == 0) { state.invtrick = 1; params->S /= 2; params->size_fd = params->nr * (params->S + 1); } else state.invtrick = 0; outputf (OUTPUT_DEVVERBOSE, "pm1_rootsF: state: nr = %d, dsieve = %d, size_fd = %d, S = %d, " "dickson_a = %d, invtrick = %d\n", params->nr, params->dsieve, params->size_fd, params->S, params->dickson_a, state.invtrick); /* Init finite differences tables */ mpz_init (ts); /* ts = 0 */ coeffs = init_progression_coeffs (ts, params->dsieve, root_params->d2, 1, 6, params->S, params->dickson_a); mpz_clear (ts); if (coeffs == NULL) return ECM_ERROR; /* Allocate memory for fd[] and compute x^coeff[]*/ state.fd = (mpres_t *) malloc (params->size_fd * sizeof (mpres_t)); if (state.fd == NULL) { clear_list (coeffs, params->size_fd); return ECM_ERROR; } for (i = 0; i < params->size_fd; i++) { outputf (OUTPUT_TRACE, "pm1_rootsF: coeffs[%d] = %Zd\n", i, coeffs[i]); mpres_init (state.fd[i], modulus); /* The highest coefficient of all progressions is identical */ if (i > params->S + 1 && i % (params->S + 1) == params->S) { ASSERT (mpz_cmp (coeffs[i], coeffs[params->S]) == 0); mpres_set (state.fd[i], state.fd[params->S], modulus); } else mpres_pow (state.fd[i], *x, coeffs[i], modulus); } clear_list (coeffs, params->size_fd); coeffs = NULL; st1 = cputime (); outputf (OUTPUT_VERBOSE, "Initializing table of differences for F took %ldms\n", elltime (st, st1)); st = st1; /* Now for the actual calculation of the roots. */ for (i = 0; i < dF;) { /* Is this a rsieve value where we computed x^Dickson(j * d2) ? */ if (gcd (params->rsieve, params->dsieve) == 1) { /* Did we use every progression since the last update? */ if (params->next == params->nr) { /* Yes, time to update again */ update_fd (state.fd, params->nr, params->S, modulus, &muls); params->next = 0; } /* Is this a j value where we want x^Dickson(j * d2) as a root? 
*/ if (gcd (params->rsieve, root_params->d1) == 1) mpres_get_z (F[i++], state.fd[params->next * (params->S + 1)], modulus); params->next ++; } params->rsieve += 6; } for (i = 0; i < params->size_fd; i++) mpres_clear (state.fd[i], modulus); free (state.fd); state.fd = NULL; if (state.invtrick) { if (list_invert (t, F, dF, t[dF], modulus)) { /* Should never happen */ outputf (OUTPUT_ERROR, "Found factor unexpectedly while inverting F[0]*..*F[dF]\n"); mpz_set (f, t[dF]); return ECM_FACTOR_FOUND_STEP2; } muls += 3 * (dF - 1); gcds ++; for (i = 0; i < dF; i++) { mpz_add (F[i], F[i], t[i]); mpz_mod (F[i], F[i], modulus->orig_modulus); } } outputf (OUTPUT_VERBOSE, "Computing roots of F took %ldms", elltime (st, cputime ())); outputf (OUTPUT_DEVVERBOSE, ", %lu muls and %lu extgcds", muls, gcds); outputf (OUTPUT_VERBOSE, "\n"); return ECM_NO_FACTOR_FOUND; } /* Perform the necessary initialisation to allow computation of x^(Dickson_{S, a}(s+n*d)) for successive n, where Dickson_{S, a} is the degree S Dickson polynomial with parameter a. For a == 0, Dickson_{S, a} (x) = x^S. Uses the x+1/x trick whenever S > 6 and even. Return NULL if an error occurred. */ pm1_roots_state_t * pm1_rootsG_init (mpres_t *x, root_params_t *root_params, mpmod_t modulus) { unsigned int i; listz_t coeffs; pm1_roots_state_t *state; progression_params_t *params; /* for less typing */ state = (pm1_roots_state_t *) malloc (sizeof (pm1_roots_state_t)); if (state == NULL) return NULL; params = &(state->params); params->dickson_a = (root_params->S < 0) ? -1 : 0; params->nr = (root_params->d2 > 1) ? root_params->d2 - 1 : 1; params->next = 0; state->invtrick = (root_params->S > 6 && (root_params->S & 1) == 0); params->S = (state->invtrick) ? 
abs (root_params->S) / 2 : abs (root_params->S); params->size_fd = params->nr * (params->S + 1); params->dsieve = 1; params->rsieve = 1; outputf (OUTPUT_DEVVERBOSE, "pm1_rootsG_init: d1 = %lu, d2 = %lu, state: dsieve = %d, " "nr = %d, size_fd = %d, S = %d, invtrick = %d\n", root_params->d1, root_params->d2, params->dsieve, params->nr, params->size_fd, params->S, state->invtrick); state->fd = (mpres_t *) malloc (params->size_fd * sizeof (mpres_t)); if (state->fd == NULL) { free (state); return NULL; } /* Init for Dickson_{E,a} (i0 * d + d1 * n) */ coeffs = init_progression_coeffs (root_params->i0, root_params->d2, root_params->d1, 1, 1, params->S, params->dickson_a); if (coeffs == NULL) { free (state->fd); free (state); return NULL; } for (i = 0; i < params->size_fd; i++) { outputf (OUTPUT_TRACE, "pm1_rootsG_init: coeffs[%d] = %Zd\n", i, coeffs[i]); mpres_init (state->fd[i], modulus); /* The S-th coeff of all progressions is identical */ if (i > params->S && i % (params->S + 1) == params->S) { ASSERT (mpz_cmp (coeffs[i], coeffs[params->S]) == 0); /* Simply copy from the first progression */ mpres_set (state->fd[i], state->fd[params->S], modulus); } else { if (mpz_sgn (coeffs[i]) < 0) { mpz_neg (coeffs[i], coeffs[i]); mpres_pow (state->fd[i], *x, coeffs[i], modulus); mpres_invert (state->fd[i], state->fd[i], modulus); mpz_neg (coeffs[i], coeffs[i]); } else { mpres_pow (state->fd[i], *x, coeffs[i], modulus); } } } clear_list (coeffs, params->size_fd); return state; } /* Frees all the dynamic variables allocated by pm1_rootsG_init() */ void pm1_rootsG_clear (pm1_roots_state_t *state, ATTRIBUTE_UNUSED mpmod_t modulus) { unsigned int k; for (k = 0; k < state->params.size_fd; k++) mpres_clear (state->fd[k], modulus); free (state->fd); state->fd = NULL; free (state); } /* Puts in G the successive values of x^(Dickson_{S, a}(s+j*k)) for 1 <= j <= d, where k is the 'd' value from pm1_rootsG_init() and s is the 's' value of pm1_rootsG_init() or where a previous call to 
pm1_rootsG has left off. Requires (d+1) cells in t for the x+1/x trick. Returns non-zero iff a factor was found (then stored in f). No error can occur. */ int pm1_rootsG (mpz_t f, listz_t G, unsigned long dF, pm1_roots_state_t *state, listz_t t, mpmod_t modulus) { unsigned long i; unsigned long muls = 0, gcds = 0; unsigned int st; progression_params_t *params = &(state->params); /* for less typing */ outputf (OUTPUT_TRACE, "pm1_rootsG: dF = %d, state: size_fd = %d, nr = %d, S = %d\n", dF, params->size_fd, params->nr, params->S); st = cputime (); for (i = 0; i < dF;) { /* Did we use every progression since the last update? */ if (params->next == params->nr) { /* Yes, time to update again */ outputf (OUTPUT_TRACE, "pm1_rootsG: Updating table at rsieve = %d\n", params->rsieve); update_fd (state->fd, params->nr, params->S, modulus, &muls); params->next = 0; } /* Is this a root we should skip? (Take only if gcd == 1) */ if (gcd (params->rsieve, params->dsieve) == 1) { outputf (OUTPUT_TRACE, "pm1_rootsG: Taking root G[%d] at rsieve = %d\n", i, params->rsieve); mpres_get_z (G[i++], state->fd[params->next * (params->S + 1)], modulus); } else outputf (OUTPUT_TRACE, "pm1_rootsG: Skipping root at rsieve = %d\n", params->rsieve); params->next ++; params->rsieve ++; } if (state->invtrick) { if (list_invert (t, G, dF, t[dF], modulus)) { outputf (OUTPUT_VERBOSE, "Found factor while inverting G[0]*..*G[d]\n"); mpz_set (f, t[dF]); return ECM_FACTOR_FOUND_STEP2; } muls += 3 * (dF - 1); gcds ++; for (i = 0; i < dF; i++) { mpz_add (G[i], G[i], t[i]); mpz_mod (G[i], G[i], modulus->orig_modulus); } } outputf (OUTPUT_VERBOSE, "Computing roots of G took %ldms", elltime (st, cputime ())); outputf (OUTPUT_DEVVERBOSE, ", %lu muls and %lu extgcds", muls, gcds); outputf (OUTPUT_VERBOSE, "\n"); return ECM_NO_FACTOR_FOUND; } static void print_prob (double B1, const mpz_t B2, unsigned long dF, unsigned long k, int S, const mpz_t go) { double prob; int i; char sep; outputf (OUTPUT_VERBOSE, 
"Probability of finding a factor of n digits:\n"); if (go != NULL && mpz_cmp_ui (go, 1UL) <= 0) outputf (OUTPUT_VERBOSE, "(Use -go parameter to specify known factors in P-1)\n"); outputf (OUTPUT_VERBOSE, "20\t25\t30\t35\t40\t45\t50\t55\t60\t65\n"); for (i = 20; i <= 65; i += 5) { sep = (i < 65) ? '\t' : '\n'; prob = pm1prob (B1, mpz_get_d (B2), pow (10., i - .5), (double) dF * dF * k, S, go); outputf (OUTPUT_VERBOSE, "%.2g%c", prob, sep); } } /****************************************************************************** * * * Pollard P-1 * * * ******************************************************************************/ /* Input: p is the initial generator (sigma), if 0, generate it at random. N is the number to factor B1 is the stage 1 bound B2 is the stage 2 bound B1done is the stage 1 limit to which supplied residue has already been computed k is the number of blocks for stage 2 verbose is the verbosity level Output: f is the factor found, p is the residue at end of stage 1 Return value: non-zero iff a factor is found (1 for stage 1, 2 for stage 2) */ int pm1 (mpz_t f, mpz_t p, mpz_t N, mpz_t go, double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm, double B2scale, unsigned long k, const int S, int verbose, int repr, int use_ntt, FILE *os, FILE *es, char *chkfilename, char *TreeFilename, double maxmem, gmp_randstate_t rng, int (*stop_asap)(void)) { int youpi = ECM_NO_FACTOR_FOUND; int base2 = 0; int Nbits, smallbase; int po2 = 0; /* Whether we should use power-of-2 poly degree */ long st; mpmod_t modulus; mpres_t x; mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */ unsigned long dF; root_params_t root_params; faststage2_param_t faststage2_params; /* If stage2_variant != 0, we use the new fast stage 2 */ const int stage2_variant = (S == 1 || S == ECM_DEFAULT_S); set_verbose (verbose); ECM_STDOUT = (os == NULL) ? stdout : os; ECM_STDERR = (es == NULL) ? 
stdout : es; /* if n is even, return 2 */ if (mpz_divisible_2exp_p (N, 1)) { mpz_set_ui (f, 2); return ECM_FACTOR_FOUND_STEP1; } st = cputime (); if (mpz_cmp_ui (p, 0) == 0) pm1_random_seed (p, N, rng); mpz_init_set (B2min, B2min_parm); mpz_init_set (B2, B2_parm); /* Set default B2. See ecm.c for comments */ if (ECM_IS_DEFAULT_B2(B2)) { if (stage2_variant == 0) mpz_set_d (B2, B2scale * pow (B1 * PM1_COST, DEFAULT_B2_EXPONENT)); else mpz_set_d (B2, B2scale * pow (B1 * PM1FS2_COST, PM1FS2_DEFAULT_B2_EXPONENT)); } /* set B2min */ if (mpz_sgn (B2min) < 0) mpz_set_d (B2min, B1); if (repr != ECM_MOD_DEFAULT && repr != ECM_MOD_NOBASE2) { if (repr == ECM_MOD_MODMULN) mpmod_init_MODMULN (modulus, N); else if (repr == ECM_MOD_REDC) mpmod_init_REDC (modulus, N); else if (abs (repr) > 16) { if (mpmod_init_BASE2 (modulus, repr, N) == ECM_ERROR) return ECM_ERROR; } else mpmod_init_MPZ (modulus, N); } else /* automatic choice */ { /* Find a good arithmetic for this number */ Nbits = mpz_sizeinbase (N, 2); base2 = (repr == 0) ? isbase2 (N, BASE2_THRESHOLD) : 0; smallbase = mpz_fits_uint_p (p); /* TODO: make dependent on Nbits and base2 */ if (base2) { mpmod_init_BASE2 (modulus, base2, N); } else if (mpz_size (N) <= 2 * POWM_THRESHOLD && smallbase && B1 <= 1e6) /* Below POWM_THRESHOLD, mpz_powm uses MODMULN reduction, too, but without special code for small bases which makes our MODMULN faster. Above POWM_THRESHOLD mpz_powm uses faster mod reduction, at about 2*POWM_THRESHOLD it catches up with our smallbase-MODMULN and then is faster until REDC takes over. */ { outputf (OUTPUT_VERBOSE, "Using MODMULN\n"); mpmod_init_MODMULN (modulus, N); } else if (Nbits > 50000 || (Nbits > 3500 && smallbase)) { outputf (OUTPUT_VERBOSE, "Using REDC\n"); mpmod_init_REDC (modulus, N); } else { outputf (OUTPUT_VERBOSE, "Using mpz_powm\n"); mpmod_init_MPZ (modulus, N); } } /* Determine parameters (polynomial degree etc.) 
*/ if (stage2_variant != 0) { long P_ntt, P_nontt; const unsigned long lmax = 1UL<<28; /* An upper bound */ unsigned long lmax_NTT, lmax_noNTT; faststage2_param_t params_ntt, params_nontt, *better_params; mpz_init (faststage2_params.m_1); faststage2_params.l = 0; mpz_init (params_ntt.m_1); params_ntt.l = 0; mpz_init (params_nontt.m_1); params_nontt.l = 0; /* Find out what the longest transform length is we can do at all. If no maxmem is given, the non-NTT can theoretically do any length. */ lmax_NTT = 0; if (use_ntt) { unsigned long t; /* See what transform length the NTT can handle (due to limited primes and limited memory) */ t = mpzspm_max_len (N); lmax_NTT = MIN (lmax, t); if (maxmem != 0.) { t = pm1fs2_maxlen (double_to_size (maxmem), N, use_ntt); lmax_NTT = MIN (lmax_NTT, t); } outputf (OUTPUT_DEVVERBOSE, "NTT can handle lmax <= %lu\n", lmax_NTT); /* FIXME: if both ntt and no-ntt are tried, but finally ntt is preferred, the last B2 bound computed is that of no-ntt, which is thus wrong */ P_ntt = choose_P (B2min, B2, lmax_NTT, k, ¶ms_ntt, B2min, B2, 1, ECM_PM1); if (P_ntt != ECM_ERROR) outputf (OUTPUT_DEVVERBOSE, "Parameters for NTT: P=%lu, l=%lu\n", params_ntt.P, params_ntt.l); } else P_ntt = 0; /* or GCC complains about uninitialized var */ /* See what transform length the non-NTT code can handle */ lmax_noNTT = lmax; if (maxmem != 0.) 
{ unsigned long t; t = pm1fs2_maxlen (double_to_size (maxmem), N, 0); lmax_noNTT = MIN (lmax_noNTT, t); outputf (OUTPUT_DEVVERBOSE, "non-NTT can handle lmax <= %lu\n", lmax_noNTT); } if (use_ntt != 2) P_nontt = choose_P (B2min, B2, lmax_noNTT, k, ¶ms_nontt, B2min, B2, 0, ECM_PM1); else P_nontt = ECM_ERROR; if (P_nontt != ECM_ERROR) outputf (OUTPUT_DEVVERBOSE, "Parameters for non-NTT: P=%lu, l=%lu\n", params_nontt.P, params_nontt.l); if (((!use_ntt || P_ntt == ECM_ERROR) && P_nontt == ECM_ERROR) || (use_ntt == 2 && P_ntt == ECM_ERROR)) { outputf (OUTPUT_ERROR, "Error: cannot choose suitable P value for your stage 2 " "parameters.\nTry a shorter B2min,B2 interval.\n"); mpz_clear (faststage2_params.m_1); mpz_clear (params_ntt.m_1); mpz_clear (params_nontt.m_1); return ECM_ERROR; } /* Now decide wether to take NTT or non-NTT. How to choose the better one is not an easy question. It will depend on the speed ratio between NTT/non-NTT code, their difference in memory use and available memory. For now, we choose the one that uses a longer transform length. FIXME: Write something not brain-dead here */ if (use_ntt == 0 || P_ntt == ECM_ERROR || (use_ntt == 1 && params_nontt.l > params_ntt.l)) { better_params = ¶ms_nontt; use_ntt = 0; } else { better_params = ¶ms_ntt; use_ntt = 1; } faststage2_params.P = better_params->P; faststage2_params.s_1 = better_params->s_1; faststage2_params.s_2 = better_params->s_2; faststage2_params.l = better_params->l; mpz_set (faststage2_params.m_1, better_params->m_1); mpz_clear (params_ntt.m_1); mpz_clear (params_nontt.m_1); if (maxmem != 0.) outputf (OUTPUT_VERBOSE, "Using lmax = %lu with%s NTT which takes " "about %luMB of memory\n", faststage2_params.l, (use_ntt) ? 
"" : "out", pm1fs2_memory_use (faststage2_params.l, N, use_ntt)/1048576); } else { mpz_init (root_params.i0); root_params.d2 = 0; /* Enable automatic choice of d2 */ if (use_ntt || (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0)) po2 = 1; if (bestD (&root_params, &k, &dF, B2min, B2, po2, use_ntt, maxmem, (TreeFilename != NULL), modulus) == ECM_ERROR) { youpi = ECM_ERROR; goto clear_and_exit; } root_params.S = S; /* Set default degree for Brent-Suyama extension */ if (root_params.S == ECM_DEFAULT_S) { if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0) { /* For Fermat numbers, default is 2 (no Brent-Suyama) */ root_params.S = 2; } else { mpz_t t; mpz_init (t); mpz_sub (t, B2, B2min); if (mpz_cmp_d (t, 3.5e5) < 0) /* B1 < 50000 */ root_params.S = -4; /* Dickson polys give a slightly better chance of success */ else if (mpz_cmp_d (t, 1.1e7) < 0) /* B1 < 500000 */ root_params.S = -6; else if (mpz_cmp_d (t, 1.25e8) < 0) /* B1 < 3000000 */ root_params.S = 12; /* but for S>6, S-th powers are faster thanks to invtrick */ else if (mpz_cmp_d (t, 7.e9) < 0) /* B1 < 50000000 */ root_params.S = 24; else if (mpz_cmp_d (t, 1.9e10) < 0) /* B1 < 100000000 */ root_params.S = 48; else if (mpz_cmp_d (t, 5.e11) < 0) /* B1 < 1000000000 */ root_params.S = 60; else root_params.S = 120; mpz_clear (t); } } /* We need Suyama's power even and at least 2 for P-1 stage 2 to work correctly */ if (root_params.S & 1) root_params.S *= 2; /* FIXME: Is this what the user would expect? */ } /* Print B1, B2, polynomial and x0 */ print_B1_B2_poly (OUTPUT_NORMAL, ECM_PM1, B1, *B1done, B2min_parm, B2min, B2, (stage2_variant == 0) ? 
root_params.S : 1, p, 0, NULL); /* If we do a stage 2, print its parameters */ if (mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) outputf (OUTPUT_VERBOSE, "P = %lu, l = %lu, s_1 = %lu, k = s_2 = %lu, " "m_1 = %Zd\n", faststage2_params.P, faststage2_params.l, faststage2_params.s_1,faststage2_params.s_2, faststage2_params.m_1); else outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", dF, k, root_params.d1, root_params.d2, root_params.i0); } if (test_verbose (OUTPUT_VERBOSE)) { if (mpz_sgn (B2min_parm) >= 0) { outputf (OUTPUT_VERBOSE, "Can't compute success probabilities for B1 <> B2min\n"); } else { rhoinit (256, 10); print_prob (B1, B2, dF, k, (stage2_variant == 0) ? root_params.S : 1, go); } } mpres_init (x, modulus); mpres_set_z (x, p, modulus); st = cputime (); if (B1 > *B1done) youpi = pm1_stage1 (f, x, modulus, B1, B1done, go, stop_asap, chkfilename); st = elltime (st, cputime ()); outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", st); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t tx; mpz_init (tx); mpres_get_z (tx, x, modulus); outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", tx); mpz_clear (tx); } if (stop_asap != NULL && (*stop_asap) ()) goto clear_and_exit; if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) { if (use_ntt) youpi = pm1fs2_ntt (f, x, modulus, &faststage2_params); else youpi = pm1fs2 (f, x, modulus, &faststage2_params); } else youpi = stage2 (f, &x, modulus, dF, k, &root_params, ECM_PM1, use_ntt, TreeFilename, stop_asap); } if (test_verbose (OUTPUT_VERBOSE)) { if (mpz_sgn (B2min_parm) < 0) rhoinit (1, 0); /* Free memory of rhotable */ } clear_and_exit: mpres_get_z (p, x, modulus); mpres_clear (x, modulus); mpmod_clear (modulus); if (stage2_variant != 0) mpz_clear (faststage2_params.m_1); else mpz_clear (root_params.i0); mpz_clear (B2); mpz_clear (B2min); return youpi; } ecm-6.4.4/mpmod.c0000644023561000001540000021523412110710163010472 00000000000000/* Modular multiplication. 
Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Paul Zimmermann, Alexander Kruppa and Cyril Bouvier. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include "ecm-gmp.h" #include "ecm-impl.h" #include "mpmod.h" #ifdef USE_ASM_REDC #include "mulredc.h" #endif FILE *ECM_STDOUT, *ECM_STDERR; /* define them here since needed in tune.c */ /* define WANT_ASSERT to check normalization of residues */ /* #define WANT_ASSERT 1 */ /* #define DEBUG */ /* #define WANT_ASSERT_EXPENSIVE 1 */ #define ASSERT_NORMALIZED(x) ASSERT ((modulus->repr != ECM_MOD_MODMULN && \ modulus->repr != ECM_MOD_REDC) || \ mpz_size (x) <= mpz_size (modulus->orig_modulus)) #define MPZ_NORMALIZED(x) ASSERT (PTR(x)[ABSIZ(x)-1] != 0) static void ecm_redc_basecase (mpz_ptr, mpz_ptr, mpmod_t) ATTRIBUTE_HOT; static void ecm_mulredc_basecase (mpres_t, const mpres_t, const mpres_t, mpmod_t) ATTRIBUTE_HOT; static void base2mod (mpres_t, const mpres_t, mpres_t, mpmod_t) ATTRIBUTE_HOT; static void REDC (mpres_t, const mpres_t, mpz_t, mpmod_t); /* Up from GMP 5.1.0, mpn_redc{1,2} do not subtract the modulus if needed, but return the carry of the final addition */ #ifdef HAVE___GMPN_REDC_1 #ifdef MPN_REDC12_RETURNS_CARRY 
#define REDC1(rp,cp,np,nn,invm) \ do {if (__gmpn_redc_1 (rp,cp,np,nn,invm)) \ mpn_sub_n (rp, rp, np, nn); \ } while(0) #else #define REDC1(rp,cp,np,nn,invm) __gmpn_redc_1(rp,cp,np,nn,invm) #endif #endif #ifdef HAVE___GMPN_REDC_2 #ifdef MPN_REDC12_RETURNS_CARRY #define REDC2(rp,cp,np,nn,invm) \ do {if (__gmpn_redc_2 (rp,cp,np,nn,invm)) \ mpn_sub_n (rp, rp, np, nn); \ } while (0) #else #define REDC2(rp,cp,np,nn,invm) __gmpn_redc_2(rp,cp,np,nn,invm) #endif #endif #if 0 /* PZ: commented out, since I don't see how to use this code. Indeed, we need a large enough value of K to get significant timings; however, for small B1 a too large value of K will increase the total time for a curve. */ /* return non-zero if base-2 division if better for n, with K multiplications */ static int mpmod_tune_base2 (const mpz_t n, int K, int base2) { mpmod_t modulus; int k; long t0, t1; mpres_t x; /* try first without base-2 division */ mpmod_init (modulus, n, ECM_MOD_NOBASE2, 0); mpres_init (x, modulus); mpres_set_z (x, n, modulus); mpres_sub_ui (x, x, 1, modulus); /* so that the initial value is dense */ t0 = cputime (); for (k = 0; k < K; k++) mpres_sqr (x, x, modulus); t0 = cputime () - t0; mpres_clear (x, modulus); mpmod_clear (modulus); /* now with base-2 division */ mpmod_init (modulus, n, ECM_MOD_BASE2, base2); mpres_init (x, modulus); mpres_set_z (x, n, modulus); mpres_sub_ui (x, x, 1, modulus); /* so that the initial value is dense */ t1 = cputime (); for (k = 0; k < K; k++) mpres_sqr (x, x, modulus); t1 = cputime () - t1; fprintf (stderr, "ECM_MOD_NOBASE2:%ld ECM_MOD_BASE2:%ld\n", t0, t1); mpres_clear (x, modulus); mpmod_clear (modulus); return (t1 < t0); } #endif /* returns +/-l if n is a factor of N = 2^l +/- 1 with N <= n^threshold, 0 otherwise. 
*/
/* n is the number to test and threshold bounds the accepted exponent.
   Two exits exist: when 2^(2lo) mod n is 1 the function returns lo (or 0)
   directly; otherwise a candidate exponent res is derived and set to 0
   if |res| exceeds threshold * lo or is below 16. */
int
isbase2 (const mpz_t n, const double threshold)
{
  unsigned int k, lo;
  int res = 0;
  mpz_t u, w;

  MPZ_INIT (u);
  MPZ_INIT (w);
  lo = mpz_sizeinbase (n, 2) - 1; /* 2^lo <= n < 2^(lo+1) */
  mpz_set_ui (u, 1UL);
  mpz_mul_2exp (u, u, 2UL * lo);
  mpz_mod (w, u, n); /* 2^(2lo) mod n = -/+2^(2lo-l) if m*n = 2^l+/-1 */
  if (mpz_cmp_ui (w, 1UL) == 0) /* if 2^(2lo) mod n = 1, then n divides
                                 2^(2lo)-1. If algebraic factors have been
                                 removed, n divides either 2^lo+1 or 2^lo-1.
                                 But since n has lo+1 bits, n can only divide
                                 2^lo+1. More precisely, n must be 2^lo+1. */
    {
      /* check that n equals 2^lo+1. Since n divides 2^(2lo)-1, n is odd. */
      if (mpz_scan1 (n, 1UL) != lo)
        lo = 0;
      mpz_clear (w);
      mpz_clear (u);
      /* NOTE(review): this exit bypasses the threshold and "< 16" checks
         applied at the end of the function - confirm this is intended */
      return lo;
    }
  k = mpz_sizeinbase (w, 2) - 1;
  /* if w = 2^k then n divides 2^(2*lo-k)-1 */
  mpz_set_ui (u, 1UL);
  mpz_mul_2exp (u, u, k);
  if (mpz_cmp(w, u) == 0)
    res = k - 2 * lo; /* unsigned arithmetic, converted to int on assignment */
  else /* if w = -2^k then n divides 2^(2*lo-k)+1 */
    {
      mpz_neg (w, w);
      mpz_mod (w, w, n);
      k = mpz_sizeinbase (w, 2) - 1;
      mpz_set_ui (u, 1UL);
      mpz_mul_2exp (u, u, k);
      if (mpz_cmp (w, u) == 0)
        res = 2 * lo - k;
    }
  mpz_clear (u);
  mpz_clear (w);
#if 0
  if (res != 0)
    mpmod_tune_base2 (n, 1000000, res);
#endif
  /* Reject exponents that are too large relative to the size of n */
  if (abs (res) > (int) (threshold * (double) lo))
    res = 0;

  /* Reject exponents that are too small */
  if (abs (res) < 16)
    res = 0;

  return res;
}

/* Do base-2 reduction. R must not equal S or t.
   S is split at bit |modulus->bits| into a high and a low part, which are
   added (modulus->bits < 0) or subtracted (otherwise). The step is repeated
   on R until R has at most |modulus->bits| bits. t is used as scratch. */
static void
base2mod (mpres_t R, const mpres_t S, mpres_t t, mpmod_t modulus)
{
  unsigned long absbits = abs (modulus->bits);

  ASSERT (R != S && R != t);
  mpz_tdiv_q_2exp (R, S, absbits); /* R = high part of S */
  mpz_tdiv_r_2exp (t, S, absbits); /* t = low part of S */
  if (modulus->bits < 0)
    mpz_add (R, R, t);
  else
    mpz_sub (R, t, R);

  /* mpz_mod (R, R, modulus->orig_modulus); */
  while (mpz_sizeinbase (R, 2) > absbits)
    {
      mpz_tdiv_q_2exp (t, R, absbits);
      mpz_tdiv_r_2exp (R, R, absbits);
      if (modulus->bits < 0)
        mpz_add (R, R, t);
      else
        mpz_sub (R, R, t);
    }
}

/* Modular reduction modulo the Fermat number 2^m+1.
   n = m / GMP_NUMB_BITS. Result is < 2^m+1.
   FIXME: this does not work with nails.
   Only copies the data to R if reduction is needed and returns 1 in that
   case. If the value in S is reduced already, nothing is done and 0 is
   returned. Yes, this is ugly. */
static int
base2mod_2 (mpres_t R, const mpres_t S, mp_size_t n, mpz_t modulus)
{
  mp_size_t s;

  s = ABSIZ(S);
  if (s > n)
    {
      if (s == n + 1)
        {
          /* Exactly one limb too many: fold the top limb sp[n] back into
             the low n limbs */
          mp_srcptr sp = PTR(S);
          mp_ptr rp;

          MPZ_REALLOC (R, s);
          rp = PTR(R);

          /* Subtract the top limb from the low n limbs; if that borrows,
             add the borrow back and keep the resulting carry in rp[n] */
          if ((rp[n] = mpn_sub_1 (rp, sp, n, sp[n])))
            rp[n] = mpn_add_1 (rp, rp, n, rp[n]);
          MPN_NORMALIZE(rp, s);
          ASSERT (s <= n || (s == n && rp[n] == 1));
          SIZ(R) = (SIZ(S) > 0) ? (int) s : (int) -s; /* keep the sign of S */
        }
      else /* should happen rarely */
        mpz_mod (R, S, modulus);

      return 1;
    }

  return 0;
}

/* subquadratic REDC, at mpn level.
   {orig,n} is the original modulus.
   Requires xn = 2n or 2n-1 and ABSIZ(orig_modulus)=n.
   The n-limb result is written to {rp, n}; {x0p, xn} is only read. */
static void
ecm_redc_n (mp_ptr rp, mp_srcptr x0p, mp_size_t xn,
            mp_srcptr orig, mp_srcptr invm, mp_size_t n)
{
  mp_ptr tp, up, xp;
  mp_size_t nn = n + n;
  mp_limb_t cy, cin;
  TMP_DECL(marker);

  ASSERT((xn == 2 * n) || (xn == 2 * n - 1));

  TMP_MARK(marker);
  up = TMP_ALLOC_LIMBS(nn + nn);
  if (xn < nn)
    {
      /* Work on a copy of the input padded with a zero limb to 2n limbs */
      xp = TMP_ALLOC_LIMBS(nn);
      MPN_COPY (xp, x0p, xn);
      xp[nn - 1] = 0;
    }
  else
    xp = (mp_ptr) x0p;
  /* {up, n} = low n limbs of {xp, n} * {invm, n} */
#ifdef HAVE___GMPN_MULLO_N /* available up from GMP 5.0.0 */
  __gmpn_mullo_n (up, xp, invm, n);
#else
  ecm_mul_lo_n (up, xp, invm, n);
#endif
  tp = up + nn;
  mpn_mul_n (tp, up, orig, n); /* {tp, 2n} = {up, n} * {orig, n} */
  /* add {x, 2n} and {tp, 2n}. We know that {tp, n} + {xp, n} will give
     either 0, or a carry out. If xp[n-1] <> 0 or tp[n-1] <> 0,
     then there is a carry. We use a binary OR, which sets the zero flag
     if and only if both operands are zero. */
  cin = (mp_limb_t) ((xp[n - 1] | tp[n - 1]) ? 1 : 0);
#ifdef HAVE___GMPN_ADD_NC
  cy = __gmpn_add_nc (rp, tp + n, xp + n, n, cin);
#else
  cy = mpn_add_n (rp, tp + n, xp + n, n);
  cy += mpn_add_1 (rp, rp, n, cin);
#endif
  /* since we add at most N-1 to the upper half of {x0p,2n},
     one adjustment is enough */
  if (cy)
    cy -= mpn_sub_n (rp, rp, orig, n);
  ASSERT (cy == 0);
  TMP_FREE(marker);
}

/* REDC.
   x and t must not be identical, t has limb growth */
/* subquadratic REDC, at mpz level */
/* When x has exactly 2n limbs (n = modulus->bits / GMP_NUMB_BITS) the
   reduction is done by ecm_redc_n() directly on the limbs; otherwise it is
   done with mpz functions, using t as scratch. */
static void
REDC (mpres_t r, const mpres_t x, mpz_t t, mpmod_t modulus)
{
  mp_size_t n = modulus->bits / GMP_NUMB_BITS;
  mp_size_t xn = ABSIZ(x);

  ASSERT (xn <= 2 * n);
  if (xn == 2 * n) /* ecm_redc_n also accepts xn=2n-1, but this seems slower
                    for now (see remark in TODO) */
    {
      mp_ptr rp;
      MPZ_REALLOC (r, n);
      rp = PTR(r);
      ecm_redc_n (rp, PTR(x), xn, PTR(modulus->orig_modulus),
                  PTR(modulus->aux_modulus), n);
      MPN_NORMALIZE(rp, n);
      SIZ(r) = (SIZ(x) > 0) ? (int) n : (int) -n; /* keep the sign of x */
      MPZ_NORMALIZED (r);
    }
  else
    {
      mpz_tdiv_r_2exp (t, x, modulus->bits);
      mpz_mul (t, t, modulus->aux_modulus);
      mpz_tdiv_r_2exp (t, t, modulus->bits);  /* t = (x % R) * 1/N (mod R) */
      mpz_mul (t, t, modulus->orig_modulus);
      mpz_add (t, t, x);
      mpz_tdiv_q_2exp (r, t, modulus->bits);  /* r = (x + m*N) / R */
      if (ABSIZ (r) > n)
        mpz_sub (r, r, modulus->multiple);
    }
  ASSERT (ABSIZ(r) <= n);
}

/* Quadratic time redc for n word moduli. */
/* Uses __gmpn_redc_2 when available (via the REDC2 macro), else
   __gmpn_redc_1 (via REDC1), else the plain C loop below.
   The C loop modifies the limbs at cp and reads 2*nn of them. */
static inline void
redc_basecase_n (mp_ptr rp, mp_ptr cp, mp_srcptr np, const mp_size_t nn,
                 const mp_ptr invm)
{
#ifdef HAVE___GMPN_REDC_2
  REDC2(rp, cp, np, nn, invm);
#else /* HAVE___GMPN_REDC_2 is not defined */
#ifdef HAVE___GMPN_REDC_1
  REDC1(rp, cp, np, nn, invm[0]);
#else /* neither HAVE___GMPN_REDC_2 nor HAVE___GMPN_REDC_1 is defined */
  mp_limb_t cy;
  mp_size_t j;

  for (j = 0; j < nn; j++)
    {
      /* Add a multiple of {np, nn} that makes the current low limb zero,
         then store the carry-out limb in its place */
      cy = mpn_addmul_1 (cp, np, nn, cp[0] * invm[0]);
      ASSERT(cp[0] == (mp_limb_t) 0);
      cp[0] = cy;
      cp++;
    }
  /* add vector of carries and shift */
  cy = mpn_add_n (rp, cp, cp - nn, nn);
  /* the result of Montgomery's REDC is less than 2^Nbits + N,
     thus at most one correction is enough */
  if (cy != 0)
    {
      mp_limb_t t;
      t = mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */
      ASSERT (t == 1);
    }
#endif /* HAVE___GMPN_REDC_1 */
#endif /* HAVE___GMPN_REDC_2 */
}

/* r <- c/R^nn mod n, where n has nn limbs, and R=2^GMP_NUMB_BITS.
   n must be odd.
   c must have space for at least 2*nn limbs.
   r must have space for at least n limbs.
   c and r can be the same variable.
   The data in c is clobbered. */
static void
ecm_redc_basecase (mpz_ptr r, mpz_ptr c, mpmod_t modulus)
{
  mp_ptr rp;
  mp_ptr cp;
  mp_srcptr np;
  mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS;

  ASSERT(ABSIZ(c) <= 2 * nn);
  ASSERT(ALLOC(c) >= 2 * nn);
  ASSERT(ALLOC(r) >= nn);
  cp = PTR(c);
  rp = PTR(r);
  np = PTR(modulus->orig_modulus);
  /* Zero-pad c to 2*nn limbs before the reduction */
  for (j = ABSIZ(c); j < 2 * nn; j++)
    cp[j] = 0;

  redc_basecase_n (rp, cp, np, nn, modulus->Nprim);

  /* MPN_NORMALIZE may decrease nn; the result takes the sign of c */
  MPN_NORMALIZE (rp, nn);
  SIZ(r) = SIZ(c) < 0 ? (int) -nn : (int) nn;
}

#ifdef USE_ASM_REDC
/* Quadratic time multiplication and REDC with nn-limb modulus.
   x and y are nn-limb residues, the nn-limb result is written to z.
   This function merely calls the correct mulredc*() assembly function
   depending on nn, and processes any leftover carry.
   Aborts if nn is not in 1..20. */
static void
mulredc (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_srcptr m,
         const mp_size_t nn, const mp_limb_t invm)
{
  mp_limb_t cy;
  switch (nn)
    {
      case 1:
        cy = mulredc1(z, x[0], y[0], m[0], invm);
        break;
      case 2:
        cy = mulredc2(z, x, y, m, invm);
        break;
      case 3:
        cy = mulredc3(z, x, y, m, invm);
        break;
      case 4:
        cy = mulredc4(z, x, y, m, invm);
        break;
      case 5:
        cy = mulredc5(z, x, y, m, invm);
        break;
      case 6:
        cy = mulredc6(z, x, y, m, invm);
        break;
      case 7:
        cy = mulredc7(z, x, y, m, invm);
        break;
      case 8:
        cy = mulredc8(z, x, y, m, invm);
        break;
      case 9:
        cy = mulredc9(z, x, y, m, invm);
        break;
      case 10:
        cy = mulredc10(z, x, y, m, invm);
        break;
      case 11:
        cy = mulredc11(z, x, y, m, invm);
        break;
      case 12:
        cy = mulredc12(z, x, y, m, invm);
        break;
      case 13:
        cy = mulredc13(z, x, y, m, invm);
        break;
      case 14:
        cy = mulredc14(z, x, y, m, invm);
        break;
      case 15:
        cy = mulredc15(z, x, y, m, invm);
        break;
      case 16:
        cy = mulredc16(z, x, y, m, invm);
        break;
      case 17:
        cy = mulredc17(z, x, y, m, invm);
        break;
      case 18:
        cy = mulredc18(z, x, y, m, invm);
        break;
      case 19:
        cy = mulredc19(z, x, y, m, invm);
        break;
      case 20:
        cy = mulredc20(z, x, y, m, invm);
        break;
      default:
        abort();
    }
  /* the result of Montgomery's REDC is less than 2^Nbits + N,
     thus at most one correction is enough */
  if (cy != 0)
    {
      ATTRIBUTE_UNUSED mp_limb_t t;
      t = mpn_sub_n (z, z, m, nn); /* a borrow should always occur here */
      ASSERT (t == 1);
    }
}

/* {rp, n} <- {ap, n}^2/B^n mod {np, n} where B = 2^GMP_NUMB_BITS */
/* Uses 2n limbs of temporary space obtained with TMP_ALLOC_LIMBS.
   The order of the statements below matters: the carry comments rely on
   which limbs of cp have already been touched. */
ATTRIBUTE_UNUSED
static void
sqrredc (mp_ptr rp, mp_srcptr ap, mp_srcptr np, const mp_size_t n,
         const mp_limb_t invm)
{
  mp_ptr cp;
  mp_size_t i;
  mp_limb_t cy, q;
  TMP_DECL(marker);

  TMP_MARK(marker);
  cp = TMP_ALLOC_LIMBS(2*n);
  /* Put the squares ap[i]^2 in {cp + 2i, 2} */
  for (i = 0; i < n; i++)
    umul_ppmm (cp[2*i+1], cp[2*i], ap[i], ap[i]);

  if (UNLIKELY(n == 1))
    {
      /* Single limb: there are no cross products, reduce directly */
      q = cp[0] * invm;
      rp[0] = mpn_addmul_1 (cp, np, 1, q);
      cy = mpn_add_n (rp, rp, cp + 1, 1);
      goto end_sqrredc;
    }

  if (cp[0] & (mp_limb_t) 1)
    /* cp[n] is either some ap[i]^2 mod B or floor(ap[i]^2/B),
       the latter is at most floor((B-1)^2/B) = B-2, and the former cannot be
       B-1 since -1 is not a square mod 2^n for n >1, thus there is no carry in
       cp[n] + ... below */
    cp[n] += mpn_add_n (cp, cp, np, n);
  /* now {cp, 2n} is even: divide by two */
  mpn_rshift (cp, cp, 2*n, 1);
  /* now cp[2n-1] is at most B/2-1 */

  for (i = 0; i < n - 1; i++)
    {
      q = cp[i] * invm;
      cp[i] = mpn_addmul_1 (cp + i, np, n, q);
      /* accumulate ap[i+1..n-1] * ap[i] */
      rp[i] = mpn_addmul_1 (cp + 2 * i + 1, ap + i + 1, n - 1 - i, ap[i]);
    }
  /* the last iteration did set cp[n-2] to zero, accumulated a[n-1] * a[n-2] */

  /* cp[2n-1] was untouched so far, so it is still at most B/2-1 */
  q = cp[n-1] * invm;
  rp[n-1] = mpn_addmul_1 (cp + n - 1, np, n, q);
  /* rp[n-1] <= floor((B^n-1)*(B-1)/B^n)<=B-2 */

  /* now add {rp, n}, {cp+n, n} and {cp, n-1} */
  /* cp[2n-1] still <= B/2-1 */
  rp[n-1] += mpn_add_n (rp, rp, cp, n-1); /* no overflow in rp[n-1] + ... */
  cy = mpn_add_n (rp, rp, cp + n, n);
  /* multiply by 2 */
  cy = (cy << 1) + mpn_lshift (rp, rp, n, 1);
 end_sqrredc:
  /* Subtract the modulus until no carry is left */
  while (cy)
    cy -= mpn_sub_n (rp, rp, np, n);
  TMP_FREE(marker);
}

#ifdef HAVE_NATIVE_MULREDC1_N
/* Multiplies y by the 1-limb value of x and does modulo reduction.
   The resulting residue may be multiplied by some constant,
   which makes this function useful only for cases where, e.g.,
   all projective coordinates are multiplied by the same constant.
   More precisely it computes:
   {z, N} = {y, N} * x / 2^GMP_NUMB_BITS mod {m, N}
   Aborts if N is not in 1..20. */
static void
mulredc_1 (mp_ptr z, const mp_limb_t x, mp_srcptr y, mp_srcptr m,
           const mp_size_t N, const mp_limb_t invm)
{
  mp_limb_t cy;

  switch (N)
    {
     case 1:
      cy = mulredc1(z, x, y[0], m[0], invm);
      break;
     case 2:
      cy = mulredc1_2(z, x, y, m, invm);
      break;
     case 3:
      cy = mulredc1_3(z, x, y, m, invm);
      break;
     case 4:
      cy = mulredc1_4(z, x, y, m, invm);
      break;
     case 5:
      cy = mulredc1_5(z, x, y, m, invm);
      break;
     case 6:
      cy = mulredc1_6(z, x, y, m, invm);
      break;
     case 7:
      cy = mulredc1_7(z, x, y, m, invm);
      break;
     case 8:
      cy = mulredc1_8(z, x, y, m, invm);
      break;
     case 9:
      cy = mulredc1_9(z, x, y, m, invm);
      break;
     case 10:
      cy = mulredc1_10(z, x, y, m, invm);
      break;
     case 11:
      cy = mulredc1_11(z, x, y, m, invm);
      break;
     case 12:
      cy = mulredc1_12(z, x, y, m, invm);
      break;
     case 13:
      cy = mulredc1_13(z, x, y, m, invm);
      break;
     case 14:
      cy = mulredc1_14(z, x, y, m, invm);
      break;
     case 15:
      cy = mulredc1_15(z, x, y, m, invm);
      break;
     case 16:
      cy = mulredc1_16(z, x, y, m, invm);
      break;
     case 17:
      cy = mulredc1_17(z, x, y, m, invm);
      break;
     case 18:
      cy = mulredc1_18(z, x, y, m, invm);
      break;
     case 19:
      cy = mulredc1_19(z, x, y, m, invm);
      break;
     case 20:
      cy = mulredc1_20(z, x, y, m, invm);
      break;
     default:
      {
        abort ();
      }
    }
  /* the result of Montgomery's REDC is less than 2^Nbits + N,
     thus one correction (at most) is enough */
  if (cy != 0)
    {
      ATTRIBUTE_UNUSED mp_limb_t t;
      t = mpn_sub_n (z, z, m, N); /* a borrow should always occur here */
      ASSERT (t == 1);
    }
}
#endif /* ifdef
HAVE_NATIVE_MULREDC1_N */ #endif #ifndef TUNE_MULREDC_TABLE #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} #endif #ifndef TUNE_SQRREDC_TABLE #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} #endif static int tune_mulredc_table[] = TUNE_MULREDC_TABLE; static int tune_sqrredc_table[] = TUNE_SQRREDC_TABLE; static void ecm_mulredc_basecase_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_srcptr np, mp_size_t nn, mp_ptr invm, mp_ptr tmp) { mp_limb_t cy; mp_size_t j; if (nn <= MULREDC_ASSEMBLY_MAX) { switch (tune_mulredc_table[nn]) { case MPMOD_MULREDC: /* use quadratic assembly mulredc */ #ifdef USE_ASM_REDC mulredc (rp, s1p, s2p, np, nn, invm[0]); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC1: /* mpn_mul_n + __gmpn_redc_1 */ #ifdef HAVE___GMPN_REDC_1 mpn_mul_n (tmp, s1p, s2p, nn); REDC1(rp, tmp, np, nn, invm[0]); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC2: /* mpn_mul_n + __gmpn_redc_2 */ #ifdef HAVE___GMPN_REDC_2 mpn_mul_n (tmp, s1p, s2p, nn); REDC2(rp, tmp, np, nn, invm); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDCN: /* mpn_mul_n + __gmpn_redc_n */ #ifdef HAVE___GMPN_REDC_N mpn_mul_n (tmp, s1p, s2p, nn); __gmpn_redc_n (rp, tmp, np, nn, invm); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC_C: /* plain C quadratic reduction */ mpn_mul_n (tmp, s1p, s2p, nn); for (j = 0; j < nn; j++, tmp++) tmp[0] = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm[0]); cy = mpn_add_n (rp, tmp - nn, tmp, nn); if (cy != 0) mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */ break; default: { outputf (OUTPUT_ERROR, "Invalid mulredc mode: %d\n", tune_mulredc_table[nn]); exit (EXIT_FAILURE); } } } else /* nn > MULREDC_ASSEMBLY_MAX */ { mpn_mul_n (tmp, s1p, s2p, nn); ecm_redc_n (rp, tmp, 2 * nn, np, invm, nn); } } static void ecm_sqrredc_basecase_n (mp_ptr rp, mp_srcptr 
s1p, mp_srcptr np, mp_size_t nn, mp_ptr invm, mp_ptr tmp) { mp_limb_t cy; mp_size_t j; if (nn <= MULREDC_ASSEMBLY_MAX) { switch (tune_sqrredc_table[nn]) { case MPMOD_MULREDC: /* use quadratic assembly mulredc */ #ifdef USE_ASM_REDC mulredc (rp, s1p, s1p, np, nn, invm[0]); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC1: /* mpn_sqr + __gmpn_redc_1 */ #ifdef HAVE___GMPN_REDC_1 mpn_sqr (tmp, s1p, nn); REDC1(rp, tmp, np, nn, invm[0]); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC2: /* mpn_sqr + __gmpn_redc_2 */ #ifdef HAVE___GMPN_REDC_2 mpn_sqr (tmp, s1p, nn); REDC2(rp, tmp, np, nn, invm); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDCN: /* mpn_sqr + __gmpn_redc_n */ #ifdef HAVE___GMPN_REDC_N mpn_sqr (tmp, s1p, nn); __gmpn_redc_n (rp, tmp, np, nn, invm); break; #endif /* otherwise go through to the next available mode */ case MPMOD_MUL_REDC_C: /* plain C quadratic reduction */ mpn_sqr (tmp, s1p, nn); for (j = 0; j < nn; j++, tmp++) tmp[0] = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm[0]); cy = mpn_add_n (rp, tmp - nn, tmp, nn); if (cy != 0) mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */ break; default: { outputf (OUTPUT_ERROR, "Invalid sqrredc mode: %d\n", tune_sqrredc_table[nn]); exit (EXIT_FAILURE); } } } else /* nn > MULREDC_ASSEMBLY_MAX */ { mpn_sqr (tmp, s1p, nn); ecm_redc_n (rp, tmp, 2 * nn, np, invm, nn); } } /* R <- S1 * S2 mod modulus i.e. R <- S1*S2/r^nn mod n, where n has nn limbs, and r=2^GMP_NUMB_BITS. 
Same as ecm_redc_basecase previous, but combined with mul Neither input argument must be in modulus->temp1 */ static void ecm_mulredc_basecase (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_ptr s1p, s2p, rp = PTR(R); mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS; ASSERT(ALLOC(R) >= nn); ASSERT(ALLOC(S1) >= nn); ASSERT(ALLOC(S2) >= nn); s1p = PTR(S1); s2p = PTR(S2); /* FIXME: S1 and S2 are input and marked const, we mustn't write to them */ for (j = ABSIZ(S1); j < nn; j++) s1p[j] = 0; for (j = ABSIZ(S2); j < nn; j++) s2p[j] = 0; ecm_mulredc_basecase_n (rp, s1p, s2p, PTR(modulus->orig_modulus), nn, modulus->Nprim, PTR(modulus->temp1)); MPN_NORMALIZE (rp, nn); SIZ(R) = (SIZ(S1)*SIZ(S2)) < 0 ? (int) -nn : (int) nn; } /* R <- S1^2 mod modulus i.e. R <- S1^2/r^nn mod n, where n has nn limbs, and r=2^GMP_NUMB_BITS. Same as ecm_redc_basecase previous, but combined with sqr The input argument must not be in modulus->temp1 */ static void ecm_sqrredc_basecase (mpres_t R, const mpres_t S1, mpmod_t modulus) { mp_ptr rp; mp_ptr s1p; mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS; ASSERT(ALLOC(R) >= nn); ASSERT(ALLOC(S1) >= nn); rp = PTR(R); s1p = PTR(S1); /* FIXME: S1 is input and marked const, we mustn't write to it */ for (j = ABSIZ(S1); j < nn; j++) s1p[j] = 0; ecm_sqrredc_basecase_n (rp, s1p, PTR(modulus->orig_modulus), nn, modulus->Nprim, PTR(modulus->temp1)); MPN_NORMALIZE (rp, nn); SIZ(R) = (int) nn; } /* Multiplies S1 by the one-limb integer S2, and does modulo reduction. The modulo reduction may imply multiplication of the residue class by some constant, since we may not do the correct number of REDC reduction passes and so fail to divide by the correct power of 2 for Montgomery representation. The constant is the same for each call of this function with a given modulus, however. 
*/ static void ecm_mulredc_1_basecase (mpres_t R, const mpres_t S1, const mp_limb_t S2, mpmod_t modulus) { mp_ptr s1p; mp_size_t j, nn = modulus->bits / GMP_NUMB_BITS; ASSERT(ALLOC(R) >= nn); ASSERT(ALLOC(S1) >= nn); s1p = PTR(S1); for (j = ABSIZ(S1); j < nn; j++) s1p[j] = 0; #ifdef HAVE_NATIVE_MULREDC1_N if (nn < 20) { mp_ptr rp = PTR(R); mulredc_1(rp, S2, s1p, PTR(modulus->orig_modulus), nn, modulus->Nprim[0]); MPN_NORMALIZE (rp, nn); SIZ(R) = (SIZ(S1)) < 0 ? (int) -nn : (int) nn; } else #endif { /* FIXME, we can do much better than this */ mpz_mul_ui (modulus->temp1, S1, S2); mpz_mod(R, modulus->temp1, modulus->orig_modulus); } } /* If the user asked for a particular representation, always use it. If repr = ECM_MOD_DEFAULT, use the thresholds. Don't use base2 if repr = ECM_MOD_NOBASE2. If a value is <= -16 or >= 16, it is a base2 exponent. Return a non-zero value if an error occurred. */ int mpmod_init (mpmod_t modulus, const mpz_t N, int repr) { int base2 = 0, r = 0; mp_size_t n = mpz_size (N); switch (repr) { case ECM_MOD_DEFAULT: if ((base2 = isbase2 (N, BASE2_THRESHOLD))) { repr = ECM_MOD_BASE2; break; } /* else go through */ case ECM_MOD_NOBASE2: if (mpz_size (N) < MPZMOD_THRESHOLD) repr = ECM_MOD_MODMULN; else if (mpz_size (N) < REDC_THRESHOLD) repr = ECM_MOD_MPZ; else repr = ECM_MOD_REDC; } /* now repr is {ECM_MOD_BASE2, ECM_MOD_MODMULN, ECM_MOD_MPZ, ECM_MOD_REDC}, or |repr| >= 16. */ switch (repr) { case ECM_MOD_MPZ: outputf (OUTPUT_VERBOSE, "Using mpz_mod\n"); mpmod_init_MPZ (modulus, N); break; case ECM_MOD_MODMULN: outputf (OUTPUT_VERBOSE, "Using MODMULN [mulredc:%d, sqrredc:%d]\n", (n <= MULREDC_ASSEMBLY_MAX) ? tune_mulredc_table[n] : 4, (n <= MULREDC_ASSEMBLY_MAX) ? 
tune_sqrredc_table[n] : 4); mpmod_init_MODMULN (modulus, N); break; case ECM_MOD_REDC: outputf (OUTPUT_VERBOSE, "Using REDC\n"); mpmod_init_REDC (modulus, N); break; default: /* base2 case: either repr=ECM_MOD_BASE2, and base2 was determined above, or |repr| >= 16, and we want base2 = repr */ if (repr != ECM_MOD_BASE2) base2 = repr; r = mpmod_init_BASE2 (modulus, base2, N); ASSERT (r == 0); /* error should not happen if isbase2 is correct */ break; } return r; } void mpres_clear (mpres_t a, ATTRIBUTE_UNUSED const mpmod_t modulus) { mpz_clear (a); PTR(a) = NULL; /* Make sure we segfault if we access it again */ } void mpmod_init_MPZ (mpmod_t modulus, const mpz_t N) { size_t n; mpz_init_set (modulus->orig_modulus, N); modulus->repr = ECM_MOD_MPZ; n = mpz_size (N); /* number of limbs of N */ modulus->bits = n * GMP_NUMB_BITS; /* Number of bits, rounded up to full limb */ MPZ_INIT2 (modulus->temp1, 2UL * modulus->bits + GMP_NUMB_BITS); MPZ_INIT2 (modulus->temp2, modulus->bits); MPZ_INIT2 (modulus->aux_modulus, modulus->bits); mpz_set_ui (modulus->aux_modulus, 1UL); /* we precompute B^(n + ceil(n/2)) mod N, where B=2^GMP_NUMB_BITS */ mpz_mul_2exp (modulus->aux_modulus, modulus->aux_modulus, (n + (n + 1) / 2) * GMP_NUMB_BITS); mpz_mod (modulus->aux_modulus, modulus->aux_modulus, N); return; } int mpmod_init_BASE2 (mpmod_t modulus, const int base2, const mpz_t N) { int Nbits; outputf (OUTPUT_VERBOSE, "Using special division for factor of 2^%d%c1\n", abs (base2), (base2 < 0) ? 
'-' : '+'); mpz_init_set (modulus->orig_modulus, N); modulus->repr = ECM_MOD_BASE2; modulus->bits = base2; Nbits = mpz_size (N) * GMP_NUMB_BITS; /* Number of bits, rounded up to full limb */ MPZ_INIT2 (modulus->temp1, 2UL * Nbits + GMP_NUMB_BITS); MPZ_INIT2 (modulus->temp2, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, abs (base2)); if (base2 < 0) mpz_sub_ui (modulus->temp1, modulus->temp1, 1UL); else mpz_add_ui (modulus->temp1, modulus->temp1, 1UL); if (!mpz_divisible_p (modulus->temp1, N)) { outputf (OUTPUT_ERROR, "mpmod_init_BASE2: n does not divide 2^%d%c1\n", abs (base2), base2 < 0 ? '-' : '+'); mpz_clear (modulus->temp2); mpz_clear (modulus->temp1); mpz_clear (modulus->orig_modulus); return ECM_ERROR; } modulus->Fermat = 0; if (base2 > 0) { unsigned long i; for (i = base2; (i & 1) == 0; i >>= 1); if (i == 1) { modulus->Fermat = base2; } } return 0; } /* initialize the following fields: orig_modulus - the original modulus bits - # of bits of N, rounded up to a multiple of GMP_NUMB_BITS temp1, temp2 - auxiliary variables Nprim - -1/N mod B^n where B=2^GMP_NUMB_BITS and n = #limbs(N) R2 - (2^bits)^2 (mod N) R3 - (2^bits)^3 (mod N) multiple - smallest multiple of N >= 2^bits */ void mpmod_init_MODMULN (mpmod_t modulus, const mpz_t N) { int Nbits; MEMORY_TAG; mpz_init_set (modulus->orig_modulus, N); MEMORY_UNTAG; modulus->repr = ECM_MOD_MODMULN; Nbits = mpz_size (N) * GMP_NUMB_BITS; /* Number of bits, rounded up to full limb */ modulus->bits = Nbits; MPZ_INIT2 (modulus->temp1, 2UL * Nbits + GMP_NUMB_BITS); MPZ_INIT2 (modulus->temp2, Nbits + 1); modulus->Nprim = (mp_limb_t*) malloc (mpz_size (N) * sizeof (mp_limb_t)); MPZ_INIT2 (modulus->R2, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, 2 * Nbits); mpz_mod (modulus->R2, modulus->temp1, modulus->orig_modulus); /* Now R2 = (2^bits)^2 (mod N) */ MPZ_INIT2 (modulus->R3, Nbits); mpz_mul_2exp (modulus->temp1, modulus->R2, Nbits); mpz_mod 
(modulus->R3, modulus->temp1, modulus->orig_modulus); /* Now R3 = (2^bits)^3 (mod N) */ MPZ_INIT2 (modulus->multiple, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits); /* compute ceil(2^bits / N) */ mpz_cdiv_q (modulus->temp1, modulus->temp1, modulus->orig_modulus); mpz_mul (modulus->multiple, modulus->temp1, modulus->orig_modulus); /* Now multiple is the smallest multiple of N >= 2^bits */ mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits); /* since we directly check even modulus in ecm/pm1/pp1, N is odd here, thus 1/N mod 2^Nbits always exist */ mpz_invert (modulus->temp2, N, modulus->temp1); /* temp2 = 1/N mod B^n */ mpz_sub (modulus->temp2, modulus->temp1, modulus->temp2); /* temp2 = -1/N mod B^n */ /* ensure Nprim has all its n limbs correctly set, for ecm_redc_n */ MPN_ZERO(modulus->Nprim, mpz_size (N)); mpn_copyi (modulus->Nprim, PTR(modulus->temp2), ABSIZ(modulus->temp2)); } void mpmod_init_REDC (mpmod_t modulus, const mpz_t N) { mp_size_t n; int Nbits; mpz_init_set (modulus->orig_modulus, N); n = mpz_size (N); modulus->repr = ECM_MOD_REDC; Nbits = n * GMP_NUMB_BITS; /* Number of bits, rounded up to full limb */ modulus->bits = Nbits; MPZ_INIT2 (modulus->temp1, 2 * Nbits + GMP_NUMB_BITS); MPZ_INIT2 (modulus->temp2, Nbits); MPZ_INIT2 (modulus->aux_modulus, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits); /* since we directly check even modulus in ecm/pm1/pp1, N is odd here, thus 1/N mod 2^Nbits always exist */ mpz_invert (modulus->aux_modulus, N, modulus->temp1); mpz_sub (modulus->aux_modulus, modulus->temp1, modulus->aux_modulus); /* ensure aux_modulus has n allocated limbs, for ecm_redc_n */ if (ABSIZ(modulus->aux_modulus) < n) { _mpz_realloc (modulus->aux_modulus, n); /* in case the reallocation fails, _mpz_realloc sets the value to 0 */ ASSERT_ALWAYS (mpz_cmp_ui (modulus->aux_modulus, 0) != 0); MPN_ZERO 
(PTR(modulus->aux_modulus) + ABSIZ(modulus->aux_modulus), n - ABSIZ(modulus->aux_modulus)); } MPZ_INIT2 (modulus->R2, Nbits); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, 2 * Nbits); mpz_mod (modulus->R2, modulus->temp1, modulus->orig_modulus); /* Now R2 = (2^bits)^2 (mod N) */ MPZ_INIT2 (modulus->R3, Nbits); mpz_mul_2exp (modulus->temp1, modulus->R2, Nbits); mpz_mod (modulus->R3, modulus->temp1, modulus->orig_modulus); /* Now R3 = (2^bits)^3 (mod N) */ MPZ_INIT (modulus->multiple); mpz_set_ui (modulus->temp1, 1UL); mpz_mul_2exp (modulus->temp1, modulus->temp1, Nbits); /* compute ceil(2^bits / N) */ mpz_cdiv_q (modulus->temp1, modulus->temp1, modulus->orig_modulus); mpz_mul (modulus->multiple, modulus->temp1, modulus->orig_modulus); /* Now multiple is the largest multiple of N >= 2^bits */ } void mpmod_clear (mpmod_t modulus) { mpz_clear (modulus->orig_modulus); mpz_clear (modulus->temp1); mpz_clear (modulus->temp2); if (modulus->repr == ECM_MOD_REDC || modulus->repr == ECM_MOD_MPZ) mpz_clear (modulus->aux_modulus); if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_clear (modulus->R2); mpz_clear (modulus->R3); mpz_clear (modulus->multiple); } if (modulus->repr == ECM_MOD_MODMULN) free (modulus->Nprim); return; } /* initialize r and set all entries from those of modulus */ void mpmod_init_set (mpmod_t r, const mpmod_t modulus) { const unsigned long Nbits = abs(modulus->bits); const unsigned long n = mpz_size (modulus->orig_modulus); r->repr = modulus->repr; r->bits = modulus->bits; r->Fermat = modulus->Fermat; mpz_init_set (r->orig_modulus, modulus->orig_modulus); MPZ_INIT2 (r->temp1, 2 * Nbits + GMP_NUMB_BITS); MPZ_INIT2 (r->temp2, Nbits + GMP_NUMB_BITS); if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { MPZ_INIT2 (r->multiple, Nbits); MPZ_INIT2 (r->R2, Nbits); MPZ_INIT2 (r->R3, Nbits); mpz_set (r->multiple, modulus->multiple); mpz_set (r->R2, modulus->R2); mpz_set (r->R3, 
modulus->R3); } if (modulus->repr == ECM_MOD_REDC || modulus->repr == ECM_MOD_MPZ) { MPZ_INIT2 (r->aux_modulus, Nbits); mpz_set (r->aux_modulus, modulus->aux_modulus); } if (modulus->repr == ECM_MOD_MODMULN) { r->Nprim = (mp_limb_t*) malloc (n * sizeof (mp_limb_t)); mpn_copyi (r->Nprim, modulus->Nprim, n); } } void mpres_init (mpres_t R, const mpmod_t modulus) { /* use mpz_sizeinbase since modulus->bits may not be initialized yet */ mpz_init2 (R, mpz_sizeinbase (modulus->orig_modulus, 2) + GMP_NUMB_BITS); } /* realloc R so that it has at least the same number of limbs as modulus */ void mpres_realloc (mpres_t R, const mpmod_t modulus) { if (modulus->repr == ECM_MOD_MODMULN) MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); } /* Returns non-zero if the two residues are equal, and zero if they are not */ int mpres_equal (const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mpz_mod (modulus->temp1, S1, modulus->orig_modulus); mpz_mod (modulus->temp2, S2, modulus->orig_modulus); return (mpz_cmp (modulus->temp1, modulus->temp2) == 0); } /* R <- BASE^EXP mod modulus. Assume EXP >= 0. 
*/ void mpres_pow (mpres_t R, const mpres_t BASE, const mpz_t EXP, mpmod_t modulus) { ASSERT_NORMALIZED (BASE); if (modulus->repr == ECM_MOD_MPZ) { mpz_powm (R, BASE, EXP, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_BASE2 || modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { size_t expidx; mp_limb_t bitmask, expbits; /* case EXP=0 */ if (mpz_sgn (EXP) == 0) { mpres_set_ui (R, 1UL, modulus); /* set result to 1 */ ASSERT_NORMALIZED (R); return; } ASSERT (mpz_size (EXP) > 0); /* probably redundant with _sgn() test */ expidx = mpz_size (EXP) - 1; /* point at most significant limb */ expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */ ASSERT (expbits != 0); /* Scan for the MSB in expbits */ bitmask = ((mp_limb_t) 1) << (GMP_NUMB_BITS - 1); for (; (bitmask & expbits) == 0; bitmask >>= 1); /* here the most significant limb with any set bits is in expbits, */ /* bitmask is set to mask in the msb of expbits */ mpz_set (modulus->temp2, BASE); bitmask >>= 1; while (1) { for ( ; bitmask != 0; bitmask >>= 1) { /* Set temp2 = temp2*temp2 */ if (modulus->repr == ECM_MOD_BASE2) { mpz_mul (modulus->temp1, modulus->temp2, modulus->temp2); base2mod (modulus->temp2 , modulus->temp1, modulus->temp1, modulus); } else if (modulus->repr == ECM_MOD_MODMULN) { ecm_mulredc_basecase (modulus->temp2, modulus->temp2, modulus->temp2, modulus); } else { mpz_mul (modulus->temp1, modulus->temp2, modulus->temp2); REDC (modulus->temp2, modulus->temp1, modulus->temp2, modulus); } /* If bit is 1, set temp2 = temp2 * BASE */ if (expbits & bitmask) { if (modulus->repr == ECM_MOD_BASE2) { mpz_mul (modulus->temp1, modulus->temp2, BASE); base2mod (modulus->temp2, modulus->temp1, modulus->temp1, modulus); } else if (modulus->repr == ECM_MOD_MODMULN) { ecm_mulredc_basecase (modulus->temp2, BASE, modulus->temp2, modulus); } else { mpz_mul (modulus->temp1, modulus->temp2, BASE); REDC (modulus->temp2, modulus->temp1, modulus->temp2, modulus); } } } if (expidx 
== 0) /* if we just processed the least */ break; /* significant limb, we are done */ expidx --; expbits = mpz_getlimbn (EXP, expidx); bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1); } mpz_set (R, modulus->temp2); /* mpz_getlimbn() ignores sign of argument, so we computed BASE^|EXP|. If EXP was negative, do a modular inverse */ if (mpz_sgn (EXP) < 0) { mpres_invert (R, R, modulus); } } /* if (modulus->repr == ECM_MOD_BASE2 || ... ) */ ASSERT_NORMALIZED (R); } /* Returns 1 if S == 0 (mod modulus), 0 otherwise */ int mpres_is_zero (const mpres_t S, mpmod_t modulus) { mpz_mod (modulus->temp1, S, modulus->orig_modulus); /* For all currently implemented representations, a zero residue has zero integer representation */ return (mpz_sgn (modulus->temp1) == 0) ? 1 : 0; } /* R <- BASE^EXP mod modulus */ void mpres_ui_pow (mpres_t R, const unsigned long BASE, const mpres_t EXP, mpmod_t modulus) { if (modulus->repr == ECM_MOD_MPZ) { mpz_set_ui (modulus->temp1, BASE); mpz_powm (R, modulus->temp1, EXP, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_BASE2 || modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { size_t expidx; mp_limb_t bitmask, expbits; expidx = mpz_size (EXP) -1; /* point at most significant limb */ expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */ bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1); /* case EXP=0 */ if (mpz_sgn (EXP) == 0) { mpres_set_ui (R, 1UL, modulus); /* set result to 1 */ ASSERT_NORMALIZED (R); return; } ASSERT (mpz_size (EXP) > 0); /* probably redundant with _sgn() test */ expidx = mpz_size (EXP) - 1; /* point at most significant limb */ expbits = mpz_getlimbn (EXP, expidx); /* get most significant limb */ ASSERT (expbits != 0); /* Scan for the MSB in expbits */ bitmask = ((mp_limb_t) 1) << (GMP_NUMB_BITS - 1); for (; (bitmask & expbits) == 0; bitmask >>= 1); /* here the most significant limb with any set bits is in expbits, */ /* bitmask is set to mask in the msb of expbits */ mpz_set_ui 
(modulus->temp2, BASE); /* temp2 = BASE */ if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_mul_2exp (modulus->temp1, modulus->temp2, modulus->bits); mpz_mod (modulus->temp2, modulus->temp1, modulus->orig_modulus); } bitmask >>= 1; while (1) { for ( ; bitmask != 0; bitmask >>= 1) { /* Set temp2 = temp2*temp2 */ if (modulus->repr == ECM_MOD_BASE2) { mpz_mul (modulus->temp1, modulus->temp2, modulus->temp2); base2mod (modulus->temp2 , modulus->temp1, modulus->temp1, modulus); } else if (modulus->repr == ECM_MOD_MODMULN) { ecm_mulredc_basecase (modulus->temp2, modulus->temp2, modulus->temp2, modulus); } else { mpz_mul (modulus->temp1, modulus->temp2, modulus->temp2); REDC (modulus->temp2, modulus->temp1, modulus->temp2, modulus); } /* If bit is 1, set temp2 = temp2 * BASE */ if (expbits & bitmask) { if (BASE == 2UL) { mpz_mul_2exp (modulus->temp2, modulus->temp2, 1); if (mpz_cmp (modulus->temp2, modulus->orig_modulus) >= 0) mpz_sub (modulus->temp2, modulus->temp2, modulus->orig_modulus); } else { mpz_mul_ui (modulus->temp1, modulus->temp2, BASE); mpz_mod (modulus->temp2, modulus->temp1, modulus->orig_modulus); } } } if (expidx == 0) /* if we just processed the least */ break; /* significant limb, we are done */ expidx--; expbits = mpz_getlimbn (EXP, expidx); bitmask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1); } mpz_set (R, modulus->temp2); /* mpz_getlimbn() ignores sign of argument, so we computed BASE^|EXP|. If EXP was negative, do a modular inverse */ if (mpz_sgn (EXP) < 0) { mpres_invert (R, R, modulus); } } /* if (modulus->repr == ECM_MOD_BASE2 || ... ) */ ASSERT_NORMALIZED (R); } /* We use here the algorithm described in "Fast Modular Reduction" from Hasenplaugh, Gaubatz and Gobal, Arith'18, 2007: assuming N has n limbs, we have precomputed C = B^(n + ceil(n/2)) mod N. 
*/ static void mpres_mpz_mod (mpres_t R, mpz_t T, mpz_t N, mpz_t C) { size_t n = mpz_size (N); size_t t = mpz_size (T); size_t m = n + (n + 1) / 2; /* n + ceil(n/2) */ if (t > m && n > 1) /* if n=1, then m=2, thus h=0 */ { size_t c = mpz_size (C); size_t h, l; mp_ptr rp; mp_ptr tp = PTR(T); /* Warning: we might have t > 2n. In that case we reduce {tp+l+m, t-(m+l)} where l = t-2n. */ l = (t > 2 * n) ? t - 2 * n : 0; tp += l; h = t - (m + l); /* since t-l <= 2n and m = n + ceil(n/2), we have h <= n - ceil(n/2) = floor(n/2). On the other hand, if l=0 we have h = t-m > 0; if l>0, then l=t-2n, thus h=2n-m = floor(n/2) > 0 since n > 1. */ mpz_realloc (R, c + h); rp = PTR(R); if (c > h) mpn_mul (rp, PTR(C), c, tp + m, h); else mpn_mul (rp, tp + m, h, PTR(C), c); /* now add {rp, c+h} to {tp, m}: we have c <= n and h <= n/2, thus c + h <= m */ if (c + h > m) abort(); tp[m] = mpn_add (tp, tp, m, rp, c + h); m += l + tp[m]; tp -= l; /* put back the low l limbs */ MPN_NORMALIZE(tp, m); SIZ(T) = (SIZ(T) > 0) ? 
m : -m; } mpz_mod (R, T, N); } void mpres_mul (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { ASSERT_NORMALIZED (S1); ASSERT_NORMALIZED (S2); #ifdef WANT_ASSERT_EXPENSIVE mpz_t test1, test2, test_result1, test_result2; ASSERT_ALWAYS (S1 != modulus->temp1 && S2 != modulus->temp1 && R != modulus->temp1); mpz_init (test1); mpz_init (test2); mpz_init (test_result1); mpz_init (test_result2); mpres_get_z (test1, S1, modulus); mpres_get_z (test2, S2, modulus); mpz_mul (test_result1, test1, test2); mpz_mod (test_result1, test_result1, modulus->orig_modulus); #endif if (UNLIKELY(modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768)) { mp_size_t n = modulus->Fermat / GMP_NUMB_BITS; unsigned long k; mp_srcptr s1p, s2p; mp_size_t s1s, s2s; MPZ_REALLOC (R, n + 1); s1p = PTR(S1); s1s = SIZ(S1); s2p = PTR(S2); s2s = SIZ(S2); k = mpn_fft_best_k (n, S1 == S2); ASSERT(mpn_fft_next_size (n, k) == n); if (base2mod_2 (modulus->temp1, S1, n, modulus->orig_modulus)) { s1p = PTR(modulus->temp1); s1s = SIZ(modulus->temp1); } if (S1 == S2) { s2p = s1p; s2s = s1s; } else if (base2mod_2 (modulus->temp2, S2, n, modulus->orig_modulus)) { s2p = PTR(modulus->temp2); s2s = SIZ(modulus->temp2); } /* mpn_mul_fft() computes the product modulo B^n + 1, where B = 2^(machine word size in bits). So the result can be = B^n, in that case R is set to zero and 1 is returned as carry-out. In all other cases 0 is returned. Hence the complete result is R + cy * B^n, where cy is the value returned by mpn_mul_fft(). */ PTR(R)[n] = mpn_mul_fft (PTR(R), n, s1p, ABS(s1s), s2p, ABS(s2s), k); n ++; MPN_NORMALIZE(PTR(R), n); SIZ(R) = ((s1s ^ s2s) >= 0) ? 
(int) n : (int) -n; return; } switch (modulus->repr) { case ECM_MOD_BASE2: mpz_mul (modulus->temp1, S1, S2); base2mod (R, modulus->temp1, modulus->temp1, modulus); break; case ECM_MOD_MODMULN: MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_mulredc_basecase (R, S1, S2, modulus); break; case ECM_MOD_REDC: mpz_mul (modulus->temp1, S1, S2); REDC (R, modulus->temp1, modulus->temp2, modulus); break; default: /* case ECM_MOD_MPZ */ mpz_mul (modulus->temp1, S1, S2); mpres_mpz_mod (R, modulus->temp1, modulus->orig_modulus, modulus->aux_modulus); break; } ASSERT_NORMALIZED (R); #ifdef WANT_ASSERT_EXPENSIVE mpres_get_z (test_result2, R, modulus); if (mpz_cmp (test_result1, test_result2) != 0) { printf ("mpres_mul and mpz_mul/mpz_mod produced different results.\n"); gmp_printf ("input 1: %Zd\n", test1); gmp_printf ("input 2: %Zd\n", test2); gmp_printf ("mpres_mul: %Zd\n", test_result2); gmp_printf ("mpz_mul/mpz_mod: %Zd\n", test_result1); abort (); } mpz_clear (test1); mpz_clear (test2); mpz_clear (test_result1); mpz_clear (test_result2); #endif } /* R <- S1^2 mod modulus */ void mpres_sqr (mpres_t R, const mpres_t S1, mpmod_t modulus) { ASSERT_NORMALIZED (S1); #ifdef WANT_ASSERT_EXPENSIVE mpz_t test1, test2, test_result1, test_result2; ASSERT_ALWAYS (S1 != modulus->temp1 && R != modulus->temp1); mpz_init (test1); mpz_init (test_result1); mpz_init (test_result2); mpres_get_z (test1, S1, modulus); mpz_mul (test_result1, test1, test1); mpz_mod (test_result1, test_result1, modulus->orig_modulus); #endif if (UNLIKELY(modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768)) { mpres_mul (R, S1, S1, modulus); return; } switch (modulus->repr) { case ECM_MOD_BASE2: mpz_mul (modulus->temp1, S1, S1); base2mod (R, modulus->temp1, modulus->temp1, modulus); break; case ECM_MOD_MODMULN: MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_sqrredc_basecase (R, S1, modulus); break; case ECM_MOD_REDC: mpz_mul (modulus->temp1, S1, S1); REDC (R, modulus->temp1, modulus->temp2, modulus); 
break; default: /* case ECM_MOD_MPZ */ mpz_mul (modulus->temp1, S1, S1); mpres_mpz_mod (R, modulus->temp1, modulus->orig_modulus, modulus->aux_modulus); break; } ASSERT_NORMALIZED (R); #ifdef WANT_ASSERT_EXPENSIVE mpres_get_z (test_result2, R, modulus); if (mpz_cmp (test_result1, test_result2) != 0) { printf ("mpres_sqr and mpz_mul/mpz_mod produced different results.\n"); gmp_printf ("input 1: %Zd\n", test1); gmp_printf ("mpres_mul: %Zd\n", test_result2); gmp_printf ("mpz_mul/mpz_mod: %Zd\n", test_result1); abort (); } mpz_clear (test1); mpz_clear (test_result1); mpz_clear (test_result2); #endif } /* R <- S * n mod modulus */ void mpres_mul_ui (mpres_t R, const mpres_t S, const unsigned long n, mpmod_t modulus) { ASSERT_NORMALIZED (S); mpz_mul_ui (modulus->temp1, S, n); /* This is the same for all methods: just reduce with original modulus */ mpz_mod (R, modulus->temp1, modulus->orig_modulus); ASSERT_NORMALIZED (R); } /* R <- S * 2^k mod modulus */ void mpres_mul_2exp (mpres_t R, const mpres_t S, const unsigned long k, mpmod_t modulus) { ASSERT_NORMALIZED (S); mpz_mul_2exp (modulus->temp1, S, k); /* This is the same for all methods: just reduce with original modulus */ mpz_mod (R, modulus->temp1, modulus->orig_modulus); ASSERT_NORMALIZED (R); } /* Multiplies S by n and possibly divides by some constant. Whether or not it divides depends on the modulus representation and the modulus size. */ void mpres_muldivbysomething_si (mpres_t R, const mpres_t S, const long n, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MODMULN && modulus->bits / GMP_NUMB_BITS <= 20) /* FIXME: is the 20 here the same constant as in mulredc1_20? If so, it should be changed into a macro. 
*/ { MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); if (n < 0) { ecm_mulredc_1_basecase (R, S, (mp_limb_t) -n, modulus); mpres_neg (R, R, modulus); } else { ecm_mulredc_1_basecase (R, S, (mp_limb_t) n, modulus); } } else { mpz_mul_si (modulus->temp1, S, n); /* This is the same for all methods: just reduce with original modulus */ mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* This function multiplies an integer in mpres_t form with an integer in mpz_t form, and stores the output in mpz_t form. The advantage is that one REDC suffices to reduce the product and convert it to non-Montgomery representation. */ void mpres_mul_z_to_z (mpz_t R, const mpres_t S1, const mpz_t S2, mpmod_t modulus) { ASSERT_NORMALIZED (S1); if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat >= 32768) { mp_size_t n = modulus->Fermat / GMP_NUMB_BITS; unsigned long k; mp_srcptr s1p = PTR(S1), s2p = PTR(S2); mp_size_t s1s = SIZ(S1), s2s = SIZ(S2); MPZ_REALLOC (R, n + 1); k = mpn_fft_best_k (n, S1 == S2); ASSERT(mpn_fft_next_size (n, k) == n); if (base2mod_2 (modulus->temp1, S1, n, modulus->orig_modulus)) { s1p = PTR(modulus->temp1); s1s = SIZ(modulus->temp1); } if (S1 == S2) { s2p = s1p; s2s = s1s; } else if (base2mod_2 (modulus->temp2, S2, n, modulus->orig_modulus)) { s2p = PTR(modulus->temp2); s2s = SIZ(modulus->temp2); } /* mpn_mul_fft() computes the product modulo B^n + 1, where B = 2^(machine word size in bits). So the result can be = B^n, in that case R is set to zero and 1 is returned as carry-out. In all other cases 0 is returned. Hence the complete result is R + cy * B^n, where cy is the value returned by mpn_mul_fft(). */ PTR(R)[n] = mpn_mul_fft (PTR(R), n, s1p, ABS(s1s), s2p, ABS(s2s), k); n ++; MPN_NORMALIZE(PTR(R), n); SIZ(R) = ((s1s ^ s2s) >= 0) ? 
(int) n : (int) -n; mpz_mod (R, R, modulus->orig_modulus); return; } switch (modulus->repr) { case ECM_MOD_BASE2: if (mpz_sizeinbase (S2, 2) > (unsigned) abs (modulus->bits)) { base2mod (modulus->temp2, S2, modulus->temp1, modulus); mpz_mul (modulus->temp1, S1, modulus->temp2); } else mpz_mul (modulus->temp1, S1, S2); base2mod (R, modulus->temp1, modulus->temp1, modulus); mpz_mod (R, R, modulus->orig_modulus); break; case ECM_MOD_MODMULN: if (mpz_cmp (S2, modulus->orig_modulus) >= 0) { mpz_mod (modulus->temp2, S2, modulus->orig_modulus); MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_mulredc_basecase (R, S1, modulus->temp2, modulus); mpz_mod (R, R, modulus->orig_modulus); } else { MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_mulredc_basecase (R, S1, S2, modulus); mpz_mod (R, R, modulus->orig_modulus); } break; case ECM_MOD_REDC: if (mpz_cmp (S2, modulus->orig_modulus) >= 0) { mpz_mod (modulus->temp2, S2, modulus->orig_modulus); mpz_mul (modulus->temp1, S1, modulus->temp2); } else mpz_mul (modulus->temp1, S1, S2); REDC (R, modulus->temp1, modulus->temp2, modulus); mpz_mod (R, R, modulus->orig_modulus); break; default: if (mpz_cmp (S2, modulus->orig_modulus) >= 0) { mpz_mod (modulus->temp2, S2, modulus->orig_modulus); mpz_mul (modulus->temp1, S1, modulus->temp2); } else mpz_mul (modulus->temp1, S1, S2); mpz_mod (R, modulus->temp1, modulus->orig_modulus); break; } ASSERT_NORMALIZED (R); } /* Sets R = S * c, for some constant c that is coprime to modulus. This is primarily useful for multiplying numbers together for a gcd with modulus. The advantage is that we don't need to convert the mpz_t to Montgomery representation before applying REDC. */ void mpres_set_z_for_gcd (mpres_t R, const mpz_t S, mpmod_t modulus) { mpz_mod (R, S, modulus->orig_modulus); ASSERT_NORMALIZED (R); } /* R <- S / 2^n mod modulus. Does not need to be fast. 
*/ void mpres_div_2exp (mpres_t R, const mpres_t S, const unsigned int n, mpmod_t modulus) { int i; ASSERT_NORMALIZED (S); if (n == 0) { mpres_set (R, S, modulus); ASSERT_NORMALIZED (R); return; } if (mpz_odd_p (S)) { ASSERT (mpz_odd_p (modulus->orig_modulus)); mpz_add (R, S, modulus->orig_modulus); mpz_tdiv_q_2exp (R, R, 1); } else mpz_tdiv_q_2exp (R, S, 1); for (i = n ; i > 1; i--) { if (mpz_odd_p (R)) { ASSERT (mpz_odd_p (modulus->orig_modulus)); mpz_add (R, R, modulus->orig_modulus); } mpz_tdiv_q_2exp (R, R, 1); } ASSERT_NORMALIZED (R); } void mpres_add_ui (mpres_t R, const mpres_t S, const unsigned long n, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_add_ui (R, S, n); if (mpz_cmp (R, modulus->orig_modulus) > 0) mpz_sub (R, R, modulus->orig_modulus); /* This assumes modulus >= n */ } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_ui (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_add (modulus->temp1, modulus->temp1, S); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* R <- S1 + S2 mod modulus */ void mpres_add (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { ASSERT_NORMALIZED (S1); ASSERT_NORMALIZED (S2); mpz_add (R, S1, S2); if ((modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) && ABSIZ(R) > ABSIZ(modulus->orig_modulus)) { if (SIZ(R) > 0) mpz_sub (R, R, modulus->multiple); else mpz_add (R, R, modulus->multiple); /* N <= since multiple < 2^Nbits + N, now |R| < B */ } ASSERT_NORMALIZED (R); } /* R <- S - n mod modulus If repr == ECM_MOD_MODMULN or ECM_MOD_REDC, we need to convert n to Montgomery representation before substracting */ void mpres_sub_ui (mpres_t R, const mpres_t S, const unsigned long n, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_sub_ui (R, S, n); if (mpz_sgn (R) 
< 0) mpz_add (R, R, modulus->orig_modulus); /* Assumes modulus >= n */ } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_ui (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_sub (modulus->temp1, S, modulus->temp1); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* R <- n - S mod modulus If repr == ECM_MOD_MODMULN or ECM_MOD_REDC, we need to convert n to Montgomery representation before substracting */ void mpres_ui_sub (mpres_t R, const unsigned long n ,const mpres_t S, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_ui_sub (R, n, S); if (mpz_sgn (R) < 0) mpz_add (R, R, modulus->orig_modulus); /* Assumes modulus >= n */ } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_ui (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_sub (modulus->temp1, modulus->temp1, S); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* R <- S1 - S2 mod modulus */ void mpres_sub (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { ASSERT_NORMALIZED (S1); ASSERT_NORMALIZED (S2); mpz_sub (R, S1, S2); if ((modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) && ABSIZ(R) > ABSIZ(modulus->orig_modulus)) { if (SIZ(R) > 0) mpz_sub (R, R, modulus->multiple); else mpz_add (R, R, modulus->multiple); /* N <= since multiple < 2^Nbits + N, now |R| < B */ } ASSERT_NORMALIZED (R); } void mpres_set_z (mpres_t R, const mpz_t S, mpmod_t modulus) { if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) mpz_mod (R, S, modulus->orig_modulus); else if (modulus->repr == ECM_MOD_MODMULN) { mpz_mod (modulus->temp2, S, modulus->orig_modulus); ecm_mulredc_basecase (R, modulus->temp2, modulus->R2, modulus); } else if (modulus->repr == ECM_MOD_REDC) { mpz_mod (modulus->temp2, S, modulus->orig_modulus); 
mpz_mul (modulus->temp1, modulus->temp2, modulus->R2); REDC (R, modulus->temp1, modulus->temp2, modulus); } ASSERT_NORMALIZED (R); } /* R and S must not be modulus->temp1 */ void mpres_get_z (mpz_t R, const mpres_t S, mpmod_t modulus) { ASSERT_NORMALIZED (S); if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_mod (R, S, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_MODMULN) { mpz_set (modulus->temp1, S); MPZ_REALLOC (R, modulus->bits / GMP_NUMB_BITS); ecm_redc_basecase (R, modulus->temp1, modulus); mpz_mod (R, R, modulus->orig_modulus); /* FIXME: can we avoid this? */ } else if (modulus->repr == ECM_MOD_REDC) { REDC (R, S, modulus->temp1, modulus); mpz_mod (R, R, modulus->orig_modulus); /* FIXME: can we avoid this? */ } #ifdef DEBUG else { fprintf (ECM_STDERR, "mpres_get_z: Unexpected representation %d\n", modulus->repr); exit (EXIT_FAILURE); } #endif } /* R <- n mod modulus If repr==ECM_MOD_MPZ or ECM_MOD_BASE2, we convert n to Montgomery representation */ void mpres_set_ui (mpres_t R, const unsigned long n, mpmod_t modulus) { if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_set_ui (R, n); mpz_mod (R, R, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_ui (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* same as previous but with signed long */ void mpres_set_si (mpres_t R, const long n, mpmod_t modulus) { if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_set_si (R, n); mpz_mod (R, R, modulus->orig_modulus); } else if (modulus->repr == ECM_MOD_MODMULN || modulus->repr == ECM_MOD_REDC) { mpz_set_si (modulus->temp1, n); mpz_mul_2exp (modulus->temp1, modulus->temp1, modulus->bits); mpz_mod (R, modulus->temp1, modulus->orig_modulus); } ASSERT_NORMALIZED (R); } /* R <- -S mod modulus. 
Does not need to be efficient. */ void mpres_neg (mpres_t R, const mpres_t S, ATTRIBUTE_UNUSED mpmod_t modulus) { ASSERT_NORMALIZED (S); mpz_neg (R, S); ASSERT_NORMALIZED (R); } /* Returns non-zero if inversion succeeded, and zero if not */ int mpres_invert (mpres_t R, const mpres_t S, mpmod_t modulus) { #ifdef WANT_ASSERT_EXPENSIVE mpres_t test; mpz_t test_result; mpres_init (test, modulus); mpres_set (test, S, modulus); #endif ASSERT_NORMALIZED (S); if (mpz_invert (modulus->temp2, S, modulus->orig_modulus) == 0) return 0; if (modulus->repr == ECM_MOD_MPZ || modulus->repr == ECM_MOD_BASE2) { mpz_set (R, modulus->temp2); ASSERT_NORMALIZED (R); } else if (modulus->repr == ECM_MOD_MODMULN) { ecm_mulredc_basecase (R, modulus->temp2, modulus->R3, modulus); ASSERT_NORMALIZED (R); } else if (modulus->repr == ECM_MOD_REDC) { MPZ_NORMALIZED (S); mpz_mul (modulus->temp1, modulus->temp2, modulus->R3); REDC (R, modulus->temp1, modulus->temp2, modulus); ASSERT_NORMALIZED (R); } #ifdef DEBUG else { fprintf (ECM_STDERR, "mpres_invert: Unexpected representation %d\n", modulus->repr); exit (EXIT_FAILURE); } #endif #ifdef WANT_ASSERT_EXPENSIVE mpres_mul (test, test, R, modulus); mpz_init (test_result); mpres_get_z (test_result, test, modulus); ASSERT_ALWAYS(mpz_cmp_ui (test_result, 1UL) == 0); mpz_clear (test_result); mpres_clear (test, modulus); #endif return 1; } void mpres_gcd (mpz_t R, const mpres_t S, const mpmod_t modulus) { /* In MODMULN and REDC form, M(x) = x*R with gcd(R, modulus) = 1 . Therefore gcd(M(x), modulus) = gcd(x, modulus) and we need not bother to convert out of Montgomery form. 
*/ ASSERT_NORMALIZED (S); mpz_gcd (R, S, modulus->orig_modulus); } void mpres_out_str (FILE *fd, const unsigned int base, const mpres_t S, mpmod_t modulus) { mpres_get_z (modulus->temp2, S, modulus); mpz_out_str (fd, base, modulus->temp2); } int mpmod_selftest (const mpz_t n) { mpres_t test1, test2; mpmod_t modulus; printf ("Performing self test\n"); mpmod_init (modulus, n, 0); mpres_init (test1, modulus); mpres_init (test2, modulus); mpres_set_ui (test1, 2, modulus); mpres_set_ui (test2, 5, modulus); mpres_muldivbysomething_si (test1, test1, 5, modulus); mpres_muldivbysomething_si (test2, test2, 2, modulus); if (!mpres_equal (test1, test2, modulus)) { printf ("mpres_muldivbysomething_si() wrong\n"); fflush (stdout); abort(); } mpres_clear (test1, modulus); mpres_clear (test2, modulus); mpmod_clear (modulus); return 0; } /****************************************************/ /* mpresn: modular arithmetic based directly on mpn */ /****************************************************/ /* We use here a signed word-based redundant representation. In case N < B^n/16 (since for redc where we add to the absolute value of the residue), where n is the number of limbs of N in base B (2^32 or 2^64 usually), we can prove there is no adjustment (adding or subtracting N), cf http://www.loria.fr/~zimmerma/papers/norm.pdf. However current branch predictors are quite good, thus we prefer to keep the tests and to allow any input N (instead of only N < B^n/16). */ /* ensure R has allocated space for at least n limbs, and if less than n limbs are used, pad with zeros, and set SIZ(R) to n if positive or -n if negative */ void mpresn_pad (mpres_t R, mpmod_t N) { mp_size_t n = ABSIZ(N->orig_modulus); mp_size_t rn; _mpz_realloc (R, n); rn = mpz_size (R); ASSERT_ALWAYS (rn <= n); if (rn < n) { MPN_ZERO (PTR(R) + rn, n - rn); SIZ(R) = SIZ(R) >= 0 ? n : -n; } } void mpresn_unpad (mpres_t R) { mp_size_t n = ABSIZ(R); while (n > 0 && PTR(R)[n-1] == 0) n--; SIZ(R) = SIZ(R) >= 0 ? 
n : -n; } /* R <- S1 * S1 mod N, used only for ECM_MOD_MODMULN */ void mpresn_sqr (mpres_t R, const mpres_t S1, mpmod_t modulus) { mp_size_t n = ABSIZ(modulus->orig_modulus); ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ecm_sqrredc_basecase_n (PTR(R), PTR(S1), PTR(modulus->orig_modulus), n, modulus->Nprim, PTR(modulus->temp1)); SIZ(R) = n; } /* R <- S1 * S2 mod N, used only for ECM_MOD_MODMULN */ void mpresn_mul (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_size_t n = ABSIZ(modulus->orig_modulus); ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ASSERT (SIZ(S2) == n || -SIZ(S2) == n); ecm_mulredc_basecase_n (PTR(R), PTR(S1), PTR(S2), PTR(modulus->orig_modulus), n, modulus->Nprim, PTR(modulus->temp1)); SIZ(R) = SIZ(S1) == SIZ(S2) ? n : -n; } /* R <- S*m/B mod modulus where m fits in a mp_limb_t. Here S (w in dup_add_batch1) is the result of a subtraction, thus with the notations from http://www.loria.fr/~zimmerma/papers/norm.pdf we have S < 2 \alpha N. Then R < (2 \alpha N \beta + \beta N) = (2 \alpha + 1) N. This result R is used in an addition with u being the result of a squaring thus u < \alpha N, which gives a result < (3 \alpha + 1) N. Finally this result is used in a multiplication with another operand less than 2 \alpha N, thus we want: ((2 \alpha) (3 \alpha + 1) N^2 + \beta N)/\beta \leq \alpha N, i.e., 2 \alpha (3 \alpha + 1) \varepsilon + 1 \leq \alpha This implies \varepsilon \leq 7/2 - sqrt(3)/2 ~ 0.0359, in which case we can take \alpha = 2/3*sqrt(3)+1 ~ 2.1547. In that case no adjustment is needed in mpresn_mul_1. However we prefer to keep the adjustment here, to allow a larger set of inputs (\varepsilon \leq 1/16 = 0.0625 instead of 0.0359). 
*/ void mpresn_mul_1 (mpres_t R, const mpres_t S, const mp_limb_t m, mpmod_t modulus) { mp_ptr t1 = PTR(modulus->temp1); mp_ptr t2 = PTR(modulus->temp2); mp_size_t n = ABSIZ(modulus->orig_modulus); mp_limb_t q; ASSERT (SIZ(S) == n || -SIZ(S) == n); ASSERT (ALLOC(modulus->temp1) >= n+1); #if defined(USE_ASM_REDC) && defined(HAVE_NATIVE_MULREDC1_N) if (n <= MULREDC_ASSEMBLY_MAX) mulredc_1 (PTR(R), m, PTR(S), PTR(modulus->orig_modulus), n, modulus->Nprim[0]); else #endif { t1[n] = mpn_mul_1 (t1, PTR(S), n, m); q = t1[0] * modulus->Nprim[0]; t2[n] = mpn_mul_1 (t2, PTR(modulus->orig_modulus), n, q); #ifdef HAVE___GMPN_ADD_NC q = __gmpn_add_nc (PTR(R), t1 + 1, t2 + 1, n, t1[0] != 0); #else q = mpn_add_n (PTR(R), t1 + 1, t2 + 1, n); q += mpn_add_1 (PTR(R), PTR(R), n, t1[0] != 0); #endif while (q != 0) q -= mpn_sub_n (PTR(R), PTR(R), PTR(modulus->orig_modulus), n); } SIZ(R) = SIZ(S); /* sign is unchanged */ } /* R <- S1 + S2 mod modulus */ /* we assume all numbers are allocated to n limbs, and unused most significant limbs are set to zero */ void mpresn_add (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_ptr r = PTR(R); mp_ptr s1 = PTR(S1); mp_ptr s2 = PTR(S2); mp_size_t n = ABSIZ(modulus->orig_modulus); ATTRIBUTE_UNUSED mp_limb_t cy; ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ASSERT (SIZ(S2) == n || -SIZ(S2) == n); if (SIZ(S1) == SIZ(S2)) /* S1 and S2 are of same sign */ { cy = mpn_add_n (r, s1, s2, n); /* for N < B^n/16, the while loop will be never performed, which proves it will be performed a small number of times. In practice we observed up to 7 loops, but it happens rarely. 
*/ #ifndef MPRESN_NO_ADJUSTMENT while (cy != 0) cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n); #endif SIZ(R) = SIZ(S1); } else /* different signs */ { if (mpn_cmp (s1, s2, n) >= 0) { mpn_sub_n (r, s1, s2, n); /* no borrow here */ SIZ(R) = SIZ(S1); } else { mpn_sub_n (r, s2, s1, n); /* idem */ SIZ(R) = SIZ(S2); } } } void mpresn_sub (mpres_t R, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_ptr r = PTR(R); mp_ptr s1 = PTR(S1); mp_ptr s2 = PTR(S2); mp_size_t n = ABSIZ(modulus->orig_modulus); ATTRIBUTE_UNUSED mp_limb_t cy; ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ASSERT (SIZ(S2) == n || -SIZ(S2) == n); if (SIZ(S1) != SIZ(S2)) /* S1 and S2 are of different signs */ { cy = mpn_add_n (r, s1, s2, n); #ifndef MPRESN_NO_ADJUSTMENT while (cy != 0) cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n); #endif SIZ(R) = SIZ(S1); } else /* same signs, it's a real subtraction */ { if (mpn_cmp (s1, s2, n) >= 0) { mpn_sub_n (r, s1, s2, n); /* no borrow here */ SIZ(R) = SIZ(S1); } else { mpn_sub_n (r, s2, s1, n); /* idem */ SIZ(R) = -SIZ(S2); } } } /* (R, T) <- (S1 + S2, S1 - S2) Assume R differs from both S1 and S2. 
*/ void mpresn_addsub (mpres_t R, mpres_t T, const mpres_t S1, const mpres_t S2, mpmod_t modulus) { mp_ptr r = PTR(R); mp_ptr t = PTR(T); mp_ptr s1 = PTR(S1); mp_ptr s2 = PTR(S2); mp_size_t n = ABSIZ(modulus->orig_modulus); ATTRIBUTE_UNUSED mp_limb_t cy; ASSERT (R != S1); ASSERT (R != S2); ASSERT (SIZ(S1) == n || -SIZ(S1) == n); ASSERT (SIZ(S2) == n || -SIZ(S2) == n); if (SIZ(S1) == SIZ(S2)) /* S1 and S2 are of same sign */ { cy = mpn_add_n (r, s1, s2, n); #ifndef MPRESN_NO_ADJUSTMENT while (cy != 0) cy -= mpn_sub_n (r, r, PTR(modulus->orig_modulus), n); #endif SIZ(R) = SIZ(S1); if (mpn_cmp (s1, s2, n) >= 0) { mpn_sub_n (t, s1, s2, n); /* no borrow since {s1,n} >= {s2,n} */ SIZ(T) = SIZ(S1); } else { mpn_sub_n (t, s2, s1, n); /* idem since {s2,n} >= {s1,n} */ SIZ(T) = -SIZ(S2); } } else /* different signs */ { if (mpn_cmp (s1, s2, n) >= 0) { mpn_sub_n (r, s1, s2, n); /* no borrow since {s1,n} >= {s2,n} */ SIZ(R) = SIZ(S1); } else { mpn_sub_n (r, s2, s1, n); /* idem since {s2,n} >= {s1,n} */ SIZ(R) = SIZ(S2); } cy = mpn_add_n (t, s1, s2, n); #ifndef MPRESN_NO_ADJUSTMENT while (cy != 0) cy -= mpn_sub_n (t, t, PTR(modulus->orig_modulus), n); #endif SIZ(T) = SIZ(S1); } } ecm-6.4.4/mpmod.h0000644023561000001540000000234612106741273010510 00000000000000/* Header for modular multiplication. Copyright 2012 Paul Zimmermann. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. 
If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ /* Codes naming the available multiply/square-and-reduce methods. NOTE(review): values 0..3 appear to be the codes stored in the TUNE_MULREDC_TABLE / TUNE_SQRREDC_TABLE tuning tables, whose legends read "0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n" -- confirm against the code that reads those tables. */ #define MPMOD_MULREDC 0 /* assembly combined mulredc */ #define MPMOD_MUL_REDC1 1 /* mpn_mul_n or mpn_sqr followed by mpn_redc_1 */ #define MPMOD_MUL_REDC2 2 /* mpn_mul_n or mpn_sqr followed by mpn_redc_2 */ #define MPMOD_MUL_REDCN 3 /* mpn_mul_n or mpn_sqr followed by mpn_redc_n */ #define MPMOD_MUL_REDC_C 4 /* mpn_mul_n or mpn_sqr followed by plain C redc */ ecm-6.4.4/ecm-params.h0000644023561000001540000000316612106741273011422 00000000000000/* produced on pasta.loria.fr (Intel(R) Core(TM)2 CPU 6700 @ 2.66GHz) */ #ifndef HAVE_MPIR /* tuning parameters for GMP, tuned for GMP 5.0.4 */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,2,0,2,0,2,1,1,1,1,2,2,1,2,2} #define MPZMOD_THRESHOLD 21 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 8, 9, 10, 12, 11, 12, 13, 12, 12, 14, 16, 16, 16, 18, 18, 18} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 8 #define POLYINVERT_NTT_THRESHOLD 128 #define POLYEVALT_NTT_THRESHOLD 256 #define MPZSPV_NORMALISE_STRIDE 128 #else /* tuning parameters for MPIR, tuned for MPIR 2.5.1 */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,0,1,1,2,2,1,1,1,1,1,1,2,1,2} #define MPZMOD_THRESHOLD 21 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 0, 6, 6, 7, 8, 9, 9, 11, 10, 10, 11, 12, 13, 14, 14, 11, 13, 18, 18, 14, 20, 16, 18, 18, 20} #define 
NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 128 #define POLYEVALT_NTT_THRESHOLD 256 #define MPZSPV_NORMALISE_STRIDE 32 #endif ecm-6.4.4/ecm-params.h.athlon640000644023561000001540000000315012106741274013052 00000000000000/* tuned on frite.loria.fr (AMD Phenom(tm) II X2 B55 Processor) */ #ifndef HAVE_MPIR /* tuning parameters for GMP, tuned with GMP 5.0.4 */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,1,2,1,1,1,2} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,0,2,2,1,2,2,1,2,1,2,1,1,1,2} #define MPZMOD_THRESHOLD 21 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 9, 10, 11, 10, 12, 12, 12, 14, 14, 16, 16, 16, 18, 19, 19, 20, 21, 18, 19} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 12 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 512 #define POLYEVALT_NTT_THRESHOLD 128 #define MPZSPV_NORMALISE_STRIDE 128 #else /* tuning parameters for MPIR, tuned with MPIR 2.5.1 */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1} #define MPZMOD_THRESHOLD 21 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 18, 20, 24} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 12 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 512 #define PREREVERTDIVISION_NTT_THRESHOLD 32 #define POLYINVERT_NTT_THRESHOLD 512 #define POLYEVALT_NTT_THRESHOLD 256 #define MPZSPV_NORMALISE_STRIDE 128 #endif 
ecm-6.4.4/COPYING0000644023561000001540000010451312106741273010255 00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. 
Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. 
Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. 
A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. 
All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. 
When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. 
This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <http://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <http://www.gnu.org/philosophy/why-not-lgpl.html>. ecm-6.4.4/compile0000755023561000001540000001533712106744312010602 00000000000000#! /bin/sh # Wrapper for compilers which do not understand '-c -o'. scriptversion=2012-01-04.17; # UTC # Copyright (C) 1999, 2000, 2003, 2004, 2005, 2009, 2010, 2012 Free # Software Foundation, Inc. # Written by Tom Tromey <tromey@cygnus.com>. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. 
# func_file_conv build_file lazy
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts. If the determined conversion
# type is listed in (the comma separated) LAZY, no conversion will
# take place.
#
# Arguments:
#   $1  path to convert
#   $2  optional comma separated list of conversion types (mingw, cygwin,
#       wine) for which the path must be left alone
# Result: the global variable $file always receives $1; it is rewritten
# only when $1 is "/" or starts with a single slash.  The conversion type
# is worked out once from `uname -s` and cached in the global $file_conv.
func_file_conv ()
{
  file=$1
  case $file in
    / | /[!/]*) # absolute file, and not a UNC file
      if test -z "$file_conv"; then
        # lazily determine how to convert abs files
        case `uname -s` in
          MINGW*)
            file_conv=mingw
            ;;
          CYGWIN*)
            file_conv=cygwin
            ;;
          *)
            file_conv=wine
            ;;
        esac
      fi
      # The subject is "<type>/,<lazy list>,"; the first pattern matches
      # when <type> is one of the comma separated entries of $2, in which
      # case $file is kept as is.
      case $file_conv/,$2, in
        *,$file_conv,*)
          ;;
        mingw/*)
          # The sed expression removes the surrounding double quotes and the
          # trailing blank from what cmd echoes back.
          file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
          ;;
        cygwin/*)
          # Fall back to the unconverted name if cygpath fails.
          file=`cygpath -m "$file" || echo "$file"`
          ;;
        wine/*)
          # Fall back to the unconverted name if winepath fails.
          file=`winepath -w "$file" || echo "$file"`
          ;;
      esac
      ;;
  esac
}

# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
#
# The positional parameters are rewritten in place.  Each loop iteration
# looks at $1; an argument that is kept is appended (possibly translated)
# with 'set x "$@" NEW' followed by 'shift', which drops the placeholder x,
# and the 'shift' at the end of the loop body then drops the old $1.  After
# as many iterations as there were arguments, "$@" holds the translated
# command line in the original order.  Options meant for the linker are not
# appended; they are collected in $linker_opts and placed after everything
# else, introduced by -link.
# When $eat is non-empty the current argument is skipped, because it was
# already consumed as the operand of the previous option (-o, -Xlinker).
# $eat is read here before being assigned, so it relies on the value the
# caller left in it.
func_cl_wrapper ()
{
  # Assume a capable shell
  lib_path=
  # $shared is executed as a command below: ':' succeeds, 'false' fails.
  shared=:
  linker_opts=
  for arg
  do
    if test -n "$eat"; then
      eat=
    else
      case $1 in
        -o)
          # configure might choose to run compile as 'compile cc -o foo foo.c'.
          eat=1
          # Object file names become -Fo<file>, anything else -Fe<file>.
          case $2 in
            *.o | *.[oO][bB][jJ])
              func_file_conv "$2"
              set x "$@" -Fo"$file"
              shift
              ;;
            *)
              func_file_conv "$2"
              set x "$@" -Fe"$file"
              shift
              ;;
          esac
          ;;
        -I*)
          func_file_conv "${1#-I}" mingw
          set x "$@" -I"$file"
          shift
          ;;
        -l*)
          # Look for the library in the -L directories seen so far and in
          # $LIB, both split on ';'.  <lib>.dll.lib is only considered as
          # long as -static has not been seen.
          lib=${1#-l}
          found=no
          save_IFS=$IFS
          IFS=';'
          for dir in $lib_path $LIB
          do
            IFS=$save_IFS
            if $shared && test -f "$dir/$lib.dll.lib"; then
              found=yes
              set x "$@" "$dir/$lib.dll.lib"
              break
            fi
            if test -f "$dir/$lib.lib"; then
              found=yes
              set x "$@" "$dir/$lib.lib"
              break
            fi
          done
          IFS=$save_IFS

          # Not found anywhere: pass the bare <lib>.lib name on.
          test "$found" != yes && set x "$@" "$lib.lib"
          shift
          ;;
        -L*)
          # Remember the directory for later -l lookups and hand it to the
          # linker as -LIBPATH:<dir>.
          func_file_conv "${1#-L}"
          if test -z "$lib_path"; then
            lib_path=$file
          else
            lib_path="$lib_path;$file"
          fi
          linker_opts="$linker_opts -LIBPATH:$file"
          ;;
        -static)
          shared=false
          ;;
        -Wl,*)
          # Split the comma separated flags and forward each to the linker.
          arg=${1#-Wl,}
          save_ifs="$IFS"; IFS=','
          for flag in $arg; do
            IFS="$save_ifs"
            linker_opts="$linker_opts $flag"
          done
          IFS="$save_ifs"
          ;;
        -Xlinker)
          # The next argument goes to the linker and is skipped by the loop.
          eat=1
          linker_opts="$linker_opts $2"
          ;;
        -*)
          # Any other option is kept unchanged.
          set x "$@" "$1"
          shift
          ;;
        *.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
          # Sources with these suffixes are passed with a -Tp prefix.
          func_file_conv "$1"
          set x "$@" -Tp"$file"
          shift
          ;;
        *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
          func_file_conv "$1" mingw
          set x "$@" "$file"
          shift
          ;;
        *)
          set x "$@" "$1"
          shift
          ;;
      esac
    fi
    shift
  done
  if test -n "$linker_opts"; then
    # $linker_opts starts with a blank, so this yields "-link <opts...>".
    linker_opts="-link$linker_opts"
  fi
  # $linker_opts is left unquoted on purpose so that it splits into words.
  exec "$@" $linker_opts
  # Presumably only reached if exec itself fails.
  exit 1
}
;; esac ofile= cfile= for arg do if test -n "$eat"; then eat= else case $1 in -o) # configure might choose to run compile as 'compile cc -o foo foo.c'. # So we strip '-o arg' only if arg is an object. eat=1 case $2 in *.o | *.obj) ofile=$2 ;; *) set x "$@" -o "$2" shift ;; esac ;; *.c) cfile=$1 set x "$@" "$1" shift ;; *) set x "$@" "$1" shift ;; esac fi shift done if test -z "$ofile" || test -z "$cfile"; then # If no '-o' option was seen then we might have been invoked from a # pattern rule where we don't need one. That is ok -- this is a # normal compilation that the losing compiler can handle. If no # '.c' file was seen then we are probably linking. That is also # ok. exec "$@" fi # Name of file we expect compiler to create. cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` # Create the lock directory. # Note: use '[/\\:.-]' here to ensure that we don't use the same name # that we are using for the .o file. Also, base the name on the expected # object file name, since that is what matters with a parallel build. lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d while true; do if mkdir "$lockdir" >/dev/null 2>&1; then break fi sleep 1 done # FIXME: race condition here if user kills between mkdir and trap. trap "rmdir '$lockdir'; exit 1" 1 2 15 # Run the compile. "$@" ret=$? if test -f "$cofile"; then test "$cofile" = "$ofile" || mv "$cofile" "$ofile" elif test -f "${cofile}bj"; then test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" fi rmdir "$lockdir" exit $ret # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: ecm-6.4.4/main.c0000644023561000001540000015045512111676237010323 00000000000000/* GMP-ECM -- Integer factorization with ECM, P-1 and P+1 methods. 
/* Flags shared between signal_handler() and the rest of the program.
   They are written from inside a signal handler, so they are declared
   volatile sig_atomic_t: this is the object type the C standard allows a
   handler to assign to, and volatile keeps the compiler from caching the
   value in code that polls it through stop_asap_test(). */
static volatile sig_atomic_t exit_asap_value = 0;
/* Remembers which signal we received */
static volatile sig_atomic_t exit_asap_signalnr = 0;

/* Handler for SIGINT and SIGTERM.

   On the first SIGINT or SIGTERM it raises the "exit as soon as possible"
   flag, records the signal number in exit_asap_signalnr and restores the
   default disposition of both signals.  Any other signal number is
   ignored.

   sig: number of the signal being delivered. */
void
signal_handler (int sig)
{
  if (sig == SIGINT || sig == SIGTERM)
    {
      exit_asap_value = 1;
      exit_asap_signalnr = sig;
      /* If one of these two signals arrives again, we'll let the default
         handler take over, which will usually terminate the process
         immediately. */
      signal (SIGINT, SIG_DFL);
      signal (SIGTERM, SIG_DFL);
    }
  else
    {
      /* How did this happen? Let's ignore it for now, abort instead? */
    }
}

/* Tell whether a termination request has been received.

   Returns 0 until signal_handler() has seen SIGINT or SIGTERM, and 1
   afterwards. */
int
stop_asap_test (void)
{
  return exit_asap_value;
}
\n"); printf (" -maxmem n use at most n MB of memory in stage 2\n"); printf (" -stage1time n add n seconds to ECM stage 1 time (for expected time est.)\n"); #ifdef WANT_SHELLCMD printf (" -faccmd cmd execute cmd when factor is found. Input number, factor\n" " and cofactor are given to cmd via stdin, each on a line\n"); printf (" -prpcmd cmd use shell command cmd to do prp tests (number via stdin)\n"); printf (" -idlecmd cmd before each curve run cmd and terminate if exit code >0\n"); #endif /*printf (" -extra functions added by JimF\n"); */ printf (" -i n increment B1 by this constant on each run\n"); printf (" -I f auto-calculated increment for B1 multiplied by 'f' scale factor\n"); printf (" -inp file Use file as input (instead of redirecting stdin)\n"); printf (" -b Use breadth-first mode of file processing\n"); printf (" -d Use depth-first mode of file processing (default)\n"); printf (" -one Stop processing a candidate if a factor is found (looping mode)\n"); printf (" -n run ecm in \"nice\" mode (below normal priority)\n"); printf (" -nn run ecm in \"very nice\" mode (idle priority)\n"); printf (" -ve n Verbosely show short (< n character) expressions on each loop\n"); printf (" -cofdec Force cofactor output in decimal (even if expressions are used)\n"); printf (" -B2scale f Multiplies the default B2 value by f \n"); printf (" -go val Preload with group order val, which can be a simple expression,\n"); printf (" or can use N as a placeholder for the number being factored.\n"); printf (" -printconfig Print compile-time configuration and exit.\n"); printf (" -batch[=1|2] (experimental) use Montgomery parametrization and batch\n" " computation. 
Option -batch is equivalent to -batch=1\n"); printf (" -bsaves file In the batch mode, save s in file.\n"); printf (" -bloads file In the batch mode, load s from file.\n"); printf (" -h, --help Prints this help and exit.\n"); } /* Print parameters that were used to build GMP-ECM */ static void print_config () { printf ("Compilation options:\n"); #ifdef __MPIR_VERSION printf ("Included MPIR header files version %d.%d.%d\n", __MPIR_VERSION, __MPIR_VERSION_MINOR, __MPIR_VERSION_PATCHLEVEL); #else /* __MPIR_VERSION */ #ifdef __GNU_MP_VERSION_PATCHLEVEL printf ("Included GMP header files version %d.%d.%d\n", __GNU_MP_VERSION, __GNU_MP_VERSION_MINOR, __GNU_MP_VERSION_PATCHLEVEL); #else printf ("Included GMP header files version %d.%d\n", __GNU_MP_VERSION, __GNU_MP_VERSION_MINOR); #endif #endif /* __MPIR_VERSION */ #ifdef GWNUM_VERSION printf ("Included GWNUM header files version %s\n", GWNUM_VERSION); #else printf ("GWNUM_VERSION undefined\n"); #endif #ifdef HAVE_SSE2 printf ("HAVE_SSE2 = %d\n", HAVE_SSE2); #else printf ("HAVE_SSE2 undefined\n"); #endif #ifdef HAVE___GMPN_ADD_NC printf ("HAVE___GMPN_ADD_NC = %d\n", HAVE___GMPN_ADD_NC); #else printf ("HAVE___GMPN_ADD_NC undefined\n"); #endif #ifdef HAVE___GMPN_MOD_34LSUB1 printf ("HAVE___GMPN_MOD_34LSUB1 = %d\n", HAVE___GMPN_MOD_34LSUB1); #else printf ("HAVE___GMPN_MOD_34LSUB1 undefined\n"); #endif #ifdef HAVE___GMPN_REDC_1 printf ("HAVE___GMPN_REDC_1 = %d\n", HAVE___GMPN_REDC_1); #else printf ("HAVE___GMPN_REDC_1 undefined\n"); #endif #ifdef MEMORY_DEBUG printf ("MEMORY_DEBUG = %d\n", MEMORY_DEBUG); #else printf ("MEMORY_DEBUG undefined\n"); #endif #ifdef USE_ASM_REDC printf ("USE_ASM_REDC = %d\n", USE_ASM_REDC); #ifdef WINDOWS64_ABI printf ("WINDOWS64_ABI = %d\n", WINDOWS64_ABI); #else printf ("WINDOWS64_ABI undefined\n"); #endif #else printf ("USE_ASM_REDC undefined\n"); #endif #ifdef WANT_ASSERT printf ("WANT_ASSERT = %d\n", WANT_ASSERT); #else printf ("WANT_ASSERT undefined\n"); #endif #ifdef WANT_SHELLCMD printf 
("WANT_SHELLCMD = %d\n", WANT_SHELLCMD); #else printf ("WANT_SHELLCMD undefined\n"); #endif #ifdef _OPENMP printf ("_OPENMP = %d\n", _OPENMP); #else printf ("_OPENMP undefined\n"); #endif #ifdef MPZMOD_THRESHOLD printf ("MPZMOD_THRESHOLD = %d\n", MPZMOD_THRESHOLD); #else printf ("MPZMOD_THRESHOLD undefined\n"); #endif #ifdef REDC_THRESHOLD printf ("REDC_THRESHOLD = %d\n", REDC_THRESHOLD); #else printf ("REDC_THRESHOLD undefined\n"); #endif #ifdef MUL_NTT_THRESHOLD printf ("MUL_NTT_THRESHOLD = %d\n", MUL_NTT_THRESHOLD); #else printf ("MUL_NTT_THRESHOLD undefined\n"); #endif #ifdef NTT_GFP_TWIDDLE_DIF_BREAKOVER printf ("NTT_GFP_TWIDDLE_DIF_BREAKOVER = %d\n", NTT_GFP_TWIDDLE_DIF_BREAKOVER); #else printf ("NTT_GFP_TWIDDLE_DIF_BREAKOVER undefined\n"); #endif #ifdef NTT_GFP_TWIDDLE_DIT_BREAKOVER printf ("NTT_GFP_TWIDDLE_DIT_BREAKOVER = %d\n", NTT_GFP_TWIDDLE_DIT_BREAKOVER); #else printf ("NTT_GFP_TWIDDLE_DIT_BREAKOVER undefined\n"); #endif #ifdef PREREVERTDIVISION_NTT_THRESHOLD printf ("PREREVERTDIVISION_NTT_THRESHOLD = %d\n", PREREVERTDIVISION_NTT_THRESHOLD); #else printf ("PREREVERTDIVISION_NTT_THRESHOLD undefined\n"); #endif #ifdef POLYINVERT_NTT_THRESHOLD printf ("POLYINVERT_NTT_THRESHOLD = %d\n", POLYINVERT_NTT_THRESHOLD); #else printf ("POLYINVERT_NTT_THRESHOLD undefined\n"); #endif #ifdef POLYEVALT_NTT_THRESHOLD printf ("POLYEVALT_NTT_THRESHOLD = %d\n", POLYEVALT_NTT_THRESHOLD); #else printf ("POLYEVALT_NTT_THRESHOLD undefined\n"); #endif #ifdef MPZSPV_NORMALISE_STRIDE printf ("MPZSPV_NORMALISE_STRIDE = %d\n", MPZSPV_NORMALISE_STRIDE); #else printf ("MPZSPV_NORMALISE_STRIDE undefined\n"); #endif } /****************************************************************************** * * * Main program * * * ******************************************************************************/ int main (int argc, char *argv[]) { char **argv0 = argv; mpz_t seed, x, sigma, A, f, orig_x0, B2, B2min, startingB2min; mpcandi_t n; mpgocandi_t go; mpq_t rat_x0; double B1, B1done; int 
result = 0, returncode = 0; int verbose = OUTPUT_NORMAL; /* verbose level */ int timestamp = 0; int method = ECM_ECM, method1; int use_ntt = 1; /* Default, use NTT if input is small enough */ int specific_x0 = 0, /* 1=starting point supplied by user, 0=random or */ /* compute from sigma */ specific_sigma = 0; /* 1=sigma from command line, 0=make random */ int factor_is_prime; /* If a factor was found, indicate whether factor, cofactor are */ /* prime. If no factor was found, both are zero. */ int repr = ECM_MOD_DEFAULT; /* automatic choice */ int nobase2step2 = 0; /* flag to turn off base 2 arithmetic in ecm stage 2 */ unsigned long k = ECM_DEFAULT_K; /* default number of blocks in stage 2 */ int S = ECM_DEFAULT_S; /* Degree for Brent-Suyama extension requested by user. Positive value: use S-th power, negative: use degree |S| Dickson poly, default (0): automatic choice. */ gmp_randstate_t randstate; char *savefilename = NULL, *resumefilename = NULL, *infilename = NULL; char *TreeFilename = NULL, *chkfilename = NULL; char rtime[256] = "", who[256] = "", comment[256] = "", program[256] = ""; FILE *resumefile = NULL, *infile = NULL; mpz_t resume_lastN, resume_lastfac; /* When resuming residues from a file, store the last number processed and the factors found for this it */ int resume_wasPrp = 0; /* 1 if resume_lastN/resume_lastfac is a PRP */ int primetest = 0, saveappend = 0; double autoincrementB1 = 0.0, startingB1; unsigned int autoincrementB1_calc = 0; unsigned int breadthfirst_maxcnt=0, breadthfirst_cnt=0; int breadthfirst = 0; unsigned int count = 1; /* number of curves for each number */ unsigned int cnt = 0; /* number of remaining curves for current number */ unsigned int linenum = 0, factsfound = 0; mpcandi_t *pCandidates = NULL; unsigned int nCandidates=0, nMaxCandidates=0; int deep=1, trial_factor_found; unsigned int displayexpr = 0; unsigned int decimal_cofactor = 0; double B2scale = 1.0; double maxmem = 0.; double stage1time = 0.; ecm_params params; int 
batch = 0; /* By default we don't use batch mode */ char *savefile_s = NULL; char *loadfile_s = NULL; #ifdef WANT_SHELLCMD char *faccmd = NULL; char *idlecmd = NULL; #endif #ifdef HAVE_GWNUM double gw_k = 0.0; /* set default values for gwnum poly k*b^n+c */ unsigned long gw_b = 0; /* set default values for gwnum poly k*b^n+c */ unsigned long gw_n = 0; /* set default values for gwnum poly k*b^n+c */ signed long gw_c = 0; /* set default values for gwnum poly k*b^n+c */ #endif /* check ecm is linked with a compatible library */ if (mp_bits_per_limb != GMP_NUMB_BITS) { fprintf (stderr, "Error, mp_bits_per_limb and GMP_NUMB_BITS differ\n"); fprintf (stderr, "Please check your LD_LIBRARY_PATH variable\n"); exit (1); } #ifdef MEMORY_DEBUG tests_memory_start (); #endif ecm_init (params); /* initialize the group order candidate */ mpgocandi_t_init (&go); /* Init variables we might need to store options */ MPZ_INIT (seed); MPZ_INIT (sigma); MPZ_INIT (A); MPZ_INIT (B2); MPZ_INIT (B2min); MPZ_INIT (startingB2min); mpq_init (rat_x0); /* first look for options */ while ((argc > 1) && (argv[1][0] == '-')) { if (strcmp (argv[1], "-pm1") == 0) { method = ECM_PM1; argv++; argc--; } else if (strcmp (argv[1], "-pp1") == 0) { method = ECM_PP1; argv++; argc--; } else if (strcmp (argv[1], "-q") == 0) { verbose = OUTPUT_ALWAYS; argv++; argc--; } else if (strcmp (argv[1], "-v") == 0) { verbose ++; argv++; argc--; } else if (strcmp (argv[1], "-timestamp") == 0) { timestamp = 1; argv++; argc--; } else if (strcmp (argv[1], "-mpzmod") == 0) { repr = ECM_MOD_MPZ; argv++; argc--; } else if (strcmp (argv[1], "-modmuln") == 0) { repr = ECM_MOD_MODMULN; argv++; argc--; } else if (strcmp (argv[1], "-redc") == 0) { repr = ECM_MOD_REDC; argv++; argc--; } else if (strcmp (argv[1], "-nobase2") == 0) { repr = ECM_MOD_NOBASE2; argv++; argc--; } else if (strcmp (argv[1], "-nobase2s2") == 0) { nobase2step2 = 1; argv++; argc--; } else if (strcmp (argv[1], "-ntt") == 0) { use_ntt = 2; /* Use NTT, even for 
large input numbers */ argv++; argc--; } else if (strcmp (argv[1], "-no-ntt") == 0) { use_ntt = 0; /* Never use NTT */ argv++; argc--; } else if (strcmp (argv[1], "-primetest") == 0) { primetest = 1; argv++; argc--; } else if (strcmp (argv[1], "-one") == 0) { deep = 0; argv++; argc--; } else if (strcmp (argv[1], "-b") == 0) { breadthfirst = 1; argv++; argc--; } else if (strcmp (argv[1], "-batch") == 0 || strcmp (argv[1], "-batch=1") == 0) { batch = 1; argv++; argc--; } else if (strcmp (argv[1], "-batch=2") == 0) { batch = 2; argv++; argc--; } else if ((argc > 2) && (strcmp (argv[1], "-bsaves") == 0)) { savefile_s = argv[2]; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-bloads") == 0)) { loadfile_s = argv[2]; argv += 2; argc -= 2; } else if (strcmp (argv[1], "-h") == 0 || strcmp (argv[1], "--help") == 0) { usage (); exit (EXIT_SUCCESS); } else if (strcmp (argv[1], "-printconfig") == 0) { print_config (); exit (EXIT_SUCCESS); } else if (strcmp (argv[1], "-d") == 0) { /* -1 is a flag used during argv processing where a subsquent -i file will NOT change it. 
Then when done processing args, we change a -1 to a 0 */ breadthfirst = -1; argv++; argc--; } else if (strcmp (argv[1], "-cofdec") == 0) { decimal_cofactor = 1; argv++; argc--; } else if (strcmp (argv[1], "-n") == 0) { NICE10; argv++; argc--; } else if (strcmp (argv[1], "-nn") == 0) { NICE20; argv++; argc--; } else if ((argc > 2) && (strcmp (argv[1], "-x0")) == 0) { if (mpq_set_str (rat_x0, argv[2], 0)) { fprintf (stderr, "Error, invalid starting point: %s\n", argv[2]); exit (EXIT_FAILURE); } specific_x0 = 1; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-sigma")) == 0) { if (mpz_set_str (sigma, argv[2], 0) || mpz_cmp_ui (sigma, 6) < 0) { fprintf (stderr, "Error, invalid sigma value: %s\n", argv[2]); exit (EXIT_FAILURE); } specific_sigma = 1; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-A")) == 0) { if (mpz_set_str (A, argv[2], 0)) { fprintf (stderr, "Error, invalid A value: %s\n", argv[2]); exit (EXIT_FAILURE); } argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-power")) == 0) { S = abs (atoi (argv[2])); /* should this be validated? and a error/abort issued if 0 ??? */ argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-dickson") == 0)) { S = - abs ( atoi (argv[2])); /* should this be validated? and a error/abort issued if 0 ??? */ argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-k") == 0)) { k = atol (argv[2]); /* should this be validated? and a error/abort issued if 0 ??? */ argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-c") == 0)) { count = atoi (argv[2]); /* should this be validated? and a error/abort issued if 0 ??? 
*/ argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-save") == 0)) { savefilename = argv[2]; saveappend = 0; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-savea") == 0)) { savefilename = argv[2]; saveappend = 1; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-resume") == 0)) { resumefilename = argv[2]; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-chkpnt") == 0)) { chkfilename = argv[2]; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-treefile") == 0)) { TreeFilename = argv[2]; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-base2") == 0)) { int b = atoi (argv[2]); if (abs (b) >= 16) /* |Values| < 16 are reserved for other methods */ repr = b; /* keep method unchanged in that case */ argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-i") == 0)) { autoincrementB1 = strtod (argv[2], NULL); if (autoincrementB1 < 1.0) { fprintf (stderr, "Error, the -i n option requires n >= 1\n"); exit (EXIT_FAILURE); } argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-I") == 0)) { autoincrementB1 = strtod (argv[2], NULL); autoincrementB1_calc = 1; if (autoincrementB1 <= 0.0) { fprintf (stderr, "Error, the -I f option requires f > 0\n"); exit (EXIT_FAILURE); } argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-inp") == 0)) { infilename = argv[2]; infile = fopen (infilename, "r"); /* a -d depth-first switch has already been processed, so DO NOT reset to breadth-first */ if (breadthfirst != -1) breadthfirst = 1; if (!infile) { fprintf (stderr, "Can't find input file %s\n", infilename); exit (EXIT_FAILURE); } argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-ve") == 0)) { displayexpr = atoi (argv[2]); if (displayexpr == 0) { fprintf (stderr, "Error, the -ve option requires a number argument\n"); exit (EXIT_FAILURE); } argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-B2scale") == 0)) { B2scale 
= atof (argv[2]); if (verbose >= 2) printf ("Scaling B2 values by a factor of %.4f\n", B2scale); argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-maxmem") == 0)) { maxmem = atof (argv[2]) * 1048576.; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-stage1time") == 0)) { stage1time = atof (argv[2]); argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-go") == 0)) { if (go.cpOrigExpr) { fprintf (stderr, "Warning, for multiple -go options, only the last one is taken into account.\n"); free (go.cpOrigExpr); } go.cpOrigExpr = malloc (strlen (argv[2]) + 1); if (go.cpOrigExpr == NULL) { fprintf (stderr, "Cannot allocate memory in main\n"); exit (1); } strcpy (go.cpOrigExpr, argv[2]); if (strchr (go.cpOrigExpr, 'N')) { go.containsN = 1; go.Valid = 1; /* we actually do not know if it is valid here, but we "assume" until the first time it gets run through */ } else { go.containsN = 0; /* have "fully" parsed expr or number. Do not recompute for each N */ if (eval_str (&(go.Candi), go.cpOrigExpr, 0, NULL)) go.Valid = 1; } argv += 2; argc -= 2; } #ifdef WANT_SHELLCMD else if ((argc > 2) && (strcmp (argv[1], "-prpcmd") == 0)) { prpcmd = argv[2]; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-faccmd") == 0)) { faccmd = argv[2]; argv += 2; argc -= 2; } else if ((argc > 2) && (strcmp (argv[1], "-idlecmd") == 0)) { idlecmd = argv[2]; argv += 2; argc -= 2; } #endif else { fprintf (stderr, "Unknown option: %s\n", argv[1]); exit (EXIT_FAILURE); } } /* check that S is even for old P-1 stage 2 */ if ((method == ECM_PM1) && (S != ECM_DEFAULT_S) && (S % 2 != 0)) { fprintf (stderr, "Error, S should be even for P-1\n"); exit (EXIT_FAILURE); } /* Ok, now we can "reset" the breadthfirst switch so that we do depthfirst as requested */ if (breadthfirst == -1) breadthfirst = 0; if (argc < 2) { fprintf (stderr, "Invalid arguments. 
See %s --help.\n", argv0[0]); exit (EXIT_FAILURE); } /* start of the program */ if (verbose >= 1) { char Gmp_version[64]; char out0[128], *out = out0; #ifdef __MPIR_VERSION sprintf (Gmp_version, "MPIR %d.%d.%d", __MPIR_VERSION, __MPIR_VERSION_MINOR, __MPIR_VERSION_PATCHLEVEL); #else /* original GMP */ sprintf (Gmp_version, "GMP %s", gmp_version); #endif /* __MPIR_VERSION */ out += sprintf (out, "GMP-ECM %s [configured with %s", VERSION, Gmp_version); #ifdef HAVE_GWNUM out += sprintf (out, ", GWNUM %s", GWNUM_VERSION); #endif #ifdef USE_ASM_REDC out += sprintf (out, ", --enable-asm-redc"); #endif #ifdef WANT_ASSERT out += sprintf (out, ", --enable-assert"); #endif printf ("%s] [", out0); switch (method) { case ECM_PM1: printf ("P-1"); break; case ECM_PP1: printf ("P+1"); break; default: printf ("ECM"); } printf ("]\n"); #ifdef HAVE_GETHOSTNAME if (verbose >= 2) { #define MNAMESIZE 64 char mname[MNAMESIZE]; if (gethostname (mname, MNAMESIZE) == 0) { mname[MNAMESIZE - 1] = 0; /* gethostname() may omit trailing 0 */ printf ("Running on %s\n", mname); } } #endif #ifdef HAVE_GWNUM #ifdef gwnum_is_gpl if (! 
gwnum_is_gpl()) #endif printf ("Due to incompatible licenses, this binary file must not " "be distributed.\n"); #endif } /* set first stage bound B1 */ B1 = strtod (argv[1], &argv[1]); if (*argv[1] == '-') { B1done = B1; B1 = strtod (argv[1] + 1, NULL); } else B1done = ECM_DEFAULT_B1_DONE; mpz_set_si (B2min, -1); /* default, means that B2min will be set to B1 by ecm(), pm1() and pp1() */ if (B1 < 0.0 || B1done < 0.0) { fprintf (stderr, "Bound values must be positive\n"); exit (EXIT_FAILURE); } /* check B1 is not too large */ if (B1 > MAX_B1) { fprintf (stderr, "Too large stage 1 bound, limit is %1.0f\n", MAX_B1); exit (EXIT_FAILURE); } init_expr (); mpz_set_si (B2, ECM_DEFAULT_B2); /* compute it automatically from B1 */ /* parse B2 or B2min-B2max */ if (argc >= 3) { int c; double d; char *endptr; /* This is like strtok, but SunOS does not seem to have it declared in any header files, in spite of saying it does in the man pages... */ for (endptr = argv[2]; *endptr != '\0' && *endptr != '-'; endptr++); if (*endptr == '-') *(endptr++) = '\0'; else endptr = NULL; c = -1; { int r; r = gmp_sscanf (argv[2], "%Zd%n", B2, &c); /* Try parsing as integer */ if (r <= 0) { /* restore original value */ if (endptr != NULL) *(--endptr) = '-'; fprintf (stderr, "Invalid B2 value: %s\n", argv[2]); exit (EXIT_FAILURE); } } #ifdef __MINGW32__ /* MinGW scanf() returns a value 1 too high for %n */ /* Reported to MinGW as bug number 1163607 */ if (c > 0 && argv[2][c - 1] == 0) c--; #endif if (c < 0 || argv[2][c] != '\0') { c = -1; gmp_sscanf (argv[2], "%lf%n", &d, &c); /* Try parsing scientific */ #ifdef __MINGW32__ if (c > 0 && argv[2][c - 1] == 0) c--; #endif mpz_set_d (B2, d); } if (c < 0 || argv[2][c] != '\0' || argv[2][0] == '\0') /* If not the whole token could be parsed either way, or if there was no token to begin with (i.e string starting with '-') signal error */ c = -1; else if (endptr != NULL) /* Did we have a '-' in there? 
*/ { mpz_set (B2min, B2); c = -1; gmp_sscanf (endptr, "%Zd%n", B2, &c); #ifdef __MINGW32__ if (c > 0 && endptr[c - 1] == 0) c--; #endif if (c < 0 || endptr[c] != '\0') { gmp_sscanf (endptr, "%lf%n", &d, &c); #ifdef __MINGW32__ if (c > 0 && endptr[c - 1] == 0) c--; #endif mpz_set_d (B2, d); } if (c < 0 || endptr[c] != '\0') c = -1; } if (c == -1) { fprintf (stderr, "Error: expected positive integer(s) B2 or " "B2min-B2\n"); exit (EXIT_FAILURE); } } /* set static parameters (i.e. those that don't change during the program) */ params->verbose = verbose; params->method = method; mpz_set (params->B2, B2); params->k = k; params->S = S; params->repr = repr; params->nobase2step2 = nobase2step2; params->chkfilename = chkfilename; params->TreeFilename = TreeFilename; params->maxmem = maxmem; params->stage1time = stage1time; /* -treefile is valid for ECM only */ if (TreeFilename != NULL && method != ECM_ECM) { fprintf (stderr, "Error: the -treefile option is for ECM only\n"); exit (EXIT_FAILURE); } /* Open resume file for reading, if resuming is requested */ if (resumefilename != NULL) { if (strcmp (resumefilename, "-") == 0) resumefile = stdin; else resumefile = fopen (resumefilename, "r"); if (resumefile == NULL) { fprintf (stderr, "Could not open file %s for reading\n", resumefilename); exit (EXIT_FAILURE); } mpz_init (resume_lastN); mpz_init (resume_lastfac); mpz_set_ui (resume_lastfac, 1); } /* Open save file for writing, if saving is requested */ if (savefilename != NULL) { FILE *savefile; /* Are we not appending and does this file already exist ? 
*/ if (!saveappend && access (savefilename, F_OK) == 0) { printf ("Save file %s already exists, will not overwrite\n", savefilename); exit (EXIT_FAILURE); } /* Test if we can open the file for writing */ savefile = fopen (savefilename, "a"); if (savefile == NULL) { fprintf (stderr, "Could not open file %s for writing\n", savefilename); exit (EXIT_FAILURE); } fclose (savefile); } if (resumefile && (specific_sigma || mpz_sgn (A) || specific_x0)) { printf ("Warning: -sigma, -A and -x0 parameters are ignored when resuming from\nsave files.\n"); mpz_set_ui (sigma, 0); mpz_set_ui (A, 0); specific_x0 = 0; } mpcandi_t_init (&n); /* number(s) to factor */ MPZ_INIT (f); /* factor found */ MPZ_INIT (x); /* stage 1 residue */ MPZ_INIT (orig_x0); /* starting point, for save file */ /* We may need random numbers for sigma/starting point */ gmp_randinit_default (randstate); mpz_set_ui (seed, get_random_ul ()); if (mpz_sizeinbase (seed, 2) <= 32) { mpz_mul_2exp (seed, seed, 32); mpz_add_ui (seed, seed, get_random_ul ()); } if (verbose >= 3) gmp_printf ("Random seed: %Zd\n", seed); gmp_randseed (randstate, seed); /* Install signal handlers */ #ifdef HAVE_SIGNAL /* We catch signals only if there is a savefile. 
Otherwise there's nothing we could save by exiting cleanly, but the waiting for the code to check for signals may delay program end unacceptably */ if (savefilename != NULL) { signal (SIGINT, &signal_handler); signal (SIGTERM, &signal_handler); params->stop_asap = &stop_asap_test; } #endif /* loop for number in standard input or file */ startingB1 = B1; mpz_set (startingB2min, B2min); if (!infilename) infile = stdin; if (breadthfirst == 1) { breadthfirst_maxcnt = count; count = 1; breadthfirst_cnt = 0; } BreadthFirstDoAgain:; if (breadthfirst == 1) { if (breadthfirst_maxcnt > breadthfirst_cnt) { linenum = 0; if (breadthfirst_cnt++) { double NewB1; NewB1 = calc_B1_AutoIncrement (B1, autoincrementB1, autoincrementB1_calc); if (mpz_cmp_d (B2min, B1) <= 0) /* floating-point equality is unreliable, a comparison might be better */ mpz_set_d (B2min, NewB1); B1 = NewB1; } else { /* This is ONLY entered upon the first time through. We load the entire file here so that we can loop deep, or remove a candidate if factor found, or if in deep mode and cofactor is prp (or if original candidate is prp and we are prp testing) */ nMaxCandidates = 100; pCandidates = (mpcandi_t*) malloc (nMaxCandidates * sizeof(mpcandi_t)); if (pCandidates == NULL) { fprintf (stderr, "Error: not enough memory\n"); exit (EXIT_FAILURE); } while (!feof (infile)) { if (read_number (&n, infile, primetest)) { mpcandi_t_init (&pCandidates[nCandidates]); mpcandi_t_copy (&pCandidates[nCandidates++], &n); if (nCandidates == nMaxCandidates) { mpcandi_t *tmp = pCandidates; pCandidates = (mpcandi_t*) malloc ((nMaxCandidates + 100) * sizeof(mpcandi_t)); if (pCandidates == NULL) { fprintf (stderr, "Error: not enough memory\n"); exit (EXIT_FAILURE); } /* perform a "shallow" copy, in which we do NOT need to free any of the individual elements, but just the array memory */ if (pCandidates) memcpy (pCandidates, tmp, nMaxCandidates*sizeof(mpcandi_t)); nMaxCandidates += 100; /* Free the original "array" memory */ free 
(tmp); } } } /* Now infile is at EOF, but we are in breadthfirst mode, so the main while loop will work with linenum 0 || feof (infile) == 0) && !exit_asap_value) { trial_factor_found = 0; params->B1done = B1done; /* may change with resume */ if (resumefile != NULL) /* resume case */ { if (count != 1) { fprintf (stderr, "Error, option -c and -resume are incompatible\n"); exit (EXIT_FAILURE); } if (!read_resumefile_line (&method, x, &n, sigma, A, orig_x0, &(params->B1done), program, who, rtime, comment, resumefile)) break; if (mpz_cmp (n.n, resume_lastN) == 0) { /* Aha, we're trying the same number again. */ /* We skip this attempt if: 1. the remaining cofactor after the last attempt was a probable prime, or 2. if a factor was found and the user gave the -one option */ if (resume_wasPrp || (deep == 0 && mpz_cmp_ui (resume_lastfac, 1) != 0)) continue; /* If we found a factor in an earlier attempt, divide it out */ if (mpz_cmp_ui (resume_lastfac, 1) > 0) mpcandi_t_addfoundfactor (&n, resume_lastfac, 1); } else { /* It's a different number. Set resume_lastN and resume_lastfac */ mpz_set (resume_lastN, n.n); mpz_set_ui (resume_lastfac, 1); resume_wasPrp = n.isPrp; } cnt = count; /* i.e. 
1 */ if (verbose >= 1) { printf ("Resuming "); if (method == ECM_ECM) printf ("ECM"); else if (method == ECM_PM1) printf ("P-1"); else if (method == ECM_PP1) printf ("P+1"); printf (" residue "); if (program[0] || who[0] || rtime[0]) printf ("saved "); if (who[0]) printf ("by %s ", who); if (program[0]) printf ("with %s ", program); if (rtime[0]) printf ("on %s ", rtime); if (comment[0]) printf ("(%s)", comment); printf ("\n"); } } else /* no-resume case */ { if (cnt) /* nothing to read: reuse old number */ { if (verbose >= OUTPUT_NORMAL) printf ("Run %u out of %u:\n", count - cnt + 1, count); } else /* new number */ { if (!breadthfirst && !read_number (&n, infile, primetest)) break; else if (breadthfirst) mpcandi_t_copy (&n,&pCandidates[linenum]); linenum++; cnt = count; /* reset B1 (and B2min) values, as they could have been advanced on the prior candidate */ if (!breadthfirst) { B1 = startingB1; mpz_set (B2min, startingB2min); } } /* in breadthfirst deep mode, a value of 1 is left after FULLY factoring the number, so we then skip it */ /* Also "blank" lines, or lines that could not be parsed correctly will leave a 1 in this value */ if (n.isPrp) { /* n is 0 or 1 (or -1 I guess) so do NOT proceed with it */ cnt = 0; continue; } /* Set effective seed for factoring attempt on this number */ if (specific_x0) /* convert rational value to integer */ { mpz_t inv; if (count != 1) { fprintf (stderr, "Error, option -c is incompatible with -x0\n"); exit (EXIT_FAILURE); } MPZ_INIT (inv); mpz_invert (inv, mpq_denref (rat_x0), n.n); mpz_mul (inv, mpq_numref (rat_x0), inv); mpz_mod (x, inv, n.n); mpz_clear (inv); } else /* Make a random starting point for P-1 and P+1. 
ECM will */ /* compute a suitable value from sigma or A if x is zero */ { if (method == ECM_ECM) mpz_set_ui (x, 0); if (method == ECM_PP1) pp1_random_seed (x, n.n, randstate); if (method == ECM_PM1) pm1_random_seed (x, n.n, randstate); } if (ECM_IS_DEFAULT_B1_DONE(B1done)) mpz_set (orig_x0, x); /* Make a random sigma if we have neither specific sigma nor A given. Warning: sigma may still contain previous random value and thus be nonzero here even if no specific sigma was given */ if (method == ECM_ECM && !specific_sigma && !mpz_sgn (A)) { /* Make random sigma, 0 < sigma <= 2^32 */ mpz_urandomb (sigma, randstate, 32); mpz_add_ui (sigma, sigma, 6); /* we need sigma >= 6 */ } } if (verbose >= 1) { if ((!breadthfirst && cnt == count) || (breadthfirst && 1 == breadthfirst_cnt)) { /* first time this candidate has been run (if looping more than once */ if (n.cpExpr && n.nexprlen < MAX_NUMBER_PRINT_LEN) printf ("Input number is %s (%u digits)\n", n.cpExpr, n.ndigits); else if (n.ndigits < MAX_NUMBER_PRINT_LEN) { char *s; s = mpz_get_str (NULL, 10, n.n); printf ("Input number is %s (%u digits)\n", s, n.ndigits); FREE (s, n.ndigits + 1); } else { /* Print only first and last ten digits of the number */ mpz_t t, u; mpz_init (t); mpz_init (u); mpz_ui_pow_ui (u, 5, n.ndigits - 10); mpz_tdiv_q_2exp (t, n.n, n.ndigits - 10); mpz_tdiv_q (t, t, u); gmp_printf ("Input number is %Zd...", t); mpz_ui_pow_ui (u, 10, 10); mpz_tdiv_r (t, n.n, u); gmp_printf ("%Zd (%u digits)\n", t, n.ndigits); mpz_clear (u); mpz_clear (t); } if (n.isPrp) printf ("****** Warning: input is probably prime ******\n"); } else /* 2nd or more try for same composite */ { /* Since the expression is usually "so" short, why not just drop it out for ALL loops? 
*/ if (displayexpr) { if (n.nexprlen && n.nexprlen <= displayexpr) printf ("Input number is %s (%u digits)\n", n.cpExpr, n.ndigits); else if (n.ndigits <= displayexpr) { char *s; s = mpz_get_str (NULL, 10, n.n); printf ("Input number is %s (%u digits)\n", s, n.ndigits); FREE (s, n.ndigits + 1); } } } fflush (stdout); } /* Even in verbose=0 we should primality check if told to do so, however, we will print to stderr to keep stdout "clean" for verbose=0 like behavior */ else if (((!breadthfirst && cnt == count) || (breadthfirst && breadthfirst_cnt==1)) && n.isPrp) { char *s; s = mpz_get_str (NULL, 10, n.n); fprintf (stderr, "Input number is %s (%u digits)\n" "****** Warning: input is probably prime ******\n", s, n.ndigits); FREE (s, n.ndigits + 1); } factor_is_prime = 0; cnt --; /* one more curve performed */ mpgocandi_fixup_with_N (&go, &n); /* If we are in batch mode: If A was given one should check that d fits in one word and that x0=2. If A was not given one chooses it at random (and if x0 exists it must be 2). 
*/ if (batch != 0) { if (method != ECM_ECM) { fprintf (stderr, "Error, the -batch option is only valid for ECM\n"); exit (EXIT_FAILURE); } mpz_set_ui (sigma, 0); if (mpz_sgn (orig_x0) == 0) mpz_set_ui (orig_x0, 2); else if (mpz_cmp_ui (orig_x0, 2) != 0) { fprintf (stderr, "Error, x0 should be equal to 2" " in batch mode.\n"); exit (EXIT_FAILURE); } mpz_set (x, orig_x0); } params->batch = batch; if (params->batch != 0 && params->batch_B1 != B1) { int st; params->batch_B1 = B1; if (verbose > OUTPUT_NORMAL) printf ("Batch mode %d: ", batch); st = cputime (); /* construct the batch exponent */ if (loadfile_s != NULL) { /* For now, there is no check that it correspond to the actual B1*/ read_s_from_file (params->batch_s, loadfile_s); if (verbose > OUTPUT_NORMAL) printf ("reading prime product of %zu bits took %ldms\n", mpz_sizeinbase (params->batch_s, 2), cputime () - st); } else { compute_s (params->batch_s, params->batch_B1); if (verbose > OUTPUT_NORMAL) printf ("computing prime product of %zu bits took %ldms\n", mpz_sizeinbase (params->batch_s, 2), cputime () - st); if (savefile_s != NULL) { int ret = write_s_in_file (savefile_s, params->batch_s); if (verbose > OUTPUT_NORMAL && ret > 0) printf ("Save s (%u bytes) in %s.\n", ret, savefile_s); } } } /* set parameters that may change from one curve to another */ params->method = method; /* may change with resume */ mpz_set (params->x, x); /* may change with resume */ /* if sigma is zero, then we use the A value instead */ params->sigma_is_A = ((mpz_sgn (sigma) == 0 || batch != 0) ? 1 : 0); mpz_set (params->sigma, (params->sigma_is_A) ? A : sigma); mpz_set (params->go, go.Candi.n); /* may change if contains N */ mpz_set (params->B2min, B2min); /* may change with -c */ /* Here's an ugly hack to pass B2scale to the library somehow. It gets piggy-backed onto B1done */ params->B1done = params->B1done + floor (B2scale * 128.) 
/ 134217728.; /* Default, for P-1/P+1 with old stage 2 and ECM, use NTT only for small input */ if (use_ntt == 1 && (method == ECM_ECM || S != ECM_DEFAULT_S)) params->use_ntt = (mpz_size (n.n) <= NTT_SIZE_THRESHOLD); else params->use_ntt = use_ntt; #ifdef HAVE_GWNUM /* check if the input number can be represented as k*b^n+c */ if (kbnc_z (&gw_k, &gw_b, &gw_n, &gw_c, n.n)) { params->gw_k = gw_k; params->gw_b = gw_b; params->gw_n = gw_n; params->gw_c = gw_c; if (verbose > OUTPUT_NORMAL) printf ("Found number: %.0f*%lu^%lu + %ld\n", gw_k, gw_b, gw_n, gw_c); } else if (kbnc_str (&gw_k, &gw_b, &gw_n, &gw_c, n.cpExpr, n.n)) { params->gw_k = gw_k; params->gw_b = gw_b; params->gw_n = gw_n; params->gw_c = gw_c; if (verbose > OUTPUT_NORMAL) printf ("Found number: %.0f*%lu^%lu + %ld\n", gw_k, gw_b, gw_n, gw_c); } else { if (verbose > OUTPUT_NORMAL) printf ("Did not find a gwnum poly for the input number.\n"); } #endif #ifdef WANT_SHELLCMD /* See if the system is currently idle, if -idlecmd was given */ if (idlecmd != NULL) { int r; FILE *fc; fc = popen (idlecmd, "r"); if (fc == NULL) { fprintf (stderr, "Error executing idle command: %s\n", idlecmd); exit (EXIT_FAILURE); } r = pclose (fc); if (r != 0) /* If exit status of idle command is non-zero */ { printf ("Idle command returned %d, exiting\n", r); breadthfirst = 0; /* Avoid looping due to goto (ugly, FIXME!) 
*/ break; } } #endif /* WANT_SHELLCMD */ if (timestamp) { time_t t; t = time (NULL); printf ("[%.24s]\n", ctime (&t)); } #if 0 /* Test mpres_muldivbysomething_si() which is not called in normal operation */ mpmod_selftest (n.n); #endif if (mpz_cmp_ui (n.n, 0) <= 0) { fprintf (stderr, "Error, input number should be positive\n"); exit (EXIT_FAILURE); } /* now call the ecm library */ result = ecm_factor (f, n.n, B1, params); if (result == ECM_ERROR) { fprintf (stderr, "Please report internal errors at <%s>.\n", PACKAGE_BUGREPORT); exit (EXIT_FAILURE); } if (result == ECM_NO_FACTOR_FOUND) { if (trial_factor_found) { factor_is_prime = 1; mpz_set_ui (f, 1); returncode = ECM_NO_FACTOR_FOUND; goto OutputFactorStuff; } } else { factsfound++; if (verbose > 0) printf ("********** Factor found in step %u: ", ABS (result)); mpz_out_str (stdout, 10, f); if (verbose > 0) printf ("\n"); /* Complain about non-proper factors (0, negative) */ if (mpz_cmp_ui (f, 1) < 0) { fprintf (stderr, "Error: factor found is "); mpz_out_str (stderr, 10, f); fprintf (stderr, "\nPlease report internal errors at <%s>.\n", PACKAGE_BUGREPORT); exit (EXIT_FAILURE); } #ifdef WANT_SHELLCMD if (faccmd != NULL) { FILE *fc; fc = popen (faccmd, "w"); if (fc != NULL) { mpz_t cof; mpz_init_set (cof, n.n); mpz_divexact (cof, cof, f); gmp_fprintf (fc, "%Zd\n", n.n); gmp_fprintf (fc, "%Zd\n", f); gmp_fprintf (fc, "%Zd\n", cof); mpz_clear (cof); pclose (fc); } } #endif if (mpz_cmp (f, n.n) != 0) { /* prints factor found and cofactor on standard output. */ factor_is_prime = probab_prime_p (f, PROBAB_PRIME_TESTS); if (verbose >= 1) { printf ("Found %s factor of %2u digits: ", factor_is_prime ? 
"probable prime" : "composite", nb_digits (f)); mpz_out_str (stdout, 10, f); printf ("\n"); } mpcandi_t_addfoundfactor (&n, f, 1); /* 1 for display warning if factor does not divide the current candidate */ if (resumefile != NULL) { /* If we are resuming from a save file, add factor to the discovered factors for the current number */ mpz_mul (resume_lastfac, resume_lastfac, f); resume_wasPrp = n.isPrp; } if (factor_is_prime) returncode = (n.isPrp) ? ECM_PRIME_FAC_PRIME_COFAC : ECM_PRIME_FAC_COMP_COFAC; else returncode = (n.isPrp) ? ECM_COMP_FAC_PRIME_COFAC : ECM_COMP_FAC_COMP_COFAC; OutputFactorStuff:; if (verbose >= 1) { printf ("%s cofactor ", n.isPrp ? "Probable prime" : "Composite"); if (n.cpExpr && !decimal_cofactor) printf ("%s", n.cpExpr); else mpz_out_str (stdout, 10, n.n); printf (" has %u digits\n", n.ndigits); } else /* quiet mode: just print a space here, remaining cofactor will be printed after last curve */ printf (" "); /* check for champions (top ten for each method) */ method1 = ((method == ECM_PP1) && (result < 0)) ? 
ECM_PM1 : method; if ((verbose > 0) && factor_is_prime && nb_digits (f) >= champion_digits[method1]) { printf ("Report your potential champion to %s\n", champion_keeper[method1]); printf ("(see %s)\n", champion_url[method1]); } /* Take care of fully factoring this number, in case we are in deep mode */ if (n.isPrp) cnt = 0; /* no more curve to perform */ if (!deep) { if (breadthfirst) /* I know it may not be prp, but setting this will cause all future loops to NOT check this candidate again */ pCandidates[linenum-1].isPrp = 1; cnt = 0; } else if (breadthfirst) mpcandi_t_copy (&pCandidates[linenum-1], &n); } else { if (breadthfirst) /* I know it may not be prp, but setting this will cause all future loops to NOT check this candidate again */ pCandidates[linenum-1].isPrp = 1; cnt = 0; /* no more curve to perform */ if (verbose > 0) printf ("Found input number N"); printf ("\n"); returncode = ECM_INPUT_NUMBER_FOUND; } fflush (stdout); } /* if quiet mode, prints remaining cofactor after last curve */ if ((cnt == 0) && (verbose == 0)) { if (n.cpExpr && !decimal_cofactor) printf ("%s", n.cpExpr); else mpz_out_str (stdout, 10, n.n); putchar ('\n'); fflush (stdout); } /* Write composite cofactors to savefile if requested */ /* If no factor was found, we consider cofactor composite and write it */ if (savefilename != NULL && !n.isPrp) { mpz_mod (x, params->x, n.n); /* Reduce stage 1 residue wrt new co- factor, in case a factor was found */ /* We write the B1done value to the safe file. 
This requires that a correct B1done is returned by the factoring functions */ write_resumefile_line (savefilename, method, params->B1done, sigma, A, x, &n, orig_x0, comment); } /* advance B1, if autoincrement value had been set during command line parsing */ if (!breadthfirst && autoincrementB1 > 0.0) { double NewB1; NewB1 = calc_B1_AutoIncrement (B1, autoincrementB1, autoincrementB1_calc); if (mpz_cmp_d (B2min, B1) <= 0) /* <= might be better than == */ mpz_set_d (B2min, NewB1); B1 = NewB1; } } /* Allow our "breadthfirst" search to re-run the file again if enough curves have not yet been run */ if (breadthfirst == 1 && !exit_asap_value) goto BreadthFirstDoAgain; /* NOTE finding a factor may have caused the loop to exit, but what is left on screen is the wrong count of factors (missing the just found factor. Update the screen to at least specify the current count */ if (infilename) /* infile might be stdin, don't fclose that! */ fclose (infile); if (resumefile) { fclose (resumefile); mpz_clear (resume_lastN); mpz_clear (resume_lastfac); } if (nCandidates) { while (nCandidates--) mpcandi_t_free (&pCandidates[nCandidates]); free (pCandidates); } free_expr (); gmp_randclear (randstate); mpz_clear (orig_x0); mpz_clear (startingB2min); mpz_clear (B2min); mpz_clear (B2); mpz_clear (x); mpz_clear (f); mpcandi_t_free (&n); mpz_clear (sigma); mpz_clear (A); mpq_clear (rat_x0); mpz_clear (seed); mpgocandi_t_free (&go); ecm_clear (params); #ifdef MEMORY_DEBUG tests_memory_end (); #endif /* exit 0 if a factor was found for the last input, except if we exit due to a signal */ #ifdef HAVE_SIGNAL if (returncode == 0 && exit_asap_value != 0) returncode = 143; #endif return returncode; } ecm-6.4.4/ecm-params.h.hppa2.00000644023561000001540000000172012106741273012563 00000000000000/* those parameters were obtained on gcc61.fsffrance.org with ecm-6.4.1-rc3 gmp-5.0.2, and gcc 4.4.1 -O2 -pedantic -mpa-risc-1-1 (note that GMP must be configured with ABI=1.0, see 
http://gmplib.org/list-archives/gmp-bugs/2009-August/001585.html */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1} #define MPZMOD_THRESHOLD 49 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 262144 #define PREREVERTDIVISION_NTT_THRESHOLD 262144 #define POLYINVERT_NTT_THRESHOLD 262144 #define POLYEVALT_NTT_THRESHOLD 262144 #define MPZSPV_NORMALISE_STRIDE 256 ecm-6.4.4/configure0000755023561000001540000172407412113353766011150 00000000000000#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.69 for ecm 6.4.4. # # Report bugs to . # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. 
as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then _as_can_reexec=no; export _as_can_reexec; # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 as_fn_exit 255 fi # We don't want this to propagate to other subprocesses. 
{ _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi " as_required="as_fn_return () { (exit \$1); } as_fn_success () { as_fn_return 0; } as_fn_failure () { as_fn_return 1; } as_fn_ret_success () { return 0; } as_fn_ret_failure () { return 1; } exitcode=0 as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : else exitcode=1; echo positional parameters were not saved. 
fi test x\$exitcode = x0 || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 test \$(( 1 + 1 )) = 2 || exit 1 test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || ( ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO PATH=/empty FPATH=/empty; export PATH FPATH test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\ || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1" if (eval "$as_required") 2>/dev/null; then : as_have_required=yes else as_have_required=no fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. 
as_shell=$as_dir/$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : CONFIG_SHELL=$as_shell as_have_required=yes if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : break 2 fi fi done;; esac as_found=false done $as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : CONFIG_SHELL=$SHELL as_have_required=yes fi; } IFS=$as_save_IFS if test "x$CONFIG_SHELL" != x; then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi if test x$as_have_required = xno; then : $as_echo "$0: This script requires a shell more modern than all" $as_echo "$0: the shells that I found on your system." if test x${ZSH_VERSION+set} = xset ; then $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" $as_echo "$0: be upgraded to zsh 4.3.4 or later." else $as_echo "$0: Please tell bug-autoconf@gnu.org and $0: ecm-discuss@lists.gforge.inria.fr about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." 
fi exit 1 fi fi fi SHELL=${CONFIG_SHELL-/bin/sh} export SHELL # Unset more variables known to interfere with behavior of common tools. CLICOLOR_FORCE= GREP_OPTIONS= unset CLICOLOR_FORCE GREP_OPTIONS ## --------------------- ## ## M4sh Shell Functions. ## ## --------------------- ## # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. 
Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_lineno_1=$LINENO as_lineno_1a=$LINENO as_lineno_2=$LINENO as_lineno_2a=$LINENO eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall # in an infinite loop. This has already happened in practice. _as_can_reexec=no; export _as_can_reexec # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... 
but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" SHELL=${CONFIG_SHELL-/bin/sh} test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= # Identity of this package. PACKAGE_NAME='ecm' PACKAGE_TARNAME='ecm' PACKAGE_VERSION='6.4.4' PACKAGE_STRING='ecm 6.4.4' PACKAGE_BUGREPORT='ecm-discuss@lists.gforge.inria.fr' PACKAGE_URL='' # Factoring default headers for most tests. 
ac_includes_default="\ #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef STDC_HEADERS # include # include #else # ifdef HAVE_STDLIB_H # include # endif #endif #ifdef HAVE_STRING_H # if !defined STDC_HEADERS && defined HAVE_MEMORY_H # include # endif # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_c_werror_flag= ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS GSL_LD_FLAGS ASMPATH XSLDIR VALGRIND MAKE_MANPAGE_FALSE MAKE_MANPAGE_TRUE XSLTPROC GMPLIB LIBOBJS POW_LIB ALLOCA WITH_GWNUM_FALSE WITH_GWNUM_TRUE ENABLE_ASM_REDC_FALSE ENABLE_ASM_REDC_TRUE M4 OPENMP_CFLAGS CPP OTOOL64 OTOOL LIPO NMEDIT DSYMUTIL MANIFEST_TOOL RANLIB ac_ct_AR AR DLLTOOL OBJDUMP LN_S NM ac_ct_DUMPBIN DUMPBIN LD FGREP LIBTOOL am__fastdepCCAS_FALSE am__fastdepCCAS_TRUE CCASDEPMODE CCASFLAGS CCAS am__fastdepCC_FALSE am__fastdepCC_TRUE CCDEPMODE am__nodep AMDEPBACKSLASH AMDEP_FALSE AMDEP_TRUE am__quote am__include DEPDIR OBJEXT EXEEXT ac_ct_CC CPPFLAGS LDFLAGS CFLAGS CC MEMORY_DEBUG_FALSE MEMORY_DEBUG_TRUE SED EGREP GREP host_os host_vendor host_cpu host build_os build_vendor build_cpu build am__untar am__tar AMTAR am__leading_dot SET_MAKE AWK mkdir_p MKDIR_P INSTALL_STRIP_PROGRAM STRIP install_sh MAKEINFO AUTOHEADER AUTOMAKE AUTOCONF ACLOCAL VERSION PACKAGE CYGPATH_W am__isrc INSTALL_DATA INSTALL_SCRIPT INSTALL_PROGRAM target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_URL PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking with_gmp with_gmp_include with_gmp_lib with_gwnum 
enable_assert enable_shellcmd enable_gmp_cflags enable_openmp enable_sse2 enable_asm_redc enable_memory_debug enable_mulredc_svoboda enable_dependency_tracking enable_shared enable_static with_pic enable_fast_install with_gnu_ld with_sysroot enable_libtool_lock ' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS CCAS CCASFLAGS CPP M4' # Initialize some variables set by options. ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. 
if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *=) ac_optarg= ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? 
"invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. 
with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | 
--mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | 
--program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | 
--targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. 
with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) as_fn_error $? "unrecognized option: \`$ac_option' Try \`$0 --help' for more information" ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` as_fn_error $? "missing argument to $ac_option" fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir do eval ac_val=\$$ac_var # Remove trailing slashes. 
case $ac_val in */ ) ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` eval $ac_var=\$ac_val;; esac # Be sure to have absolute directory names. case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || as_fn_error $? "working directory cannot be determined" test "X$ac_ls_di" = "X$ac_pwd_ls_di" || as_fn_error $? "pwd does not report name of working directory" # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory. ac_confdir=`$as_dirname -- "$as_myself" || $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? 
"cannot find sources ($ac_unique_file) in $srcdir" fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures ecm 6.4.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking ...' 
messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/ecm] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF Program names: --program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on 
installed program names System types: --build=BUILD configure for building on BUILD [guessed] --host=HOST cross-compile to build programs to run on HOST [BUILD] _ACEOF fi if test -n "$ac_init_help"; then case $ac_init_help in short | recursive ) echo "Configuration of ecm 6.4.4:";; esac cat <<\_ACEOF Optional Features: --disable-option-checking ignore unrecognized --enable/--with options --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --enable-assert enable ASSERT checking [[default=no]] --enable-shellcmd enable shell command execution [[default=no]] --enable-gmp-cflags enable importing CFLAGS from gmp.h [[default=yes]] --enable-openmp enable OpenMP multi-threading [[default=no]] --enable-sse2 use SSE2 instructions in NTT code (default=yes for 32-bit x86 systems, if supported) --enable-asm-redc use an asm redc (default=yes on x86_64 and powerpc64, no on others) --enable-memory-debug enable memory debugging [[default=no]] --enable-mulredc-svoboda enable Svoboda mulredc [[default=no]] --disable-dependency-tracking speeds up one-time build --enable-dependency-tracking do not reject slow dependency extractors --enable-shared[=PKGS] build shared libraries [default=no] --enable-static[=PKGS] build static libraries [default=yes] --enable-fast-install[=PKGS] optimize for fast installation [default=yes] --disable-libtool-lock avoid locking (might break parallel builds) --disable-openmp do not use OpenMP Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-gmp=DIR GMP install directory --with-gmp-include=DIR GMP include directory --with-gmp-lib=DIR GMP lib directory --with-gwnum=DIR GWNUM source directory --with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use both] --with-gnu-ld assume the C compiler uses GNU ld [default=no] --with-sysroot=DIR Search for dependent libraries within DIR (or the compiler's sysroot 
if not specified). Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CCAS assembler compiler command (defaults to CC) CCASFLAGS assembler compiler flags (defaults to CFLAGS) CPP C preprocessor M4 m4 macro processor Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to . _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. 
ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for guested configure. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF ecm configure 6.4.4 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi ## ------------------------ ## ## Autoconf initialization. ## ## ------------------------ ## # ac_fn_c_try_compile LINENO # -------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_compile # ac_fn_c_try_link LINENO # ----------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_link # ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES # ------------------------------------------------------- # Tests whether HEADER exists and can be compiled using the include files in # INCLUDES, setting the cache variable VAR accordingly. 
# Positional arguments as used in the body:
#   $1  line number of the caller, used as the prefix of log messages
#   $2  header name, spliced into `#include <$2>'
#   $3  NAME of the cache variable that receives `yes' or `no'
#   $4  prerequisite include text emitted before the `#include' line
# Progress text is written to fd 6 and the detailed trace to fd 5.
# If the variable named by $3 is already set, the compile is skipped and
# "(cached) " is printed instead.
ac_fn_c_check_header_compile ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
# `${VAR+:} false' runs `:' (success) when VAR is set, `false' otherwise.
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
else
  # Here-document body and terminator must start in column 0 (plain `<<').
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$4
#include <$2>
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
  eval "$3=yes"
else
  eval "$3=no"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
eval ac_res=\$$3
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
  # Pop the saved line number (set on entry) before returning.
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_c_check_header_compile

# ac_fn_c_try_cpp LINENO
# ----------------------
# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
#
# Runs "$ac_cpp conftest.$ac_ext" with stdout captured in conftest.i and
# stderr in conftest.err.  The attempt counts as a success only when the
# command exits 0 and, if either $ac_c_preproc_warn_flag or
# $ac_c_werror_flag is non-empty, conftest.err is empty as well.
# The result (0 or 1) is returned through as_fn_set_status.
ac_fn_c_try_cpp ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  # Build the log line for the command.  Commands containing quotes,
  # backquotes or backslashes are logged through an indirect \$ac_try
  # reference so the following eval does not re-interpret them.
  if { { ac_try="$ac_cpp conftest.$ac_ext"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
  ac_status=$?
  if test -s conftest.err; then
    # Drop lines that start with optional blanks and `+' (presumably shell
    # trace output) before logging and re-checking the diagnostics.
    grep -v '^ *+' conftest.err >conftest.er1
    cat conftest.er1 >&5
    mv -f conftest.er1 conftest.err
  fi
  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
  test $ac_status = 0; } > conftest.i && {
         test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
         test ! -s conftest.err
       }; then :
  ac_retval=0
else
  $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

    ac_retval=1
fi
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  as_fn_set_status $ac_retval

} # ac_fn_c_try_cpp

# ac_fn_c_try_run LINENO
# ----------------------
# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
# that executables *can* be run.
# ac_fn_c_try_run: $1 is the caller's line number (log prefix only).
# Links with "$ac_link", then executes ./conftest$ac_exeext; both the link
# and the run must exit 0.  On failure the return status is the failing
# exit status ($ac_status), not a fixed 1.  All diagnostics go to fd 5.
ac_fn_c_try_run ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  if { { ac_try="$ac_link"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
  (eval "$ac_link") 2>&5
  ac_status=$?
  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
  { { case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
  (eval "$ac_try") 2>&5
  ac_status=$?
  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
  test $ac_status = 0; }; }; then :
  ac_retval=0
else
  $as_echo "$as_me: program exited with status $ac_status" >&5
       $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

       ac_retval=$ac_status
fi
  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
  as_fn_set_status $ac_retval

} # ac_fn_c_try_run

# ac_fn_c_check_func LINENO FUNC VAR
# ----------------------------------
# Tests whether FUNC exists, setting the cache variable VAR accordingly
#
# $1 = caller's line number, $2 = function name, $3 = NAME of the cache
# variable (set to `yes' or `no').  The probe is a link test: it declares
# `char $2 ();' itself and calls it from main, so no real prototype is
# needed.  The probe must include a system header (<limits.h>, or
# <assert.h> when __STDC__ is undefined) so that the C library's __stub_*
# macros are visible; without a header name the `# include' lines are
# preprocessor errors and every function would be reported missing.
ac_fn_c_check_func ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
else
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
   For example, HP-UX 11i <limits.h> declares gettimeofday. */
#define $2 innocuous_$2

/* System header to define __stub macros and hopefully few prototypes,
    which can conflict with char $2 (); below.
    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
    <limits.h> exists even on freestanding compilers. */

#ifdef __STDC__
# include <limits.h>
#else
# include <assert.h>
#endif

#undef $2

/* Override any GCC internal prototype to avoid an error.
   Use char because int might match the return type of a GCC
   builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char $2 ();
/* The GNU C library defines this for functions which it implements
    to always fail with ENOSYS. Some functions are actually named
    something starting with __ and the normal name is an alias. */
#if defined __stub_$2 || defined __stub___$2
choke me
#endif

int
main ()
{
return $2 ();
  ;
  return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
  eval "$3=yes"
else
  eval "$3=no"
fi
rm -f core conftest.err conftest.$ac_objext \
    conftest$ac_exeext conftest.$ac_ext
fi
eval ac_res=\$$3
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_c_check_func

# ac_fn_c_find_intX_t LINENO BITS VAR
# -----------------------------------
# Finds a signed integer type with width BITS, setting cache variable VAR
# accordingly.
#
# $1 = caller's line number, $2 = bit width, $3 = NAME of the cache
# variable.  VAR ends up as `yes' when int$2_t itself qualifies, as the
# name of the first qualifying fallback type otherwise, or stays `no'.
# A candidate qualifies when the first probe (the value 2^(BITS-1) - 1 is
# representable and positive) compiles and the second probe (that value
# is smaller than the next one up) does NOT compile.
ac_fn_c_find_intX_t ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for int$2_t" >&5
$as_echo_n "checking for int$2_t... " >&6; }
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
else
  eval "$3=no"
     # Order is important - never check a type that is potentially smaller
     # than half of the expected target width.
     for ac_type in int$2_t 'int' 'long int' \
         'long long int' 'short int' 'signed char'; do
       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$ac_includes_default
             enum { N = $2 / 2 - 1 };
int
main ()
{
static int test_array [1 - 2 * !(0 < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1))];
test_array [0] = 0;
return test_array [0];

  ;
  return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$ac_includes_default
                enum { N = $2 / 2 - 1 };
int
main ()
{
static int test_array [1 - 2 * !(($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1)
                 < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 2))];
test_array [0] = 0;
return test_array [0];

  ;
  return 0;
}
_ACEOF
# Note the inverted sense: the type is accepted when this probe FAILS.
if ac_fn_c_try_compile "$LINENO"; then :

else
  case $ac_type in #(
  int$2_t) :
    eval "$3=yes" ;; #(
  *) :
    eval "$3=\$ac_type" ;;
esac
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
       # Stop at the first candidate that changed VAR away from `no'.
       if eval test \"x\$"$3"\" = x"no"; then :

else
  break
fi
     done
fi
eval ac_res=\$$3
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_c_find_intX_t

# ac_fn_c_find_uintX_t LINENO BITS VAR
# ------------------------------------
# Finds an unsigned integer type with width BITS, setting cache variable VAR
# accordingly.
#
# $1 = caller's line number, $2 = bit width, $3 = NAME of the cache
# variable.  A candidate qualifies when shifting its all-ones value right
# by BITS-2 positions in total leaves exactly 3.  VAR becomes `yes' for
# uint$2_t, the fallback type's name otherwise, or stays `no'.
ac_fn_c_find_uintX_t ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uint$2_t" >&5
$as_echo_n "checking for uint$2_t... " >&6; }
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
else
  eval "$3=no"
     # Order is important - never check a type that is potentially smaller
     # than half of the expected target width.
     for ac_type in uint$2_t 'unsigned int' 'unsigned long int' \
         'unsigned long long int' 'unsigned short int' 'unsigned char'; do
       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$ac_includes_default
int
main ()
{
static int test_array [1 - 2 * !((($ac_type) -1 >> ($2 / 2 - 1)) >> ($2 / 2 - 1) == 3)];
test_array [0] = 0;
return test_array [0];

  ;
  return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
  case $ac_type in #(
  uint$2_t) :
    eval "$3=yes" ;; #(
  *) :
    eval "$3=\$ac_type" ;;
esac
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
       if eval test \"x\$"$3"\" = x"no"; then :

else
  break
fi
     done
fi
eval ac_res=\$$3
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_c_find_uintX_t

# ac_fn_c_check_type LINENO TYPE VAR INCLUDES
# -------------------------------------------
# Tests whether TYPE exists after having included INCLUDES, setting cache
# variable VAR accordingly.
#
# $1 = caller's line number, $2 = type name, $3 = NAME of the cache
# variable, $4 = include text.  VAR becomes `yes' only when
# `sizeof ($2)' compiles AND `sizeof (($2))' does not.
ac_fn_c_check_type ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
else
  eval "$3=no"
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$4
int
main ()
{
if (sizeof ($2))
         return 0;
  ;
  return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$4
int
main ()
{
if (sizeof (($2)))
            return 0;
  ;
  return 0;
}
_ACEOF
# Inverted sense: success of the doubly-parenthesised probe means `no'.
if ac_fn_c_try_compile "$LINENO"; then :

else
  eval "$3=yes"
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
eval ac_res=\$$3
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_c_check_type

# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
# -------------------------------------------------------
# Tests whether HEADER exists, giving a warning if it cannot be compiled using
# the include files in INCLUDES and setting the cache variable VAR
# accordingly.
#
# $1 = caller's line number, $2 = header name, $3 = NAME of the cache
# variable, $4 = include text.  When VAR is not cached the header is
# probed twice: compiled (with $4 in front) and merely preprocessed.
# Disagreement between the two only produces warnings; the value stored
# in VAR is always the compiler's verdict ($ac_header_compiler).
ac_fn_c_check_header_mongrel ()
{
  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
  if eval \${$3+:} false; then :
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
fi
eval ac_res=\$$3
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
else
  # Is the header compilable?
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
$as_echo_n "checking $2 usability... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
$4
#include <$2>
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
  ac_header_compiler=yes
else
  ac_header_compiler=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
$as_echo "$ac_header_compiler" >&6; }

# Is the header present?
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
$as_echo_n "checking $2 presence... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <$2>
_ACEOF
if ac_fn_c_try_cpp "$LINENO"; then :
  ac_header_preproc=yes
else
  ac_header_preproc=no
fi
rm -f conftest.err conftest.i conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
$as_echo "$ac_header_preproc" >&6; }

# So? What about this header?
case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
  yes:no: )
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
    ;;
  no:yes:* )
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
( $as_echo "## ------------------------------------------------ ##
## Report this to ecm-discuss@lists.gforge.inria.fr ##
## ------------------------------------------------ ##"
     ) | sed "s/^/$as_me: WARNING: /" >&2
    ;;
esac
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
if eval \${$3+:} false; then :
  $as_echo_n "(cached) " >&6
else
  eval "$3=\$ac_header_compiler"
fi
eval ac_res=\$$3
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
fi
  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno

} # ac_fn_c_check_header_mongrel

# Start a fresh config.log (truncating any previous one), then keep it
# open for appending on fd 5 for the rest of the run.
cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by ecm $as_me 6.4.4, which was
generated by GNU Autoconf 2.69. Invocation command line was

  $ $0 $@

_ACEOF
exec 5>>config.log
# Record a description of the build platform and every PATH entry in the
# log.  Each probe falls back to "unknown" when the tool is unavailable.
{
cat <<_ASUNAME
## --------- ##
## Platform. ##
## --------- ##

hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
uname -m = `(uname -m) 2>/dev/null || echo unknown`
uname -r = `(uname -r) 2>/dev/null || echo unknown`
uname -s = `(uname -s) 2>/dev/null || echo unknown`
uname -v = `(uname -v) 2>/dev/null || echo unknown`

/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`

/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown`
/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`

_ASUNAME

as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  # An empty PATH component means the current directory.
  test -z "$as_dir" && as_dir=.
    $as_echo "PATH: $as_dir"
  done
IFS=$as_save_IFS

} >&5

cat >&5 <<_ACEOF


## ----------- ##
## Core tests. ##
## ----------- ##

_ACEOF


# Keep a trace of the command line.
# Strip out --no-create and --no-recursion so they do not pile up.
# Strip out --silent because we don't want to record it for future runs.
# Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; 2) as_fn_append ac_configure_args1 " '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi as_fn_append ac_configure_args " '$ac_arg'" ;; esac done done { ac_configure_args0=; unset ac_configure_args0;} { ac_configure_args1=; unset ac_configure_args1;} # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo $as_echo "## ---------------- ## ## Cache variables. 
## ## ---------------- ##" echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo $as_echo "## ----------------- ## ## Output variables. ## ## ----------------- ##" echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then $as_echo "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then $as_echo "## ----------- ## ## confdefs.h. 
## ## ----------- ##" echo cat confdefs.h echo fi test "$ac_signal" != 0 && $as_echo "$as_me: caught signal $ac_signal" $as_echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h $as_echo "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_URL "$PACKAGE_URL" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then # We do not want a PATH search for config.site. case $CONFIG_SITE in #(( -*) ac_site_file1=./$CONFIG_SITE;; */*) ac_site_file1=$CONFIG_SITE;; *) ac_site_file1=./$CONFIG_SITE;; esac elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site else ac_site_file1=$ac_default_prefix/share/config.site ac_site_file2=$ac_default_prefix/etc/config.site fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . 
"$ac_site_file" \ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. if test /dev/null != "$cache_file" && test -f "$cache_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 $as_echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure. 
ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 $as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 $as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 $as_echo "$as_me: former value: \`$ac_old_val'" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. *) as_fn_append ac_configure_args " '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. 
## ## -------------------- ## ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_config_headers="$ac_config_headers config.h" gmp_configm4="config.m4" gmp_tmpconfigm4=cnfm4.tmp gmp_tmpconfigm4i=cnfm4i.tmp gmp_tmpconfigm4p=cnfm4p.tmp rm -f $gmp_tmpconfigm4 $gmp_tmpconfigm4i $gmp_tmpconfigm4p am__api_version='1.11' ac_aux_dir= for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do if test -f "$ac_dir/install-sh"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install-sh -c" break elif test -f "$ac_dir/install.sh"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install.sh -c" break elif test -f "$ac_dir/shtool"; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/shtool install -c" break fi done if test -z "$ac_aux_dir"; then as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5 fi # These three variables are undocumented and unsupported, # and are intended to be withdrawn in a future Autoconf release. # They can cause serious problems if a builder's source tree is in a directory # whose full name contains unusual characters. ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. # Find a good install program. We prefer a C program (faster), # so one script is as good as another. 
But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install # AmigaOS /C/install, which installs bootblocks on floppy discs # AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # OS/2's system install, which has a completely different semantic # ./install, which can be erroneously created by make from ./install.sh. # Reject install programs that cannot install multiple files. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5 $as_echo_n "checking for a BSD-compatible install... " >&6; } if test -z "$INSTALL"; then if ${ac_cv_path_install+:} false; then : $as_echo_n "(cached) " >&6 else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. # Account for people who put trailing slashes in PATH elements. case $as_dir/ in #(( ./ | .// | /[cC]/* | \ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \ /usr/ucb/* ) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. # Don't use installbsd from OSF since it installs stuff as root # by default. for ac_prog in ginstall scoinst install; do for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then if test $ac_prog = install && grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : elif test $ac_prog = install && grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then # program-specific install script used by HP pwplus--don't use. 
: else rm -rf conftest.one conftest.two conftest.dir echo one > conftest.one echo two > conftest.two mkdir conftest.dir if "$as_dir/$ac_prog$ac_exec_ext" -c conftest.one conftest.two "`pwd`/conftest.dir" && test -s conftest.one && test -s conftest.two && test -s conftest.dir/conftest.one && test -s conftest.dir/conftest.two then ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" break 3 fi fi fi done done ;; esac done IFS=$as_save_IFS rm -rf conftest.one conftest.two conftest.dir fi if test "${ac_cv_path_install+set}" = set; then INSTALL=$ac_cv_path_install else # As a last resort, use the slow shell script. Don't cache a # value for INSTALL within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the value is a relative name. INSTALL=$ac_install_sh fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5 $as_echo "$INSTALL" >&6; } # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build environment is sane" >&5 $as_echo_n "checking whether build environment is sane... " >&6; } # Just in case sleep 1 echo timestamp > conftest.file # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' ' case `pwd` in *[\\\"\#\$\&\'\`$am_lf]*) as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;; esac case $srcdir in *[\\\"\#\$\&\'\`$am_lf\ \ ]*) as_fn_error $? "unsafe srcdir value: \`$srcdir'" "$LINENO" 5;; esac # Do `set' in a subshell so we don't clobber the current shell's # arguments. 
Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$*" = "X"; then # -L didn't work. set X `ls -t "$srcdir/configure" conftest.file` fi rm -f conftest.file if test "$*" != "X $srcdir/configure conftest.file" \ && test "$*" != "X conftest.file $srcdir/configure"; then # If neither matched, then we have a broken ls. This can happen # if, for instance, CONFIG_SHELL is bash and it inherits a # broken ls alias from the environment. This has actually # happened. Such a system could not be considered "sane". as_fn_error $? "ls -t appears to fail. Make sure there is not a broken alias in your environment" "$LINENO" 5 fi test "$2" = conftest.file ) then # Ok. : else as_fn_error $? "newly created file is older than distributed files! Check your system clock" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } test "$program_prefix" != NONE && program_transform_name="s&^&$program_prefix&;$program_transform_name" # Use a double $ so make ignores it. test "$program_suffix" != NONE && program_transform_name="s&\$&$program_suffix&;$program_transform_name" # Double any \ or $. # By default was `s,x,x', remove it if useless. 
# NOTE: this script is generated output; the change to the install_sh guard
# below should also be made in the Automake macro that emits it, otherwise it
# is lost the next time configure is regenerated.

# --- Program name transformation -------------------------------------------
# The sed script doubles every \ and $ and strips a trailing `;s,x,x,'.
ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
program_transform_name=`$as_echo "$program_transform_name" | sed "$ac_script"`

# expand $ac_aux_dir to an absolute path
am_aux_dir=`cd $ac_aux_dir && pwd`

# --- `missing' helper --------------------------------------------------------
# Only pick a default when the caller has not set MISSING at all.
if test x"${MISSING+set}" != xset; then
  case $am_aux_dir in
  # Quote the path when it contains blanks.  The second pattern is meant to
  # be backslash-TAB -- TODO confirm the tab survived copy/paste.
  *\ * | *\	*)
    MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;;
  *)
    MISSING="\${SHELL} $am_aux_dir/missing" ;;
  esac
fi
# Use eval to expand $SHELL
if eval "$MISSING --run true"; then
  am_missing_run="$MISSING --run "
else
  am_missing_run=
  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`missing' script is too old or missing" >&5
$as_echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;}
fi

# --- install-sh helper -------------------------------------------------------
# FIX: the guard used to read  x"${install_sh}" != xset , which compares the
# variable's *value* with the word "set".  That is true for every value other
# than the literal string "set", so an install_sh supplied by the caller was
# always overwritten.  ${install_sh+set} expands to "set" exactly when the
# variable is set, which is the same idiom the MISSING guard above uses.
if test x"${install_sh+set}" != xset; then
  case $am_aux_dir in
  # Second pattern is meant to be backslash-TAB -- TODO confirm.
  *\ * | *\	*)
    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
  *)
    install_sh="\${SHELL} $am_aux_dir/install-sh"
  esac
fi

# --- strip -------------------------------------------------------------------
# Installed binaries are usually stripped using `strip' when the user
# run `make install-strip'.  However `strip' might not be the right
# tool to use in cross-compilation environments, therefore Automake
# will honor the `STRIP' environment variable to overrule this program.
if test "$cross_compiling" != no; then
  if test -n "$ac_tool_prefix"; then
    # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
    set dummy ${ac_tool_prefix}strip; ac_word=$2
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
    if ${ac_cv_prog_STRIP+:} false; then :
      $as_echo_n "(cached) " >&6
    else
      if test -n "$STRIP"; then
        ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
      else
        as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
        for as_dir in $PATH
        do
          IFS=$as_save_IFS
          test -z "$as_dir" && as_dir=.
          for ac_exec_ext in '' $ac_executable_extensions; do
            if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
              ac_cv_prog_STRIP="${ac_tool_prefix}strip"
              $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
              break 2
            fi
          done
        done
        IFS=$as_save_IFS
      fi
    fi
    STRIP=$ac_cv_prog_STRIP
    if test -n "$STRIP"; then
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
$as_echo "$STRIP" >&6; }
    else
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
    fi
  fi
  # No prefixed tool was found (or no prefix is in use): look for plain `strip'.
  if test -z "$ac_cv_prog_STRIP"; then
    ac_ct_STRIP=$STRIP
    # Extract the first word of "strip", so it can be a program name with args.
    set dummy strip; ac_word=$2
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
    if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
      $as_echo_n "(cached) " >&6
    else
      if test -n "$ac_ct_STRIP"; then
        ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
      else
        as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
        for as_dir in $PATH
        do
          IFS=$as_save_IFS
          test -z "$as_dir" && as_dir=.
          for ac_exec_ext in '' $ac_executable_extensions; do
            if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
              ac_cv_prog_ac_ct_STRIP="strip"
              $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
              break 2
            fi
          done
        done
        IFS=$as_save_IFS
      fi
    fi
    ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
    if test -n "$ac_ct_STRIP"; then
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
$as_echo "$ac_ct_STRIP" >&6; }
    else
      { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
    fi

    if test "x$ac_ct_STRIP" = x; then
      STRIP=":"
    else
      case $cross_compiling:$ac_tool_warned in
      yes:)
        { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
        ac_tool_warned=yes ;;
      esac
      STRIP=$ac_ct_STRIP
    fi
  else
    STRIP="$ac_cv_prog_STRIP"
  fi
fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"

# --- mkdir -p ----------------------------------------------------------------
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
$as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
if test -z "$MKDIR_P"; then
  if ${ac_cv_path_mkdir+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
    for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
    do
      IFS=$as_save_IFS
      test -z "$as_dir" && as_dir=.
      for ac_prog in mkdir gmkdir; do
        for ac_exec_ext in '' $ac_executable_extensions; do
          as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
          # Accept only implementations whose --version banner matches.
          case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
            'mkdir (GNU coreutils) '* | \
            'mkdir (coreutils) '* | \
            'mkdir (fileutils) '4.1*)
              ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
              break 3;;
          esac
        done
      done
    done
    IFS=$as_save_IFS
  fi

  # A mkdir that does not understand --version may have created a directory
  # of that name; remove it.
  test -d ./--version && rmdir ./--version
  if test "${ac_cv_path_mkdir+set}" = set; then
    MKDIR_P="$ac_cv_path_mkdir -p"
  else
    # As a last resort, use the slow shell script.  Don't cache a
    # value for MKDIR_P within a source directory, because that will
    # break other packages using the cache if that directory is
    # removed, or if the value is a relative name.
    MKDIR_P="$ac_install_sh -d"
  fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
$as_echo "$MKDIR_P" >&6; }

mkdir_p="$MKDIR_P"
case $mkdir_p in
  [\\/$]* | ?:[\\/]*) ;;
  */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
esac

# --- awk ---------------------------------------------------------------------
for ac_prog in gawk mawk nawk awk
do
  # Extract the first word of "$ac_prog", so it can be a program name with args.
  set dummy $ac_prog; ac_word=$2
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
  if ${ac_cv_prog_AWK+:} false; then :
    $as_echo_n "(cached) " >&6
  else
    if test -n "$AWK"; then
      ac_cv_prog_AWK="$AWK" # Let the user override the test.
    else
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_exec_ext in '' $ac_executable_extensions; do
          if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
            ac_cv_prog_AWK="$ac_prog"
            $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
            break 2
          fi
        done
      done
      IFS=$as_save_IFS
    fi
  fi
  AWK=$ac_cv_prog_AWK
  if test -n "$AWK"; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
$as_echo "$AWK" >&6; }
  else
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
  fi

  test -n "$AWK" && break
done

# --- does make set $(MAKE)? --------------------------------------------------
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} sets \$(MAKE)" >&5
$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
set x ${MAKE-make}
ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
  $as_echo_n "(cached) " >&6
else
  # The recipe line inside the here-document must start with a TAB.
  cat >conftest.make <<\_ACEOF
SHELL = /bin/sh
all:
	@echo '@@@%%%=$(MAKE)=@@@%%%'
_ACEOF
  # GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
  case `${MAKE-make} -f conftest.make 2>/dev/null` in
    *@@@%%%=?*=@@@%%%*)
      eval ac_cv_prog_make_${ac_make}_set=yes;;
    *)
      eval ac_cv_prog_make_${ac_make}_set=no;;
  esac
  rm -f conftest.make
fi
if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
  SET_MAKE=
else
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
  SET_MAKE="MAKE=${MAKE-make}"
fi

# --- can file names start with a dot? ----------------------------------------
rm -rf .tst 2>/dev/null
mkdir .tst 2>/dev/null
if test -d .tst; then
  am__leading_dot=.
else
  am__leading_dot=_
fi
rmdir .tst 2>/dev/null

# --- Automake initialisation -------------------------------------------------
if test "`cd $srcdir && pwd`" != "`pwd`"; then
  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
  # is not polluted with repeated "-I."
  am__isrc=' -I$(srcdir)'
  # test to see if srcdir already configured
  if test -f $srcdir/config.status; then
    as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
  fi
fi

# test whether we have cygpath
if test -z "$CYGPATH_W"; then
  if (cygpath --version) >/dev/null 2>/dev/null; then
    CYGPATH_W='cygpath -w'
  else
    CYGPATH_W=echo
  fi
fi

# Define the identity of the package.
PACKAGE='ecm'
VERSION='6.4.4'

cat >>confdefs.h <<_ACEOF
#define PACKAGE "$PACKAGE"
_ACEOF

cat >>confdefs.h <<_ACEOF
#define VERSION "$VERSION"
_ACEOF

# Some tools Automake needs.
ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}
AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}
AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}

# We need awk for the "check" target.  The system "awk" is bad on
# some platforms.
# Always define AMTAR for backward compatibility.  Yes, it's still used
# in the wild :-(  We should find a proper way to deprecate it ...
AMTAR='$${TAR-tar}'
am__tar='$${TAR-tar} chof - "$$tardir"'
am__untar='$${TAR-tar} xf -'

# NOTE(review): the Automake initialisation sequence (srcdir/config.status
# check, cygpath probe, PACKAGE/VERSION definitions, ACLOCAL..MAKEINFO and
# the AMTAR/am__tar/am__untar assignments) was emitted a second time,
# verbatim, at this point.  Every assignment in it is idempotent; its only
# extra effect was appending duplicate PACKAGE and VERSION #defines to
# confdefs.h, so the repeat has been dropped.  This script is generated
# output: the lasting fix is to remove the duplicate macro expansion from
# the configure input and regenerate.

# --- build / host canonicalisation -------------------------------------------
# Make sure we can run config.sub.
$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
  as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5

{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
$as_echo_n "checking build system type... " >&6; }
if ${ac_cv_build+:} false; then :
  $as_echo_n "(cached) " >&6
else
  ac_build_alias=$build_alias
  test "x$ac_build_alias" = x &&
    ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
  test "x$ac_build_alias" = x &&
    as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
  ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
    as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
$as_echo "$ac_cv_build" >&6; }
case $ac_cv_build in
*-*-*) ;;
*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
esac
build=$ac_cv_build
# Split the triplet on '-' into cpu, vendor and (everything else) os.
ac_save_IFS=$IFS; IFS='-'
set x $ac_cv_build
shift
build_cpu=$1
build_vendor=$2
shift; shift
# Remember, the first character of IFS is used to create $*,
# except with old shells:
build_os=$*
IFS=$ac_save_IFS
case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac

{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
$as_echo_n "checking host system type... " >&6; }
if ${ac_cv_host+:} false; then :
  $as_echo_n "(cached) " >&6
else
  if test "x$host_alias" = x; then
    ac_cv_host=$ac_cv_build
  else
    ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
      as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
  fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5
$as_echo "$ac_cv_host" >&6; }
case $ac_cv_host in
*-*-*) ;;
*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
esac
host=$ac_cv_host
ac_save_IFS=$IFS; IFS='-'
set x $ac_cv_host
shift
host_cpu=$1
host_vendor=$2
shift; shift
# Remember, the first character of IFS is used to create $*,
# except with old shells:
host_os=$*
IFS=$ac_save_IFS
case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac

# Record whether the caller supplied a compiler or compiler flags.
unset GMP_CFLAGS GMP_CC user_redefine_cc
if test -n "$CFLAGS" || test -n "$CC" ; then
  user_redefine_cc=yes
fi

# --- grep --------------------------------------------------------------------
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
if ${ac_cv_path_GREP+:} false; then :
  $as_echo_n "(cached) " >&6
else
  if test -z "$GREP"; then
    ac_path_GREP_found=false
    # Loop through the user's path and test for each of PROGNAME-LIST
    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
    for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
    do
      IFS=$as_save_IFS
      test -z "$as_dir" && as_dir=.
      for ac_prog in grep ggrep; do
        for ac_exec_ext in '' $ac_executable_extensions; do
          ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
          as_fn_executable_p "$ac_path_GREP" || continue
          # Check for GNU ac_path_GREP and select it if it is found.
          # Check for GNU $ac_path_GREP
          case `"$ac_path_GREP" --version 2>&1` in
          *GNU*)
            ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
          *)
            # Feed the candidate ever longer lines; remember the one that
            # copes with the most doublings.
            ac_count=0
            $as_echo_n 0123456789 >"conftest.in"
            while :
            do
              cat "conftest.in" "conftest.in" >"conftest.tmp"
              mv "conftest.tmp" "conftest.in"
              cp "conftest.in" "conftest.nl"
              $as_echo 'GREP' >> "conftest.nl"
              "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
              diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
              as_fn_arith $ac_count + 1 && ac_count=$as_val
              if test $ac_count -gt ${ac_path_GREP_max-0}; then
                # Best one so far, save it but keep looking for a better one
                ac_cv_path_GREP="$ac_path_GREP"
                ac_path_GREP_max=$ac_count
              fi
              # 10*(2^10) chars as input seems more than enough
              test $ac_count -gt 10 && break
            done
            rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
          esac

          $ac_path_GREP_found && break 3
        done
      done
    done
    IFS=$as_save_IFS
    if test -z "$ac_cv_path_GREP"; then
      as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
    fi
  else
    ac_cv_path_GREP=$GREP
  fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
$as_echo "$ac_cv_path_GREP" >&6; }
GREP="$ac_cv_path_GREP"

# --- egrep -------------------------------------------------------------------
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
$as_echo_n "checking for egrep... " >&6; }
if ${ac_cv_path_EGREP+:} false; then :
  $as_echo_n "(cached) " >&6
else
  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
  then ac_cv_path_EGREP="$GREP -E"
  else
    if test -z "$EGREP"; then
      ac_path_EGREP_found=false
      # Loop through the user's path and test for each of PROGNAME-LIST
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_prog in egrep; do
          for ac_exec_ext in '' $ac_executable_extensions; do
            ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
            as_fn_executable_p "$ac_path_EGREP" || continue
            # Check for GNU ac_path_EGREP and select it if it is found.
            # Check for GNU $ac_path_EGREP
            case `"$ac_path_EGREP" --version 2>&1` in
            *GNU*)
              ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
            *)
              ac_count=0
              $as_echo_n 0123456789 >"conftest.in"
              while :
              do
                cat "conftest.in" "conftest.in" >"conftest.tmp"
                mv "conftest.tmp" "conftest.in"
                cp "conftest.in" "conftest.nl"
                $as_echo 'EGREP' >> "conftest.nl"
                "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
                diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
                as_fn_arith $ac_count + 1 && ac_count=$as_val
                if test $ac_count -gt ${ac_path_EGREP_max-0}; then
                  # Best one so far, save it but keep looking for a better one
                  ac_cv_path_EGREP="$ac_path_EGREP"
                  ac_path_EGREP_max=$ac_count
                fi
                # 10*(2^10) chars as input seems more than enough
                test $ac_count -gt 10 && break
              done
              rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
            esac

            $ac_path_EGREP_found && break 3
          done
        done
      done
      IFS=$as_save_IFS
      if test -z "$ac_cv_path_EGREP"; then
        as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
      fi
    else
      ac_cv_path_EGREP=$EGREP
    fi
  fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
$as_echo "$ac_cv_path_EGREP" >&6; }
EGREP="$ac_cv_path_EGREP"

# --- sed ---------------------------------------------------------------------
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
$as_echo_n "checking for a sed that does not truncate output... " >&6; }
if ${ac_cv_path_SED+:} false; then :
  $as_echo_n "(cached) " >&6
else
  # Build a long sed script by repeated doubling, keep its first 99 lines.
  ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
  for ac_i in 1 2 3 4 5 6 7; do
    ac_script="$ac_script$as_nl$ac_script"
  done
  echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed
  { ac_script=; unset ac_script;}
  if test -z "$SED"; then
    ac_path_SED_found=false
    # Loop through the user's path and test for each of PROGNAME-LIST
    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
    for as_dir in $PATH
    do
      IFS=$as_save_IFS
      test -z "$as_dir" && as_dir=.
      for ac_prog in sed gsed; do
        for ac_exec_ext in '' $ac_executable_extensions; do
          ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
          as_fn_executable_p "$ac_path_SED" || continue
          # Check for GNU ac_path_SED and select it if it is found.
          # Check for GNU $ac_path_SED
          case `"$ac_path_SED" --version 2>&1` in
          *GNU*)
            ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;;
          *)
            ac_count=0
            $as_echo_n 0123456789 >"conftest.in"
            while :
            do
              cat "conftest.in" "conftest.in" >"conftest.tmp"
              mv "conftest.tmp" "conftest.in"
              cp "conftest.in" "conftest.nl"
              $as_echo '' >> "conftest.nl"
              "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break
              diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
              as_fn_arith $ac_count + 1 && ac_count=$as_val
              if test $ac_count -gt ${ac_path_SED_max-0}; then
                # Best one so far, save it but keep looking for a better one
                ac_cv_path_SED="$ac_path_SED"
                ac_path_SED_max=$ac_count
              fi
              # 10*(2^10) chars as input seems more than enough
              test $ac_count -gt 10 && break
            done
            rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
          esac

          $ac_path_SED_found && break 3
        done
      done
    done
    IFS=$as_save_IFS
    if test -z "$ac_cv_path_SED"; then
      as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5
    fi
  else
    ac_cv_path_SED=$SED
  fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5
$as_echo "$ac_cv_path_SED" >&6; }
SED="$ac_cv_path_SED"
rm -f conftest.sed

# --- --with-* options --------------------------------------------------------
# Check whether --with-gmp was given.
if test "${with_gmp+set}" = set; then :
  withval=$with_gmp;
  with_gmp_include=$withval/include
  with_gmp_lib=$withval/lib
fi

# Check whether --with-gmp_include was given.
if test "${with_gmp_include+set}" = set; then :
  withval=$with_gmp_include;
  with_gmp_include=$withval
fi

# Check whether --with-gmp_lib was given.
if test "${with_gmp_lib+set}" = set; then :
  withval=$with_gmp_lib;
  with_gmp_lib=$withval
fi

# Check whether --with-gwnum was given.
if test "${with_gwnum+set}" = set; then :
  withval=$with_gwnum;
  with_gwnum=$withval
fi

if test x"$with_gmp_include" != "x"
then
  if ! test -d "$with_gmp_include"
  then
    as_fn_error $? "Specified GMP include directory \"$with_gmp_include\" does not exist" "$LINENO" 5
  fi
  # FIX: this used to be  CPPFLAGS="-I$with_gmp_include" , which silently
  # discarded any CPPFLAGS supplied by the caller.  The GMP directory is
  # still searched first; the caller's flags are appended when non-empty,
  # so the value is unchanged when CPPFLAGS was empty or unset.
  CPPFLAGS="-I$with_gmp_include${CPPFLAGS:+ $CPPFLAGS}"
fi

if test x"$with_gmp_lib" != "x"
then
  if ! test -d "$with_gmp_lib"
  then
    as_fn_error $? "Specified GMP library directory \"$with_gmp_lib\" does not exist" "$LINENO" 5
  fi
fi

############################
# Parse --enable-* options #
############################

# Check whether --enable-assert was given.
if test "${enable_assert+set}" = set; then :
  enableval=$enable_assert;
else
  enable_assert=no
fi

if test "x$enable_assert" = xyes; then
  $as_echo "#define WANT_ASSERT 1" >>confdefs.h
  # FIX: the m4 definition had lost its macro name and read `define(, <1>)'.
  # It mirrors the C #define on the previous line, so the name is restored
  # as WANT_ASSERT.  NOTE(review): the <...> quoting is assumed from the
  # surviving `<1>'; confirm against the quote characters used in config.m4.
  echo 'define(<WANT_ASSERT>, <1>)' >>$gmp_tmpconfigm4
fi

# Check whether --enable-shellcmd was given.
if test "${enable_shellcmd+set}" = set; then :
  enableval=$enable_shellcmd;
fi

if test "x$enable_shellcmd" = xyes; then
  $as_echo "#define WANT_SHELLCMD 1" >>confdefs.h
fi

# Check whether --enable-gmp-cflags was given.
if test "${enable_gmp_cflags+set}" = set; then :
  enableval=$enable_gmp_cflags;
else
  enable_gmp_cflags=yes
fi

# Check whether --enable-openmp was given.
if test "${enable_openmp+set}" = set; then :
  enableval=$enable_openmp;
fi

# Check whether --enable-sse2 was given.
if test "${enable_sse2+set}" = set; then :
  enableval=$enable_sse2;
fi

# Check whether --enable-asm-redc was given.
if test "${enable_asm_redc+set}" = set; then :
  enableval=$enable_asm_redc;
fi

# Check whether --enable-memory-debug was given.
if test "${enable_memory_debug+set}" = set; then :
  enableval=$enable_memory_debug;
fi

if test "x$enable_memory_debug" = xyes; then
  $as_echo "#define MEMORY_DEBUG 1" >>confdefs.h
fi
# Exactly one of the two variables is empty; the other holds '#'.
if test "x$enable_memory_debug" = xyes; then
  MEMORY_DEBUG_TRUE=
  MEMORY_DEBUG_FALSE='#'
else
  MEMORY_DEBUG_TRUE='#'
  MEMORY_DEBUG_FALSE=
fi

# Check whether --enable-mulredc-svoboda was given.
if test "${enable_mulredc_svoboda+set}" = set; then : enableval=$enable_mulredc_svoboda; fi if test "x$enable_mulredc_svoboda" = xyes; then $as_echo "#define MULREDC_SVOBODA 1" >>confdefs.h echo 'define(, <1>)' >>$gmp_tmpconfigm4 fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test "x$user_redefine_cc" = x && test "x$enable_gmp_cflags" = xyes && test "x$cross_compiling" != xyes; then if test "x$GMP_CC$GMP_CFLAGS" = x; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for CC and CFLAGS in gmp.h" >&5 $as_echo_n "checking for CC and CFLAGS in gmp.h... " >&6; } GMP_CC=__GMP_CC GMP_CFLAGS=__GMP_CFLAGS for cpp in /lib/cpp gcc cc c99 do test $cpp = /lib/cpp || cpp="$cpp -E" echo "Trying to run $cpp" >&5 cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ foo _ACEOF if $cpp $CPPFLAGS conftest.$ac_ext > /dev/null 2> /dev/null ; then # Get CC and CFLAGS cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include "gmp.h" MPFR_OPTION_CC __GMP_CC MPFR_OPTION_CFLAGS __GMP_CFLAGS _ACEOF echo "Trying to parse gmp.h with: $cpp $CPPFLAGS conftest.$ac_ext" >&5 if $cpp $CPPFLAGS conftest.$ac_ext 2> /dev/null > conftest.out; then GMP_CC="`$EGREP MPFR_OPTION_CC conftest.out | $SED -e 's/MPFR_OPTION_CC //g' | $SED -e 's/"//g'`" GMP_CFLAGS="`$EGREP MPFR_OPTION_CFLAGS conftest.out | $SED -e 's/MPFR_OPTION_CFLAGS //g'| $SED -e 's/"//g'`" echo "Success, GMP_CC=$GMP_CC, GMP_CFLAGS=$GMP_CFLAGS" >&5 break else echo "Could not parse gmp.h with $cpp" >&5 fi else echo "Could not run $cpp" >&5 fi done rm -f conftest* if test "x$GMP_CC" = "x__GMP_CC" || test "x$GMP_CFLAGS" = "x__GMP_CFLAGS" ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } GMP_CFLAGS= GMP_CC= else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes CC=$GMP_CC CFLAGS=$GMP_CFLAGS" >&5 $as_echo "yes CC=$GMP_CC CFLAGS=$GMP_CFLAGS" >&6; } fi fi if test "x$GMP_CC$GMP_CFLAGS" != x; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether CC=$GMP_CC and CFLAGS=$GMP_CFLAGS works" >&5 $as_echo_n "checking whether CC=$GMP_CC and CFLAGS=$GMP_CFLAGS works... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main (void) { return 0; } _ACEOF if $GMP_CC $GMP_CFLAGS -o conftest conftest.$ac_ext 2> /dev/null ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } CFLAGS=$GMP_CFLAGS CC=$GMP_CC else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, reverting to default" >&5 $as_echo "no, reverting to default" >&6; } fi rm -f conftest* fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. 
set dummy ${ac_tool_prefix}gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. 
set dummy cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 $as_echo_n "checking whether the C compiler works... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { { ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. 
break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi if test -z "$ac_file"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 $as_echo_n "checking for C compiler default output file name... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 $as_echo "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. 
For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 $as_echo_n "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 $as_echo "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if ${ac_cv_objext+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 $as_echo "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } if ${ac_cv_c_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_compiler_gnu=yes else ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 $as_echo "$ac_cv_c_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } if ${ac_cv_prog_cc_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes else CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 $as_echo "$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } if ${ac_cv_prog_cc_c89+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include struct stat; /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. 
The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_c89=$ac_arg fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 $as_echo "none needed" >&6; } ;; xno) { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 $as_echo "unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 $as_echo "$ac_cv_prog_cc_c89" >&6; } ;; esac if test "x$ac_cv_prog_cc_c89" != xno; then : fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu DEPDIR="${am__leading_dot}deps" ac_config_commands="$ac_config_commands depfiles" am_make=${MAKE-make} cat > confinc << 'END' am__doit: @echo this is the am__doit target .PHONY: am__doit END # If we don't find an include directive, just 
comment out the code. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for style of include used by $am_make" >&5 $as_echo_n "checking for style of include used by $am_make... " >&6; } am__include="#" am__quote= _am_result=none # First try GNU make style include. echo "include confinc" > confmf # Ignore all kinds of additional output from `make'. case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=include am__quote= _am_result=GNU ;; esac # Now try BSD make style include. if test "$am__include" = "#"; then echo '.include "confinc"' > confmf case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=.include am__quote="\"" _am_result=BSD ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $_am_result" >&5 $as_echo "$_am_result" >&6; } rm -f confinc confmf # Check whether --enable-dependency-tracking was given. if test "${enable_dependency_tracking+set}" = set; then : enableval=$enable_dependency_tracking; fi if test "x$enable_dependency_tracking" != xno; then am_depcomp="$ac_aux_dir/depcomp" AMDEPBACKSLASH='\' am__nodep='_no' fi if test "x$enable_dependency_tracking" != xno; then AMDEP_TRUE= AMDEP_FALSE='#' else AMDEP_TRUE='#' AMDEP_FALSE= fi depcc="$CC" am_compiler_list= { $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 $as_echo_n "checking dependency style of $depcc... " >&6; } if ${am_cv_CC_dependencies_compiler_type+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named `D' -- because `-MD' means `put the output # in D'. rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. 
cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CC_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with # Solaris 8's {/usr,}/bin/sh. touch sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with `-c' and `-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle `-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. 
test "$am__universal" = false || continue ;; nosideeffect) # after this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok `-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CC_dependencies_compiler_type=$depmode break fi fi done cd .. 
rm -rf conftest.dir else am_cv_CC_dependencies_compiler_type=none fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 $as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then am__fastdepCC_TRUE= am__fastdepCC_FALSE='#' else am__fastdepCC_TRUE='#' am__fastdepCC_FALSE= fi depcc="$CC" am_compiler_list= { $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 $as_echo_n "checking dependency style of $depcc... " >&6; } if ${am_cv_CC_dependencies_compiler_type+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named `D' -- because `-MD' means `put the output # in D'. rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. 
mkdir sub am_cv_CC_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with # Solaris 8's {/usr,}/bin/sh. touch sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with `-c' and `-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle `-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # after this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok `-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. 
am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CC_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_CC_dependencies_compiler_type=none fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CC_dependencies_compiler_type" >&5 $as_echo "$am_cv_CC_dependencies_compiler_type" >&6; } CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then am__fastdepCC_TRUE= am__fastdepCC_FALSE='#' else am__fastdepCC_TRUE='#' am__fastdepCC_FALSE= fi # By default we simply use the C compiler to build assembly code. test "${CCAS+set}" = set || CCAS=$CC test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS depcc="$CCAS" am_compiler_list= { $as_echo "$as_me:${as_lineno-$LINENO}: checking dependency style of $depcc" >&5 $as_echo_n "checking dependency style of $depcc... 
" >&6; } if ${am_cv_CCAS_dependencies_compiler_type+:} false; then : $as_echo_n "(cached) " >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named `D' -- because `-MD' means `put the output # in D'. rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CCAS_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi am__universal=false for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with # Solaris 8's {/usr,}/bin/sh. touch sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with `-c' and `-o' for the sake of the "dashmstdout" # mode. 
It turns out that the SunPro C++ compiler does not properly # handle `-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # after this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok `-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CCAS_dependencies_compiler_type=$depmode break fi fi done cd .. 
rm -rf conftest.dir else am_cv_CCAS_dependencies_compiler_type=none fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $am_cv_CCAS_dependencies_compiler_type" >&5 $as_echo "$am_cv_CCAS_dependencies_compiler_type" >&6; } CCASDEPMODE=depmode=$am_cv_CCAS_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CCAS_dependencies_compiler_type" = gcc3; then am__fastdepCCAS_TRUE= am__fastdepCCAS_FALSE='#' else am__fastdepCCAS_TRUE='#' am__fastdepCCAS_FALSE= fi if test "x$CC" != xcc; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC and cc understand -c and -o together" >&5 $as_echo_n "checking whether $CC and cc understand -c and -o together... " >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether cc understands -c and -o together" >&5 $as_echo_n "checking whether cc understands -c and -o together... " >&6; } fi set dummy $CC; ac_cc=`$as_echo "$2" | sed 's/[^a-zA-Z0-9_]/_/g;s/^[0-9]/_/'` if eval \${ac_cv_prog_cc_${ac_cc}_c_o+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF # Make sure it works both with $CC and with simple cc. # We do the test twice because some compilers refuse to overwrite an # existing .o file with -o, though they will create one. ac_try='$CC -c conftest.$ac_ext -o conftest2.$ac_objext >&5' rm -f conftest2.* if { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -f conftest2.$ac_objext && { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? 
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then eval ac_cv_prog_cc_${ac_cc}_c_o=yes if test "x$CC" != xcc; then # Test first that cc exists at all. if { ac_try='cc -c conftest.$ac_ext >&5' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then ac_try='cc -c conftest.$ac_ext -o conftest2.$ac_objext >&5' rm -f conftest2.* if { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -f conftest2.$ac_objext && { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then # cc works too. : else # cc exists but doesn't like -o. eval ac_cv_prog_cc_${ac_cc}_c_o=no fi fi fi else eval ac_cv_prog_cc_${ac_cc}_c_o=no fi rm -f core conftest* fi if eval test \$ac_cv_prog_cc_${ac_cc}_c_o = yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } $as_echo "#define NO_MINUS_C_MINUS_O 1" >>confdefs.h fi # FIXME: we rely on the cache variable name because # there is no other way. 
# Fetch the cached result of the "-c and -o together" probe for the current
# compiler.  The cache variable name is built from the first word of $CC,
# sanitized so that it is a valid shell identifier.
set dummy $CC
am_cc=`echo $2 | sed 's/[^a-zA-Z0-9_]/_/g;s/^[0-9]/_/'`
eval am_t=\$ac_cv_prog_cc_${am_cc}_c_o
if test "$am_t" != yes; then
   # Losing compiler, so override with the script.
   # FIXME: It is wrong to rewrite CC.
   # But if we don't then we get into trouble of one sort or another.
   # A longer-term fix would be to have automake use am__CC in this case,
   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
   CC="$am_aux_dir/compile $CC"
fi


# Now that we have decided on CC and CFLAGS, init libtool
# Don't make a shared library by default. Enable building a shared library
# by specifying "--enable-shared" on the ./configure command line

# Warn when the build directory name contains whitespace.
# NOTE(review): both alternatives of the pattern below are identical in this
# copy; presumably the second one matched a tab before whitespace was
# flattened -- confirm against the libtool macro that generated it.
case `pwd` in
  *\ * | *\ *)
    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5
$as_echo "$as_me: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&2;} ;;
esac



macro_version='2.4.2'
macro_revision='1.3337'

ltmain="$ac_aux_dir/ltmain.sh"

# Backslashify metacharacters that are still active within
# double-quoted strings.
sed_quote_subst='s/\(["`$\\]\)/\\\1/g'

# Same as above, but do not quote variable references.
double_quote_subst='s/\(["`\\]\)/\\\1/g'

# Sed substitution to delay expansion of an escaped shell variable in a
# double_quote_subst'ed string.
delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'

# Sed substitution to delay expansion of an escaped single quote.
delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'

# Sed substitution to avoid accidental globbing in evaled expressions
no_glob_subst='s/\*/\\\*/g'

# Build a long string of backslashes; the probes that follow use it to find
# a command that prints its argument without altering it.
ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO

{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to print strings" >&5
$as_echo_n "checking how to print strings... " >&6; }
# Test print first, because it will be a builtin if present.
if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='print -r --' elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='printf %s\n' else # Use this function as a fallback that always works. func_fallback_echo () { eval 'cat <<_LTECHO_EOF $1 _LTECHO_EOF' } ECHO='func_fallback_echo' fi # func_echo_all arg... # Invoke $ECHO with all args, space-separated. func_echo_all () { $ECHO "" } case "$ECHO" in printf*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: printf" >&5 $as_echo "printf" >&6; } ;; print*) { $as_echo "$as_me:${as_lineno-$LINENO}: result: print -r" >&5 $as_echo "print -r" >&6; } ;; *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: cat" >&5 $as_echo "cat" >&6; } ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5 $as_echo_n "checking for a sed that does not truncate output... " >&6; } if ${ac_cv_path_SED+:} false; then : $as_echo_n "(cached) " >&6 else ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/ for ac_i in 1 2 3 4 5 6 7; do ac_script="$ac_script$as_nl$ac_script" done echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed { ac_script=; unset ac_script;} if test -z "$SED"; then ac_path_SED_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in sed gsed; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_SED="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_SED" || continue # Check for GNU ac_path_SED and select it if it is found. 
# Check for GNU $ac_path_SED case `"$ac_path_SED" --version 2>&1` in *GNU*) ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo '' >> "conftest.nl" "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_SED_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_SED="$ac_path_SED" ac_path_SED_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_SED_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_SED"; then as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5 fi else ac_cv_path_SED=$SED fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5 $as_echo "$ac_cv_path_SED" >&6; } SED="$ac_cv_path_SED" rm -f conftest.sed test -z "$SED" && SED=sed Xsed="$SED -e 1s/^X//" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5 $as_echo_n "checking for fgrep... " >&6; } if ${ac_cv_path_FGREP+:} false; then : $as_echo_n "(cached) " >&6 else if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1 then ac_cv_path_FGREP="$GREP -F" else if test -z "$FGREP"; then ac_path_FGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in fgrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext" as_fn_executable_p "$ac_path_FGREP" || continue # Check for GNU ac_path_FGREP and select it if it is found. 
# Check for GNU $ac_path_FGREP case `"$ac_path_FGREP" --version 2>&1` in *GNU*) ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;; *) ac_count=0 $as_echo_n 0123456789 >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" $as_echo 'FGREP' >> "conftest.nl" "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break as_fn_arith $ac_count + 1 && ac_count=$as_val if test $ac_count -gt ${ac_path_FGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_FGREP_found && break 3 done done done IFS=$as_save_IFS if test -z "$ac_cv_path_FGREP"; then as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 fi else ac_cv_path_FGREP=$FGREP fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5 $as_echo "$ac_cv_path_FGREP" >&6; } FGREP="$ac_cv_path_FGREP" test -z "$GREP" && GREP=grep # Check whether --with-gnu-ld was given. if test "${with_gnu_ld+set}" = set; then : withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes else with_gnu_ld=no fi ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" >&5 $as_echo_n "checking for ld used by $CC... " >&6; } case $host in *-*-mingw*) # gcc leaves a trailing carriage return which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. 
[\\/]* | ?:[\\/]*)
      re_direlt='/[^/][^/]*/\.\./'
      # Canonicalize the pathname of ld
      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
        ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
      done
      test -z "$LD" && LD="$ac_prog"
      ;;
  "")
    # If it fails, then pretend we aren't using GCC.
    ac_prog=ld
    ;;
  *)
    # If it is relative, then search for the first ld in PATH.
    with_gnu_ld=unknown
    ;;
  esac
elif test "$with_gnu_ld" = yes; then
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5
$as_echo_n "checking for GNU ld... " >&6; }
else
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
$as_echo_n "checking for non-GNU ld... " >&6; }
fi
if ${lt_cv_path_LD+:} false; then :
  $as_echo_n "(cached) " >&6
else
  if test -z "$LD"; then
    # Walk $PATH looking for $ac_prog, stopping at the first candidate whose
    # GNU/non-GNU flavor matches the --with-gnu-ld preference.
    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
    for ac_dir in $PATH; do
      IFS="$lt_save_ifs"
      test -z "$ac_dir" && ac_dir=.
      if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
        lt_cv_path_LD="$ac_dir/$ac_prog"
        # Check to see if the program is GNU ld.  I'd rather use --version,
        # but apparently some variants of GNU ld only accept -v.
        # Break only if it was the GNU/non-GNU ld that we prefer.
        # NOTE(review): the text from `</dev/null' up to the result message
        # was missing in this copy and has been restored from the upstream
        # libtool 2.4.2 LT_PATH_LD macro -- confirm against a pristine
        # configure script.
        case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
        *GNU* | *'with BFD'*)
          test "$with_gnu_ld" != no && break
          ;;
        *)
          test "$with_gnu_ld" != yes && break
          ;;
        esac
      fi
    done
    IFS="$lt_save_ifs"
  else
    lt_cv_path_LD="$LD" # Let the user override the test with a path.
  fi
fi

LD="$lt_cv_path_LD"
if test -n "$LD"; then
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LD" >&5
$as_echo "$LD" >&6; }
else
  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
if ${lt_cv_prog_gnu_ld+:} false; then :
  $as_echo_n "(cached) " >&6
else
  # I'd rather use --version here, but apparently some GNU lds only accept -v.
  # NOTE(review): this case statement was truncated in the same way as the
  # one above and has been restored from upstream libtool 2.4.2 -- confirm.
  case `$LD -v 2>&1 </dev/null` in
  *GNU* | *'with BFD'*)
    lt_cv_prog_gnu_ld=yes
    ;;
  *)
    lt_cv_prog_gnu_ld=no
    ;;
  esac
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5
$as_echo "$lt_cv_prog_gnu_ld" >&6; }
with_gnu_ld=$lt_cv_prog_gnu_ld

{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
$as_echo_n "checking for BSD- or MS-compatible name lister (nm)...
" >&6; } if ${lt_cv_path_NM+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NM"; then # Let the user override the test. lt_cv_path_NM="$NM" else lt_nm_to_check="${ac_tool_prefix}nm" if test -n "$ac_tool_prefix" && test "$build" = "$host"; then lt_nm_to_check="$lt_nm_to_check nm" fi for lt_tmp_nm in $lt_nm_to_check; do lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. tmp_nm="$ac_dir/$lt_tmp_nm" if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then # Check to see if the nm accepts a BSD-compat flag. # Adding the `sed 1q' prevents false positives on HP-UX, which says: # nm: unknown option "B" ignored # Tru64's nm complains that /dev/null is an invalid object file case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in */dev/null* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break ;; *) case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break ;; *) lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but continue # so that we can try to find one that supports BSD flags ;; esac ;; esac fi done IFS="$lt_save_ifs" done : ${lt_cv_path_NM=no} fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5 $as_echo "$lt_cv_path_NM" >&6; } if test "$lt_cv_path_NM" != "no"; then NM="$lt_cv_path_NM" else # Didn't find any BSD compatible name lister, look for dumpbin. if test -n "$DUMPBIN"; then : # Let the user override the test. else if test -n "$ac_tool_prefix"; then for ac_prog in dumpbin "link -dump" do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } if ${ac_cv_prog_DUMPBIN+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DUMPBIN"; then ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DUMPBIN=$ac_cv_prog_DUMPBIN if test -n "$DUMPBIN"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5 $as_echo "$DUMPBIN" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$DUMPBIN" && break done fi if test -z "$DUMPBIN"; then ac_ct_DUMPBIN=$DUMPBIN for ac_prog in dumpbin "link -dump" do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_DUMPBIN"; then ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DUMPBIN="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN if test -n "$ac_ct_DUMPBIN"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5 $as_echo "$ac_ct_DUMPBIN" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_DUMPBIN" && break done if test "x$ac_ct_DUMPBIN" = x; then DUMPBIN=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DUMPBIN=$ac_ct_DUMPBIN fi fi case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in *COFF*) DUMPBIN="$DUMPBIN -symbols" ;; *) DUMPBIN=: ;; esac fi if test "$DUMPBIN" != ":"; then NM="$DUMPBIN" fi fi test -z "$NM" && NM=nm { $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5 $as_echo_n "checking the name lister ($NM) interface... 
" >&6; } if ${lt_cv_nm_interface+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5) (eval "$ac_compile" 2>conftest.err) cat conftest.err >&5 (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5) (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) cat conftest.err >&5 (eval echo "\"\$as_me:$LINENO: output\"" >&5) cat conftest.out >&5 if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" fi rm -f conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5 $as_echo "$lt_cv_nm_interface" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5 $as_echo_n "checking whether ln -s works... " >&6; } LN_S=$as_ln_s if test "$LN_S" = "ln -s"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5 $as_echo "no, using $LN_S" >&6; } fi # find the maximum length of command line arguments { $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5 $as_echo_n "checking the maximum length of command line arguments... " >&6; } if ${lt_cv_sys_max_cmd_len+:} false; then : $as_echo_n "(cached) " >&6 else i=0 teststring="ABCD" case $build_os in msdosdjgpp*) # On DJGPP, this test can blow up pretty badly due to problems in libc # (any single argument exceeding 2000 bytes causes a buffer overrun # during glob expansion). Even if it were fixed, the result of this # check would be larger than it should be. lt_cv_sys_max_cmd_len=12288; # 12K is about right ;; gnu*) # Under GNU Hurd, this test is not required because there is # no limit to the length of command line arguments. 
# Libtool will interpret -1 as no limit whatsoever lt_cv_sys_max_cmd_len=-1; ;; cygwin* | mingw* | cegcc*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. # Worse, since 9x/ME are not pre-emptively multitasking, # you end up with a "frozen" computer, even though with patience # the test eventually succeeds (with a max line length of 256k). # Instead, let's just punt: use the minimum linelength reported by # all of the supported platforms: 8192 (on NT/2K/XP). lt_cv_sys_max_cmd_len=8192; ;; mint*) # On MiNT this can take a long time and run out of memory. lt_cv_sys_max_cmd_len=8192; ;; amigaos*) # On AmigaOS with pdksh, this test takes hours, literally. # So we just punt and use a minimum line length of 8192. lt_cv_sys_max_cmd_len=8192; ;; netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` elif test -x /usr/sbin/sysctl; then lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` else lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs fi # And add a safety zone lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` ;; interix*) # We know the value 262144 and hardcode it with a safety zone (like BSD) lt_cv_sys_max_cmd_len=196608 ;; os2*) # The test takes a long time on OS/2. lt_cv_sys_max_cmd_len=8192 ;; osf*) # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not # nice to cause kernel panics so lets avoid the loop below. # First set a reasonable default. 
lt_cv_sys_max_cmd_len=16384 # if test -x /sbin/sysconfig; then case `/sbin/sysconfig -q proc exec_disable_arg_limit` in *1*) lt_cv_sys_max_cmd_len=-1 ;; esac fi ;; sco3.2v5*) lt_cv_sys_max_cmd_len=102400 ;; sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'` else lt_cv_sys_max_cmd_len=32768 fi ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` if test -n "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else # Make teststring a little bigger before we do anything with it. # a 1K string should be a reasonable start. for i in 1 2 3 4 5 6 7 8 ; do teststring=$teststring$teststring done SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} # If test is not a shell built-in, we'll probably end up computing a # maximum length that is only half of the actual maximum length, but # we can't tell. while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \ = "X$teststring$teststring"; } >/dev/null 2>&1 && test $i != 17 # 1/2 MB should be enough do i=`expr $i + 1` teststring=$teststring$teststring done # Only check the string length outside the loop. lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1` teststring= # Add a significant safety factor because C++ compilers can tack on # massive amounts of additional arguments before passing them to the # linker. It appears as though 1/2 is a usable value. 
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` fi ;; esac fi if test -n $lt_cv_sys_max_cmd_len ; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sys_max_cmd_len" >&5 $as_echo "$lt_cv_sys_max_cmd_len" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: none" >&5 $as_echo "none" >&6; } fi max_cmd_len=$lt_cv_sys_max_cmd_len : ${CP="cp -f"} : ${MV="mv -f"} : ${RM="rm -f"} { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands some XSI constructs" >&5 $as_echo_n "checking whether the shell understands some XSI constructs... " >&6; } # Try some XSI features xsi_shell=no ( _lt_dummy="a/b/c" test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ = c,a/b,b/c, \ && eval 'test $(( 1 + 1 )) -eq 2 \ && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ && xsi_shell=yes { $as_echo "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5 $as_echo "$xsi_shell" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the shell understands \"+=\"" >&5 $as_echo_n "checking whether the shell understands \"+=\"... " >&6; } lt_shell_append=no ( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \ >/dev/null 2>&1 \ && lt_shell_append=yes { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5 $as_echo "$lt_shell_append" >&6; } if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then lt_unset=unset else lt_unset=false fi # test EBCDIC or ASCII case `echo X|tr X '\101'` in A) # ASCII based system # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr lt_SP2NL='tr \040 \012' lt_NL2SP='tr \015\012 \040\040' ;; *) # EBCDIC based system lt_SP2NL='tr \100 \n' lt_NL2SP='tr \r\n \100\100' ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5 $as_echo_n "checking how to convert $build file names to $host format... 
" >&6; } if ${lt_cv_to_host_file_cmd+:} false; then : $as_echo_n "(cached) " >&6 else case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 ;; esac ;; *-*-cygwin* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_noop ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin ;; esac ;; * ) # unhandled hosts (and "normal" native builds) lt_cv_to_host_file_cmd=func_convert_file_noop ;; esac fi to_host_file_cmd=$lt_cv_to_host_file_cmd { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" >&5 $as_echo "$lt_cv_to_host_file_cmd" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5 $as_echo_n "checking how to convert $build file names to toolchain format... " >&6; } if ${lt_cv_to_tool_file_cmd+:} false; then : $as_echo_n "(cached) " >&6 else #assume ordinary cross tools, or native build. lt_cv_to_tool_file_cmd=func_convert_file_noop case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 ;; esac ;; esac fi to_tool_file_cmd=$lt_cv_to_tool_file_cmd { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" >&5 $as_echo "$lt_cv_to_tool_file_cmd" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5 $as_echo_n "checking for $LD option to reload object files... 
" >&6; } if ${lt_cv_ld_reload_flag+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_ld_reload_flag='-r' fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" >&5 $as_echo "$lt_cv_ld_reload_flag" >&6; } reload_flag=$lt_cv_ld_reload_flag case $reload_flag in "" | " "*) ;; *) reload_flag=" $reload_flag" ;; esac reload_cmds='$LD$reload_flag -o $output$reload_objs' case $host_os in cygwin* | mingw* | pw32* | cegcc*) if test "$GCC" != yes; then reload_cmds=false fi ;; darwin*) if test "$GCC" = yes; then reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs' else reload_cmds='$LD$reload_flag -o $output$reload_objs' fi ;; esac if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}objdump", so it can be a program name with args. set dummy ${ac_tool_prefix}objdump; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_OBJDUMP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OBJDUMP"; then ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OBJDUMP=$ac_cv_prog_OBJDUMP if test -n "$OBJDUMP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5 $as_echo "$OBJDUMP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_OBJDUMP"; then ac_ct_OBJDUMP=$OBJDUMP # Extract the first word of "objdump", so it can be a program name with args. 
set dummy objdump; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_OBJDUMP"; then ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OBJDUMP="objdump" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP if test -n "$ac_ct_OBJDUMP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5 $as_echo "$ac_ct_OBJDUMP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_OBJDUMP" = x; then OBJDUMP="false" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OBJDUMP=$ac_ct_OBJDUMP fi else OBJDUMP="$ac_cv_prog_OBJDUMP" fi test -z "$OBJDUMP" && OBJDUMP=objdump { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5 $as_echo_n "checking how to recognize dependent libraries... " >&6; } if ${lt_cv_deplibs_check_method+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_file_magic_cmd='$MAGIC_CMD' lt_cv_file_magic_test_file= lt_cv_deplibs_check_method='unknown' # Need to set the preceding variable on all platforms that support # interlibrary dependencies. # 'none' -- dependencies not supported. # `unknown' -- same as none, but documents that we really don't know. 
# 'pass_all' -- all dependencies passed with no checks. # 'test_compile' -- check by making test program. # 'file_magic [[regex]]' -- check by looking for files in library path # which responds to the $file_magic_cmd with a given extended regex. # If you have `file' or equivalent on your system and you're not sure # whether `pass_all' will *always* work, you probably want this one. case $host_os in aix[4-9]*) lt_cv_deplibs_check_method=pass_all ;; beos*) lt_cv_deplibs_check_method=pass_all ;; bsdi[45]*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' lt_cv_file_magic_cmd='/usr/bin/file -L' lt_cv_file_magic_test_file=/shlib/libc.so ;; cygwin*) # func_win32_libid is a shell function defined in ltmain.sh lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' ;; mingw* | pw32*) # Base MSYS/MinGW do not provide the 'file' command needed by # func_win32_libid shell function, so use a weaker test based on 'objdump', # unless we find 'file', for example because we are cross-compiling. # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin. if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' else # Keep this pattern in sync with the one in func_win32_libid. lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' lt_cv_file_magic_cmd='$OBJDUMP -f' fi ;; cegcc*) # use the weaker test based on 'objdump'. See mingw*. lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' lt_cv_file_magic_cmd='$OBJDUMP -f' ;; darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; freebsd* | dragonfly*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. 
# Let's accept both of them until this is cleared up. lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac else lt_cv_deplibs_check_method=pass_all fi ;; gnu*) lt_cv_deplibs_check_method=pass_all ;; haiku*) lt_cv_deplibs_check_method=pass_all ;; hpux10.20* | hpux11*) lt_cv_file_magic_cmd=/usr/bin/file case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so ;; hppa*64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]' lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl ;; *) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library' lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; esac ;; interix[3-9]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' ;; irix5* | irix6* | nonstopux*) case $LD in *-32|*"-32 ") libmagic=32-bit;; *-n32|*"-n32 ") libmagic=N32;; *-64|*"-64 ") libmagic=64-bit;; *) libmagic=never-match;; esac lt_cv_deplibs_check_method=pass_all ;; # This must be glibc/ELF. 
linux* | k*bsd*-gnu | kopensolaris*-gnu) lt_cv_deplibs_check_method=pass_all ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' fi ;; newos6*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; *nto* | *qnx*) lt_cv_deplibs_check_method=pass_all ;; openbsd*) if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' fi ;; osf3* | osf4* | osf5*) lt_cv_deplibs_check_method=pass_all ;; rdos*) lt_cv_deplibs_check_method=pass_all ;; solaris*) lt_cv_deplibs_check_method=pass_all ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.3*) case $host_vendor in motorola) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) lt_cv_deplibs_check_method=pass_all ;; sequent) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; siemens) lt_cv_deplibs_check_method=pass_all ;; pc) lt_cv_deplibs_check_method=pass_all ;; esac ;; tpf*) lt_cv_deplibs_check_method=pass_all ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_deplibs_check_method" >&5 $as_echo "$lt_cv_deplibs_check_method" >&6; } 
file_magic_glob= want_nocaseglob=no if test "$build" = "$host"; then case $host_os in mingw* | pw32*) if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then want_nocaseglob=yes else file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[\1]\/[\1]\/g;/g"` fi ;; esac fi file_magic_cmd=$lt_cv_file_magic_cmd deplibs_check_method=$lt_cv_deplibs_check_method test -z "$deplibs_check_method" && deplibs_check_method=unknown if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a program name with args. set dummy ${ac_tool_prefix}dlltool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_DLLTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DLLTOOL"; then ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DLLTOOL=$ac_cv_prog_DLLTOOL if test -n "$DLLTOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5 $as_echo "$DLLTOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_DLLTOOL"; then ac_ct_DLLTOOL=$DLLTOOL # Extract the first word of "dlltool", so it can be a program name with args. set dummy dlltool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_DLLTOOL"; then ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DLLTOOL="dlltool" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL if test -n "$ac_ct_DLLTOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5 $as_echo "$ac_ct_DLLTOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_DLLTOOL" = x; then DLLTOOL="false" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DLLTOOL=$ac_ct_DLLTOOL fi else DLLTOOL="$ac_cv_prog_DLLTOOL" fi test -z "$DLLTOOL" && DLLTOOL=dlltool { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5 $as_echo_n "checking how to associate runtime and link libraries... 
" >&6; } if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_sharedlib_from_linklib_cmd='unknown' case $host_os in cygwin* | mingw* | pw32* | cegcc*) # two different shell functions defined in ltmain.sh # decide which to use based on capabilities of $DLLTOOL case `$DLLTOOL --help 2>&1` in *--identify-strict*) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib ;; *) lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback ;; esac ;; *) # fallback: assume linklib IS sharedlib lt_cv_sharedlib_from_linklib_cmd="$ECHO" ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_sharedlib_from_linklib_cmd" >&5 $as_echo "$lt_cv_sharedlib_from_linklib_cmd" >&6; } sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO if test -n "$ac_tool_prefix"; then for ac_prog in ar do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_AR+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$AR"; then ac_cv_prog_AR="$AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_AR="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AR=$ac_cv_prog_AR if test -n "$AR"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $AR" >&5 $as_echo "$AR" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$AR" && break done fi if test -z "$AR"; then ac_ct_AR=$AR for ac_prog in ar do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_AR+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_AR"; then ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_AR="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_AR=$ac_cv_prog_ac_ct_AR if test -n "$ac_ct_AR"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5 $as_echo "$ac_ct_AR" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_AR" && break done if test "x$ac_ct_AR" = x; then AR="false" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac AR=$ac_ct_AR fi fi : ${AR=ar} : ${AR_FLAGS=cru} { $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5 $as_echo_n "checking for archiver @FILE support... " >&6; } if ${lt_cv_ar_at_file+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_ar_at_file=no cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : echo conftest.$ac_objext > conftest.lst lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5' { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 (eval $lt_ar_try) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if test "$ac_status" -eq 0; then # Ensure the archiver fails upon bogus file names. rm -f conftest.$ac_objext libconftest.a { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5 (eval $lt_ar_try) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } if test "$ac_status" -ne 0; then lt_cv_ar_at_file=@ fi fi rm -f conftest.* libconftest.a fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5 $as_echo "$lt_cv_ar_at_file" >&6; } if test "x$lt_cv_ar_at_file" = xno; then archiver_list_spec= else archiver_list_spec=$lt_cv_ar_at_file fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. set dummy ${ac_tool_prefix}strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_STRIP="${ac_tool_prefix}strip" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5 $as_echo "$STRIP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_STRIP"; then ac_ct_STRIP=$STRIP # Extract the first word of "strip", so it can be a program name with args. set dummy strip; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_STRIP+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_STRIP="strip" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5 $as_echo "$ac_ct_STRIP" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_STRIP" = x; then STRIP=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac STRIP=$ac_ct_STRIP fi else STRIP="$ac_cv_prog_STRIP" fi test -z "$STRIP" && STRIP=: if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. set dummy ${ac_tool_prefix}ranlib; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_RANLIB+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi RANLIB=$ac_cv_prog_RANLIB if test -n "$RANLIB"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5 $as_echo "$RANLIB" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_RANLIB"; then ac_ct_RANLIB=$RANLIB # Extract the first word of "ranlib", so it can be a program name with args. set dummy ranlib; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_RANLIB+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_RANLIB"; then ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_RANLIB="ranlib" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB if test -n "$ac_ct_RANLIB"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5 $as_echo "$ac_ct_RANLIB" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_RANLIB" = x; then RANLIB=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac RANLIB=$ac_ct_RANLIB fi else RANLIB="$ac_cv_prog_RANLIB" fi test -z "$RANLIB" && RANLIB=: # Determine commands to create old-style static archives. old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then case $host_os in openbsd*) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib" ;; *) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib" ;; esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib" fi case $host_os in darwin*) lock_old_archive_extraction=yes ;; *) lock_old_archive_extraction=no ;; esac # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # Check for command to grab the raw symbol name followed by C symbol from nm. { $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5 $as_echo_n "checking command to parse $NM output from $compiler object... 
" >&6; } if ${lt_cv_sys_global_symbol_pipe+:} false; then : $as_echo_n "(cached) " >&6 else # These are sane defaults that work on at least a few old systems. # [They come from Ultrix. What could be older than Ultrix?!! ;)] # Character class describing NM global symbol codes. symcode='[BCDEGRST]' # Regexp to match symbols that can be accessed directly from C. sympat='\([_A-Za-z][_A-Za-z0-9]*\)' # Define system-specific variables. case $host_os in aix*) symcode='[BCDT]' ;; cygwin* | mingw* | pw32* | cegcc*) symcode='[ABCDGISTW]' ;; hpux*) if test "$host_cpu" = ia64; then symcode='[ABCDEGRST]' fi ;; irix* | nonstopux*) symcode='[BCDEGRST]' ;; osf*) symcode='[BCDEGQRST]' ;; solaris*) symcode='[BDRT]' ;; sco3.2v5*) symcode='[DT]' ;; sysv4.2uw2*) symcode='[DT]' ;; sysv5* | sco5v6* | unixware* | OpenUNIX*) symcode='[ABDT]' ;; sysv4) symcode='[DFNSTU]' ;; esac # If we're using GNU nm, then use its standard symbol codes. case `$NM -V 2>&1` in *GNU* | *'with BFD'*) symcode='[ABCDGIRSTW]' ;; esac # Transform an extracted symbol line into a proper C declaration. # Some systems (esp. on ia64) link data and code symbols differently, # so use this general approach. lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" # Transform an extracted symbol line into symbol name and symbol address lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (void *) \&\2},/p'" lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ ]*\)[ ]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ ]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"lib\2\", (void *) \&\2},/p'" # Handle CRLF in mingw tool chain opt_cr= case $build_os in mingw*) opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp ;; esac # Try without a prefix underscore, then with it. 
for ac_symprfx in "" "_"; do # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. symxfrm="\\1 $ac_symprfx\\2 \\2" # Write the raw and C identifiers. if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Fake it for dumpbin and say T for any non-static function # and D for any global variable. # Also find C++ and __fastcall symbols from MSVC++, # which start with @ or ?. lt_cv_sys_global_symbol_pipe="$AWK '"\ " {last_section=section; section=\$ 3};"\ " /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ " /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ " \$ 0!~/External *\|/{next};"\ " / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ " {if(hide[section]) next};"\ " {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\ " {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ " s[1]~/^[@?]/{print s[1], s[1]; next};"\ " s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ " ' prfx=^$ac_symprfx" else lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" fi lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" # Check to see that the pipe works correctly. pipe_works=no rm -f conftest* cat > conftest.$ac_ext <<_LT_EOF #ifdef __cplusplus extern "C" { #endif char nm_test_var; void nm_test_func(void); void nm_test_func(void){} #ifdef __cplusplus } #endif int main(){nm_test_var='a';nm_test_func();return(0);} _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then # Now try to grab the symbols. nlist=conftest.nm if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5 (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" else rm -f "$nlist"T fi # Make sure that we snagged all the symbols we need. if $GREP ' nm_test_var$' "$nlist" >/dev/null; then if $GREP ' nm_test_func$' "$nlist" >/dev/null; then cat <<_LT_EOF > conftest.$ac_ext /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) /* DATA imports from DLLs on WIN32 con't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT_DLSYM_CONST #elif defined(__osf__) /* This system does not cope well with relocations in const data. */ # define LT_DLSYM_CONST #else # define LT_DLSYM_CONST const #endif #ifdef __cplusplus extern "C" { #endif _LT_EOF # Now generate the symbol file. eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' cat <<_LT_EOF >> conftest.$ac_ext /* The mapping between symbol names and symbols. */ LT_DLSYM_CONST struct { const char *name; void *address; } lt__PROGRAM__LTX_preloaded_symbols[] = { { "@PROGRAM@", (void *) 0 }, _LT_EOF $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext cat <<\_LT_EOF >> conftest.$ac_ext {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt__PROGRAM__LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif _LT_EOF # Now try linking the two files. mv conftest.$ac_objext conftstm.$ac_objext lt_globsym_save_LIBS=$LIBS lt_globsym_save_CFLAGS=$CFLAGS LIBS="conftstm.$ac_objext" CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; } && test -s conftest${ac_exeext}; then pipe_works=yes fi LIBS=$lt_globsym_save_LIBS CFLAGS=$lt_globsym_save_CFLAGS else echo "cannot find nm_test_func in $nlist" >&5 fi else echo "cannot find nm_test_var in $nlist" >&5 fi else echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 fi else echo "$progname: failed program was:" >&5 cat conftest.$ac_ext >&5 fi rm -rf conftest* conftst* # Do not use the global_symbol_pipe unless it works. if test "$pipe_works" = yes; then break else lt_cv_sys_global_symbol_pipe= fi done fi if test -z "$lt_cv_sys_global_symbol_pipe"; then lt_cv_sys_global_symbol_to_cdecl= fi if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: failed" >&5 $as_echo "failed" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: ok" >&5 $as_echo "ok" >&6; } fi # Response file support. if test "$lt_cv_nm_interface" = "MS dumpbin"; then nm_file_list_spec='@' elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then nm_file_list_spec='@' fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5 $as_echo_n "checking for sysroot... " >&6; } # Check whether --with-sysroot was given. if test "${with_sysroot+set}" = set; then : withval=$with_sysroot; else with_sysroot=no fi lt_sysroot= case ${with_sysroot} in #( yes) if test "$GCC" = yes; then lt_sysroot=`$CC --print-sysroot 2>/dev/null` fi ;; #( /*) lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` ;; #( no|'') ;; #( *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5 $as_echo "${with_sysroot}" >&6; } as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5 ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5 $as_echo "${lt_sysroot:-no}" >&6; } # Check whether --enable-libtool-lock was given. 
if test "${enable_libtool_lock+set}" = set; then : enableval=$enable_libtool_lock; fi test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes # Some flags need to be propagated to the compiler or linker for good # libtool support. case $host in ia64-*-hpux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then case `/usr/bin/file conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE="32" ;; *ELF-64*) HPUX_IA64_MODE="64" ;; esac fi rm -rf conftest* ;; *-*-irix6*) # Find out which ABI we are using. echo '#line '$LINENO' "configure"' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then if test "$lt_cv_prog_gnu_ld" = yes; then case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; *N32*) LD="${LD-ld} -melf32bmipn32" ;; *64-bit*) LD="${LD-ld} -melf64bmip" ;; esac else case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; *N32*) LD="${LD-ld} -n32" ;; *64-bit*) LD="${LD-ld} -64" ;; esac fi fi rm -rf conftest* ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; then case `/usr/bin/file conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_i386" ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" ;; s390x-*linux*) LD="${LD-ld} -m elf_s390" ;; sparc64-*linux*) LD="${LD-ld} -m elf32_sparc" ;; esac ;; *64-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; ppc*-*linux*|powerpc*-*linux*) LD="${LD-ld} -m elf64ppc" ;; s390*-*linux*|s390*-*tpf*) LD="${LD-ld} -m elf64_s390" ;; sparc*-*linux*) LD="${LD-ld} -m elf64_sparc" ;; esac ;; esac fi rm -rf conftest* ;; *-*-sco3.2v5*) # On SCO OpenServer 5, we need -belf to get full-featured binaries. SAVE_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -belf" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5 $as_echo_n "checking whether the C compiler needs -belf... " >&6; } if ${lt_cv_cc_needs_belf+:} false; then : $as_echo_n "(cached) " >&6 else ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_cv_cc_needs_belf=yes else lt_cv_cc_needs_belf=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5 $as_echo "$lt_cv_cc_needs_belf" >&6; } if test x"$lt_cv_cc_needs_belf" != x"yes"; then # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf CFLAGS="$SAVE_CFLAGS" fi ;; *-*solaris*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then case `/usr/bin/file conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) case $host in i?86-*-solaris*) LD="${LD-ld} -m elf_x86_64" ;; sparc*-*-solaris*) LD="${LD-ld} -m elf64_sparc" ;; esac # GNU ld 2.21 introduced _sol2 emulations. Use them if available. if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then LD="${LD-ld}_sol2" fi ;; *) if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then LD="${LD-ld} -64" fi ;; esac ;; esac fi rm -rf conftest* ;; esac need_locks="$enable_libtool_lock" if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}mt", so it can be a program name with args. set dummy ${ac_tool_prefix}mt; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_MANIFEST_TOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$MANIFEST_TOOL"; then ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL if test -n "$MANIFEST_TOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5 $as_echo "$MANIFEST_TOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_MANIFEST_TOOL"; then ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL # Extract the first word of "mt", so it can be a program name with args. set dummy mt; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_MANIFEST_TOOL"; then ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_MANIFEST_TOOL="mt" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL if test -n "$ac_ct_MANIFEST_TOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" >&5 $as_echo "$ac_ct_MANIFEST_TOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_MANIFEST_TOOL" = x; then MANIFEST_TOOL=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL fi else MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL" fi test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5 $as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; } if ${lt_cv_path_mainfest_tool+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_path_mainfest_tool=no echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5 $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out cat conftest.err >&5 if $GREP 'Manifest Tool' conftest.out > /dev/null; then lt_cv_path_mainfest_tool=yes fi rm -f conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_mainfest_tool" >&5 $as_echo "$lt_cv_path_mainfest_tool" >&6; } if test "x$lt_cv_path_mainfest_tool" != xyes; then MANIFEST_TOOL=: fi case $host_os in rhapsody* | darwin*) if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a program name with args. 
set dummy ${ac_tool_prefix}dsymutil; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_DSYMUTIL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$DSYMUTIL"; then ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi DSYMUTIL=$ac_cv_prog_DSYMUTIL if test -n "$DSYMUTIL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5 $as_echo "$DSYMUTIL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_DSYMUTIL"; then ac_ct_DSYMUTIL=$DSYMUTIL # Extract the first word of "dsymutil", so it can be a program name with args. set dummy dsymutil; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_DSYMUTIL"; then ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_DSYMUTIL="dsymutil" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL if test -n "$ac_ct_DSYMUTIL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5 $as_echo "$ac_ct_DSYMUTIL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_DSYMUTIL" = x; then DSYMUTIL=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac DSYMUTIL=$ac_ct_DSYMUTIL fi else DSYMUTIL="$ac_cv_prog_DSYMUTIL" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a program name with args. set dummy ${ac_tool_prefix}nmedit; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_NMEDIT+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$NMEDIT"; then ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi NMEDIT=$ac_cv_prog_NMEDIT if test -n "$NMEDIT"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5 $as_echo "$NMEDIT" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_NMEDIT"; then ac_ct_NMEDIT=$NMEDIT # Extract the first word of "nmedit", so it can be a program name with args. set dummy nmedit; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_NMEDIT"; then ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_NMEDIT="nmedit" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT if test -n "$ac_ct_NMEDIT"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5 $as_echo "$ac_ct_NMEDIT" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_NMEDIT" = x; then NMEDIT=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac NMEDIT=$ac_ct_NMEDIT fi else NMEDIT="$ac_cv_prog_NMEDIT" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program name with args. set dummy ${ac_tool_prefix}lipo; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_LIPO+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$LIPO"; then ac_cv_prog_LIPO="$LIPO" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_LIPO="${ac_tool_prefix}lipo" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi LIPO=$ac_cv_prog_LIPO if test -n "$LIPO"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5 $as_echo "$LIPO" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_LIPO"; then ac_ct_LIPO=$LIPO # Extract the first word of "lipo", so it can be a program name with args. set dummy lipo; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_LIPO+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_LIPO"; then ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_LIPO="lipo" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO if test -n "$ac_ct_LIPO"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5 $as_echo "$ac_ct_LIPO" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_LIPO" = x; then LIPO=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac LIPO=$ac_ct_LIPO fi else LIPO="$ac_cv_prog_LIPO" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}otool", so it can be a program name with args. set dummy ${ac_tool_prefix}otool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_OTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OTOOL"; then ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OTOOL="${ac_tool_prefix}otool" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OTOOL=$ac_cv_prog_OTOOL if test -n "$OTOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5 $as_echo "$OTOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_OTOOL"; then ac_ct_OTOOL=$OTOOL # Extract the first word of "otool", so it can be a program name with args. set dummy otool; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_OTOOL+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_OTOOL"; then ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OTOOL="otool" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL if test -n "$ac_ct_OTOOL"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5 $as_echo "$ac_ct_OTOOL" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_OTOOL" = x; then OTOOL=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OTOOL=$ac_ct_OTOOL fi else OTOOL="$ac_cv_prog_OTOOL" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}otool64", so it can be a program name with args. set dummy ${ac_tool_prefix}otool64; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_OTOOL64+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$OTOOL64"; then ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi OTOOL64=$ac_cv_prog_OTOOL64 if test -n "$OTOOL64"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5 $as_echo "$OTOOL64" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_OTOOL64"; then ac_ct_OTOOL64=$OTOOL64 # Extract the first word of "otool64", so it can be a program name with args. set dummy otool64; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_OTOOL64"; then ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_OTOOL64="otool64" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64 if test -n "$ac_ct_OTOOL64"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5 $as_echo "$ac_ct_OTOOL64" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_OTOOL64" = x; then OTOOL64=":" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac OTOOL64=$ac_ct_OTOOL64 fi else OTOOL64="$ac_cv_prog_OTOOL64" fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5 $as_echo_n "checking for -single_module linker flag... " >&6; } if ${lt_cv_apple_cc_single_mod+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_apple_cc_single_mod=no if test -z "${LT_MULTI_MODULE}"; then # By default we will add the -single_module flag. You can override # by either setting the environment variable LT_MULTI_MODULE # non-empty at configure time, or by adding -multi_module to the # link flags. rm -rf libconftest.dylib* echo "int foo(void){return 1;}" > conftest.c echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c" >&5 $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \ -dynamiclib -Wl,-single_module conftest.c 2>conftest.err _lt_result=$? # If there is a non-empty error log, and "single_module" # appears in it, assume the flag caused a linker warning if test -s conftest.err && $GREP single_module conftest.err; then cat conftest.err >&5 # Otherwise, if the output was created with a 0 exit code from # the compiler, it worked. 
elif test -f libconftest.dylib && test $_lt_result -eq 0; then lt_cv_apple_cc_single_mod=yes else cat conftest.err >&5 fi rm -rf libconftest.dylib* rm -f conftest.* fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_apple_cc_single_mod" >&5 $as_echo "$lt_cv_apple_cc_single_mod" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5 $as_echo_n "checking for -exported_symbols_list linker flag... " >&6; } if ${lt_cv_ld_exported_symbols_list+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_ld_exported_symbols_list=no save_LDFLAGS=$LDFLAGS echo "_main" > conftest.sym LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_cv_ld_exported_symbols_list=yes else lt_cv_ld_exported_symbols_list=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_exported_symbols_list" >&5 $as_echo "$lt_cv_ld_exported_symbols_list" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5 $as_echo_n "checking for -force_load linker flag... " >&6; } if ${lt_cv_ld_force_load+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_ld_force_load=no cat > conftest.c << _LT_EOF int forced_loaded() { return 2;} _LT_EOF echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5 $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5 echo "$AR cru libconftest.a conftest.o" >&5 $AR cru libconftest.a conftest.o 2>&5 echo "$RANLIB libconftest.a" >&5 $RANLIB libconftest.a 2>&5 cat > conftest.c << _LT_EOF int main() { return 0;} _LT_EOF echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&5 $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err _lt_result=$? 
if test -s conftest.err && $GREP force_load conftest.err; then cat conftest.err >&5 elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then lt_cv_ld_force_load=yes else cat conftest.err >&5 fi rm -f conftest.err libconftest.a conftest conftest.c rm -rf conftest.dSYM fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5 $as_echo "$lt_cv_ld_force_load" >&6; } case $host_os in rhapsody* | darwin1.[012]) _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;; darwin1.*) _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; darwin*) # darwin 5.x on # if running on 10.5 or later, the deployment target defaults # to the OS version, if on x86, and 10.4, the deployment # target defaults to 10.4. Don't you love it? case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in 10.0,*86*-darwin8*|10.0,*-darwin[91]*) _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; 10.[012]*) _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; 10.*) _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;; esac ;; esac if test "$lt_cv_apple_cc_single_mod" = "yes"; then _lt_dar_single_mod='$single_module' fi if test "$lt_cv_ld_exported_symbols_list" = "yes"; then _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym' else _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}' fi if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then _lt_dsymutil='~$DSYMUTIL $lib || :' else _lt_dsymutil= fi ;; esac ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 $as_echo_n "checking how to run the C preprocessor... 
" >&6; } # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if ${ac_cv_prog_CPP+:} false; then : $as_echo_n "(cached) " >&6 else # Double quotes because CPP needs to be expanded for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" do ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : break fi done ac_cv_prog_CPP=$CPP fi CPP=$ac_cv_prog_CPP else ac_cv_prog_CPP=$CPP fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 $as_echo "$CPP" >&6; } ac_preproc_ok=false for ac_c_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. 
cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else # Broken: fails on valid input. continue fi rm -f conftest.err conftest.i conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : # Broken: success on invalid input. continue else # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.i conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.i conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details" "$LINENO" 5; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } if ${ac_cv_header_stdc+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #include #include int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_stdc=yes else ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : : else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : else ac_cv_header_stdc=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi # On IRIX 5.3, sys/types and inttypes.h are conflicting. 
for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default " if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done for ac_header in dlfcn.h do : ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default " if test "x$ac_cv_header_dlfcn_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_DLFCN_H 1 _ACEOF fi done # Set options # Check whether --enable-shared was given. if test "${enable_shared+set}" = set; then : enableval=$enable_shared; p=${PACKAGE-default} case $enableval in yes) enable_shared=yes ;; no) enable_shared=no ;; *) enable_shared=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_shared=yes fi done IFS="$lt_save_ifs" ;; esac else enable_shared=no fi enable_dlopen=no enable_win32_dll=no # Check whether --enable-static was given. if test "${enable_static+set}" = set; then : enableval=$enable_static; p=${PACKAGE-default} case $enableval in yes) enable_static=yes ;; no) enable_static=no ;; *) enable_static=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_static=yes fi done IFS="$lt_save_ifs" ;; esac else enable_static=yes fi # Check whether --with-pic was given. if test "${with_pic+set}" = set; then : withval=$with_pic; lt_p=${PACKAGE-default} case $withval in yes|no) pic_mode=$withval ;; *) pic_mode=default # Look at the argument we got. We use all the common list separators. 
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for lt_pkg in $withval; do IFS="$lt_save_ifs" if test "X$lt_pkg" = "X$lt_p"; then pic_mode=yes fi done IFS="$lt_save_ifs" ;; esac else pic_mode=default fi test -z "$pic_mode" && pic_mode=default # Check whether --enable-fast-install was given. if test "${enable_fast_install+set}" = set; then : enableval=$enable_fast_install; p=${PACKAGE-default} case $enableval in yes) enable_fast_install=yes ;; no) enable_fast_install=no ;; *) enable_fast_install=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_fast_install=yes fi done IFS="$lt_save_ifs" ;; esac else enable_fast_install=yes fi # This can be used to rebuild libtool when needed LIBTOOL_DEPS="$ltmain" # Always use our own libtool. LIBTOOL='$(SHELL) $(top_builddir)/libtool' test -z "$LN_S" && LN_S="ln -s" if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5 $as_echo_n "checking for objdir... " >&6; } if ${lt_cv_objdir+:} false; then : $as_echo_n "(cached) " >&6 else rm -f .libs 2>/dev/null mkdir .libs 2>/dev/null if test -d .libs; then lt_cv_objdir=.libs else # MS-DOS does not allow filenames that begin with a dot. lt_cv_objdir=_libs fi rmdir .libs 2>/dev/null fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5 $as_echo "$lt_cv_objdir" >&6; } objdir=$lt_cv_objdir cat >>confdefs.h <<_ACEOF #define LT_OBJDIR "$lt_cv_objdir/" _ACEOF case $host_os in aix3*) # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. 
if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi ;; esac # Global variables: ofile=libtool can_build_shared=yes # All known linkers require a `.a' archive for static linking (except MSVC, # which needs '.lib'). libext=a with_gnu_ld="$lt_cv_prog_gnu_ld" old_CC="$CC" old_CFLAGS="$CFLAGS" # Set sane defaults for various variables test -z "$CC" && CC=cc test -z "$LTCC" && LTCC=$CC test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS test -z "$LD" && LD=ld test -z "$ac_objext" && ac_objext=o for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` # Only perform the check for file, if the check method requires it test -z "$MAGIC_CMD" && MAGIC_CMD=file case $deplibs_check_method in file_magic*) if test "$file_magic_cmd" = '$MAGIC_CMD'; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5 $as_echo_n "checking for ${ac_tool_prefix}file... " >&6; } if ${lt_cv_path_MAGIC_CMD+:} false; then : $as_echo_n "(cached) " >&6 else case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD="$MAGIC_CMD" lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" for ac_dir in $ac_dummy; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. 
if test -f $ac_dir/${ac_tool_prefix}file; then lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <<_LT_EOF 1>&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org _LT_EOF fi ;; esac fi break fi done IFS="$lt_save_ifs" MAGIC_CMD="$lt_save_MAGIC_CMD" ;; esac fi MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if test -n "$MAGIC_CMD"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 $as_echo "$MAGIC_CMD" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test -z "$lt_cv_path_MAGIC_CMD"; then if test -n "$ac_tool_prefix"; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5 $as_echo_n "checking for file... " >&6; } if ${lt_cv_path_MAGIC_CMD+:} false; then : $as_echo_n "(cached) " >&6 else case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD="$MAGIC_CMD" lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" for ac_dir in $ac_dummy; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. 
if test -f $ac_dir/file; then lt_cv_path_MAGIC_CMD="$ac_dir/file" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <<_LT_EOF 1>&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org _LT_EOF fi ;; esac fi break fi done IFS="$lt_save_ifs" MAGIC_CMD="$lt_save_MAGIC_CMD" ;; esac fi MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if test -n "$MAGIC_CMD"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5 $as_echo "$MAGIC_CMD" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi else MAGIC_CMD=: fi fi fi ;; esac # Use C for the default configuration in the libtool script lt_save_CC="$CC" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # Source file extension for C test sources. ac_ext=c # Object file extension for compiled C test sources. objext=o objext=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(){return(0);}' # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. 
LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # Save the default compiler, since it gets overwritten when the other # tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP. compiler_DEFAULT=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $RM conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $RM -r conftest* ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then lt_prog_compiler_no_builtin_flag= if test "$GCC" = yes; then case $cc_basename in nvcc*) lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;; *) lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 $as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; } if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_rtti_exceptions=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-fno-rtti -fno-exceptions" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. 
lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_rtti_exceptions=yes fi fi $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 $as_echo "$lt_cv_prog_compiler_rtti_exceptions" >&6; } if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions" else : fi fi lt_prog_compiler_wl= lt_prog_compiler_pic= lt_prog_compiler_static= if test "$GCC" = yes; then lt_prog_compiler_wl='-Wl,' lt_prog_compiler_static='-static' case $host_os in aix*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static='-Bstatic' fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support lt_prog_compiler_pic='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. 
;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries lt_prog_compiler_pic='-DDLL_EXPORT' ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic='-fno-common' ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. lt_prog_compiler_static= ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) # +Z the default ;; *) lt_prog_compiler_pic='-fPIC' ;; esac ;; interix[3-9]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; msdosdjgpp*) # Just because we use GCC doesn't mean we suddenly get shared libraries # on systems that don't support them. lt_prog_compiler_can_build_shared=no enable_shared=no ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic='-fPIC -shared' ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic=-Kconform_pic fi ;; *) lt_prog_compiler_pic='-fPIC' ;; esac case $cc_basename in nvcc*) # Cuda Compiler Driver 2.2 lt_prog_compiler_wl='-Xlinker ' if test -n "$lt_prog_compiler_pic"; then lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic" fi ;; esac else # PORTME Check for flag to pass linker flags through the system compiler. 
case $host_os in aix*) lt_prog_compiler_wl='-Wl,' if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static='-Bstatic' else lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp' fi ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). lt_prog_compiler_pic='-DDLL_EXPORT' ;; hpux9* | hpux10* | hpux11*) lt_prog_compiler_wl='-Wl,' # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? lt_prog_compiler_static='${wl}-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) lt_prog_compiler_wl='-Wl,' # PIC (with -KPIC) is the default. lt_prog_compiler_static='-non_shared' ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in # old Intel for x86_64 which still supported -KPIC. ecc*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-static' ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' ;; # Lahey Fortran 8.1. lf95*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='--shared' lt_prog_compiler_static='--static' ;; nagfor*) # NAG Fortran compiler lt_prog_compiler_wl='-Wl,-Wl,,' lt_prog_compiler_pic='-PIC' lt_prog_compiler_static='-Bstatic' ;; pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fpic' lt_prog_compiler_static='-Bstatic' ;; ccc*) lt_prog_compiler_wl='-Wl,' # All Alpha code is PIC. 
lt_prog_compiler_static='-non_shared' ;; xl* | bgxl* | bgf* | mpixl*) # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-qpic' lt_prog_compiler_static='-qstaticlink' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 8.[0-3]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='' ;; *Sun\ F* | *Sun*Fortran*) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='-Qoption ld ' ;; *Sun\ C*) # Sun C 5.9 lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' lt_prog_compiler_wl='-Wl,' ;; *Intel*\ [CF]*Compiler*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fPIC' lt_prog_compiler_static='-static' ;; *Portland\ Group*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-fpic' lt_prog_compiler_static='-Bstatic' ;; esac ;; esac ;; newsos6) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. lt_prog_compiler_pic='-fPIC -shared' ;; osf3* | osf4* | osf5*) lt_prog_compiler_wl='-Wl,' # All OSF/1 code is PIC. 
lt_prog_compiler_static='-non_shared' ;; rdos*) lt_prog_compiler_static='-non_shared' ;; solaris*) lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' case $cc_basename in f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) lt_prog_compiler_wl='-Qoption ld ';; *) lt_prog_compiler_wl='-Wl,';; esac ;; sunos4*) lt_prog_compiler_wl='-Qoption ld ' lt_prog_compiler_pic='-PIC' lt_prog_compiler_static='-Bstatic' ;; sysv4 | sysv4.2uw2* | sysv4.3*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; sysv4*MP*) if test -d /usr/nec ;then lt_prog_compiler_pic='-Kconform_pic' lt_prog_compiler_static='-Bstatic' fi ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_pic='-KPIC' lt_prog_compiler_static='-Bstatic' ;; unicos*) lt_prog_compiler_wl='-Wl,' lt_prog_compiler_can_build_shared=no ;; uts4*) lt_prog_compiler_pic='-pic' lt_prog_compiler_static='-Bstatic' ;; *) lt_prog_compiler_can_build_shared=no ;; esac fi case $host_os in # For platforms which do not support PIC, -DPIC is meaningless: *djgpp*) lt_prog_compiler_pic= ;; *) lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC" ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5 $as_echo_n "checking for $compiler option to produce PIC... " >&6; } if ${lt_cv_prog_compiler_pic+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_pic=$lt_prog_compiler_pic fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic" >&5 $as_echo "$lt_cv_prog_compiler_pic" >&6; } lt_prog_compiler_pic=$lt_cv_prog_compiler_pic # # Check to make sure the PIC flag actually works. # if test -n "$lt_prog_compiler_pic"; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5 $as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... 
" >&6; } if ${lt_cv_prog_compiler_pic_works+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_pic_works=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$lt_prog_compiler_pic -DPIC" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_pic_works=yes fi fi $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_pic_works" >&5 $as_echo "$lt_cv_prog_compiler_pic_works" >&6; } if test x"$lt_cv_prog_compiler_pic_works" = xyes; then case $lt_prog_compiler_pic in "" | " "*) ;; *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;; esac else lt_prog_compiler_pic= lt_prog_compiler_can_build_shared=no fi fi # # Check to make sure the static flag actually works. 
# wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\" { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5 $as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; } if ${lt_cv_prog_compiler_static_works+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_static_works=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_static_works=yes fi else lt_cv_prog_compiler_static_works=yes fi fi $RM -r conftest* LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_static_works" >&5 $as_echo "$lt_cv_prog_compiler_static_works" >&6; } if test x"$lt_cv_prog_compiler_static_works" = xyes; then : else lt_prog_compiler_static= fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if ${lt_cv_prog_compiler_c_o+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_c_o=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. 
# Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 $as_echo "$lt_cv_prog_compiler_c_o" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5 $as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; } if ${lt_cv_prog_compiler_c_o+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler_c_o=no $RM -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. 
lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o=yes fi fi chmod u+w . 2>&5 $RM conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files $RM out/* && rmdir out cd .. $RM -r conftest $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler_c_o" >&5 $as_echo "$lt_cv_prog_compiler_c_o" >&6; } hard_links="nottested" if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then # do not overwrite the value of need_locks provided by the user { $as_echo "$as_me:${as_lineno-$LINENO}: checking if we can lock with hard links" >&5 $as_echo_n "checking if we can lock with hard links... 
" >&6; } hard_links=yes $RM conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5 $as_echo "$hard_links" >&6; } if test "$hard_links" = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 $as_echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $compiler linker ($LD) supports shared libraries" >&5 $as_echo_n "checking whether the $compiler linker ($LD) supports shared libraries... " >&6; } runpath_var= allow_undefined_flag= always_export_symbols=no archive_cmds= archive_expsym_cmds= compiler_needs_object=no enable_shared_with_static_runtimes=no export_dynamic_flag_spec= export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' hardcode_automatic=no hardcode_direct=no hardcode_direct_absolute=no hardcode_libdir_flag_spec= hardcode_libdir_separator= hardcode_minus_L=no hardcode_shlibpath_var=unsupported inherit_rpath=no link_all_deplibs=unknown module_cmds= module_expsym_cmds= old_archive_from_new_cmds= old_archive_from_expsyms_cmds= thread_safe_flag_spec= whole_archive_flag_spec= # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list include_expsyms= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ` (' and `)$', so one must not match beginning or # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', # as well as any symbol that contains `d'. 
exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*' # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. # Exclude shared library initialization/finalization symbols. extract_expsyms_cmds= case $host_os in cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. if test "$GCC" != yes; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++) with_gnu_ld=yes ;; openbsd*) with_gnu_ld=no ;; linux* | k*bsd*-gnu | gnu*) link_all_deplibs=no ;; esac ld_shlibs=yes # On some targets, GNU ld is compatible enough with the native linker # that we're better off using the native interface for both. lt_use_gnu_ld_interface=no if test "$with_gnu_ld" = yes; then case $host_os in aix*) # The AIX port of GNU ld has always aspired to compatibility # with the native linker. However, as the warning in the GNU ld # block says, versions before 2.19.5* couldn't really create working # shared libraries, regardless of the interface used. case `$LD -v 2>&1` in *\ \(GNU\ Binutils\)\ 2.19.5*) ;; *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;; *\ \(GNU\ Binutils\)\ [3-9]*) ;; *) lt_use_gnu_ld_interface=yes ;; esac ;; *) lt_use_gnu_ld_interface=yes ;; esac fi if test "$lt_use_gnu_ld_interface" = yes; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='${wl}' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. 
runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' export_dynamic_flag_spec='${wl}--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else whole_archive_flag_spec= fi supports_anon_versioning=no case `$LD -v 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix[3-9]*) # On AIX/PPC, the GNU linker is very broken if test "$host_cpu" != ia64; then ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: the GNU linker, at least up to release 2.19, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to install binutils *** 2.20 or above, or modify your PATH so that a non-GNU linker is found. *** You will then need to restart the configuration process. 
_LT_EOF fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='' ;; m68k) archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes ;; esac ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then allow_undefined_flag=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else ld_shlibs=no fi ;; cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, # as there is no search path for DLLs. 
hardcode_libdir_flag_spec='-L$libdir' export_dynamic_flag_spec='${wl}--export-all-symbols' allow_undefined_flag=unsupported always_export_symbols=no enable_shared_with_static_runtimes=yes export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > $export_symbols' exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname' if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs=no fi ;; haiku*) archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' link_all_deplibs=yes ;; interix[3-9]*) hardcode_direct=no hardcode_shlibpath_var=no hardcode_libdir_flag_spec='${wl}-rpath,$libdir' export_dynamic_flag_spec='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) tmp_diet=no if test "$host_os" = linux-dietlibc; then case $cc_basename in diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) esac fi if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ && test "$tmp_diet" = no then tmp_addflag=' $pic_flag' tmp_sharedflag='-shared' case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group f77 and f90 compilers whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; lf95*) # Lahey Fortran 8.1 whole_archive_flag_spec= tmp_sharedflag='--shared' ;; xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf below) tmp_sharedflag='-qmkshrobj' tmp_addflag= ;; nvcc*) # Cuda Compiler Driver 2.2 whole_archive_flag_spec='${wl}--whole-archive`for conv 
in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' compiler_needs_object=yes ;; esac case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C 5.9 whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' compiler_needs_object=yes tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; esac archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test "x$supports_anon_versioning" = xyes; then archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi case $cc_basename in xlf* | bgf* | bgxlf* | mpixlf*) # IBM XL Fortran 10.1 on PPC cannot create shared libs itself whole_archive_flag_spec='--whole-archive$convenience --no-whole-archive' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib' if test "x$supports_anon_versioning" = xyes; then archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi ;; esac else ld_shlibs=no fi ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else 
archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. 
if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; esac ;; sunos4*) archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= hardcode_direct=yes hardcode_shlibpath_var=no ;; *) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; esac if test "$ld_shlibs" = no; then runpath_var= hardcode_libdir_flag_spec= export_dynamic_flag_spec= whole_archive_flag_spec= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) allow_undefined_flag=unsupported always_export_symbols=yes archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. hardcode_minus_L=yes if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. hardcode_direct=unsupported fi ;; aix[4-9]*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else # If we're using GNU nm, then we don't want the "-C" option. 
# -C means demangle to AIX nm, but means don't demangle with GNU nm # Also, AIX nm treats weak defined symbols like other global # defined symbols, whereas GNU nm marks them as "W". if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' else export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*) for ld_flag in $LDFLAGS; do if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then aix_use_runtimelinking=yes break fi done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
archive_cmds='' hardcode_direct=yes hardcode_direct_absolute=yes hardcode_libdir_separator=':' link_all_deplibs=yes file_list_spec='${wl}-f,' if test "$GCC" = yes; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L=yes hardcode_libdir_flag_spec='-L$libdir' hardcode_libdir_separator= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi link_all_deplibs=no else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi export_dynamic_flag_spec='${wl}-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. always_export_symbols=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag='-berok' # Determine the default libpath from the value encoded in an # empty executable. if test "${lt_cv_aix_libpath+set}" = set; then aix_libpath=$lt_cv_aix_libpath else if ${lt_cv_aix_libpath_+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_="/usr/lib:/lib" fi fi aix_libpath=$lt_cv_aix_libpath_ fi hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' allow_undefined_flag="-z nodefs" archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. if test "${lt_cv_aix_libpath+set}" = set; then aix_libpath=$lt_cv_aix_libpath else if ${lt_cv_aix_libpath_+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\([^ ]*\) *$/\1/ p } }' lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. 
if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext if test -z "$lt_cv_aix_libpath_"; then lt_cv_aix_libpath_="/usr/lib:/lib" fi fi aix_libpath=$lt_cv_aix_libpath_ fi hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag=' ${wl}-bernotok' allow_undefined_flag=' ${wl}-berok' if test "$with_gnu_ld" = yes; then # We only use this code for GNU lds that support --whole-archive. whole_archive_flag_spec='${wl}--whole-archive$convenience ${wl}--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec='$convenience' fi archive_cmds_need_lc=yes # This is similar to how AIX traditionally builds its shared libraries. archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='' ;; m68k) archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes ;; esac ;; bsdi[45]*) export_dynamic_flag_spec=-rdynamic ;; cygwin* | mingw* | pw32* | cegcc*) # 
When not using gcc, we currently assume that we are using # Microsoft Visual C++. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in cl*) # Native MSVC hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported always_export_symbols=yes file_list_spec='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; else sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. 
# _LT_TAGVAR(old_archive_from_new_cmds, )='true' enable_shared_with_static_runtimes=yes exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' # Don't use ranlib old_postinstall_cmds='chmod 644 $oldlib' postlink_cmds='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile="$lt_outputfile.exe" lt_tool_outputfile="$lt_tool_outputfile.exe" ;; esac~ if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # Assume MSVC wrapper hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. old_archive_from_new_cmds='true' # FIXME: Should let the user specify the lib program. 
old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' enable_shared_with_static_runtimes=yes ;; esac ;; darwin* | rhapsody*) archive_cmds_need_lc=no hardcode_direct=no hardcode_automatic=yes hardcode_shlibpath_var=unsupported if test "$lt_cv_ld_force_load" = "yes"; then whole_archive_flag_spec='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`' else whole_archive_flag_spec='' fi link_all_deplibs=yes allow_undefined_flag="$_lt_dar_allow_undefined" case $cc_basename in ifort*) _lt_dar_can_shared=yes ;; *) _lt_dar_can_shared=$GCC ;; esac if test "$_lt_dar_can_shared" = "yes"; then output_verbose_link_cmd=func_echo_all archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}" module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}" archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}" module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}" else ld_shlibs=no fi ;; dgux*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-L$libdir' hardcode_shlibpath_var=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). 
freebsd2.2*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2.*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes hardcode_minus_L=yes hardcode_shlibpath_var=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. freebsd* | dragonfly*) archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; hpux9*) if test "$GCC" = yes; then archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' fi hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: hardcode_direct=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes export_dynamic_flag_spec='${wl}-E' ;; hpux10*) if test "$GCC" = yes && test "$with_gnu_ld" = no; then archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test "$with_gnu_ld" = no; then hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: hardcode_direct=yes hardcode_direct_absolute=yes export_dynamic_flag_spec='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. 
hardcode_minus_L=yes fi ;; hpux11*) if test "$GCC" = yes && test "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) # Older versions of the 11.00 compiler do not understand -b yet # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5 $as_echo_n "checking if $CC understands -b... " >&6; } if ${lt_cv_prog_compiler__b+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_prog_compiler__b=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS -b" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. 
cat conftest.err 1>&5 $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler__b=yes fi else lt_cv_prog_compiler__b=yes fi fi $RM -r conftest* LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" >&5 $as_echo "$lt_cv_prog_compiler__b" >&6; } if test x"$lt_cv_prog_compiler__b" = xyes; then archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi ;; esac fi if test "$with_gnu_ld" = no; then hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: case $host_cpu in hppa*64*|ia64*) hardcode_direct=no hardcode_shlibpath_var=no ;; *) hardcode_direct=yes hardcode_direct_absolute=yes export_dynamic_flag_spec='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test "$GCC" = yes; then archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' # Try to use the -exported_symbol ld option, if it does not # work, assume that -exports_file does not work either and # implicitly export all symbols. # This should be the same for all languages, so no per-tag cache variable. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5 $as_echo_n "checking whether the $host_os linker accepts -exported_symbol... 
" >&6; } if ${lt_cv_irix_exported_symbol+:} false; then : $as_echo_n "(cached) " >&6 else save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int foo (void) { return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : lt_cv_irix_exported_symbol=yes else lt_cv_irix_exported_symbol=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LDFLAGS="$save_LDFLAGS" fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_irix_exported_symbol" >&5 $as_echo "$lt_cv_irix_exported_symbol" >&6; } if test "$lt_cv_irix_exported_symbol" = yes; then archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' fi else archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' fi archive_cmds_need_lc='no' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: inherit_rpath=yes link_all_deplibs=yes ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; newsos6) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' 
hardcode_direct=yes hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: hardcode_shlibpath_var=no ;; *nto* | *qnx*) ;; openbsd*) if test -f /usr/libexec/ld.so; then hardcode_direct=yes hardcode_shlibpath_var=no hardcode_direct_absolute=yes if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' hardcode_libdir_flag_spec='${wl}-rpath,$libdir' export_dynamic_flag_spec='${wl}-E' else case $host_os in openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-R$libdir' ;; *) archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='${wl}-rpath,$libdir' ;; esac fi else ld_shlibs=no fi ;; os2*) hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes allow_undefined_flag=unsupported archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' ;; osf3*) if test "$GCC" = yes; then allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else 
allow_undefined_flag=' -expect_unresolved \*' archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' fi archive_cmds_need_lc='no' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test "$GCC" = yes; then allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' else allow_undefined_flag=' -expect_unresolved \*' archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' # Both c and cxx compiler support -rpath directly hardcode_libdir_flag_spec='-rpath $libdir' fi archive_cmds_need_lc='no' hardcode_libdir_separator=: ;; solaris*) no_undefined_flag=' -z defs' if test "$GCC" = yes; then wlarc='${wl}' archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> 
$lib.exp~ $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' else case `$CC -V 2>&1` in *"Compilers 5.0"*) wlarc='' archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' ;; *) wlarc='${wl}' archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' ;; esac fi hardcode_libdir_flag_spec='-R$libdir' hardcode_shlibpath_var=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. GCC discards it without `$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) if test "$GCC" = yes; then whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' else whole_archive_flag_spec='-z allextract$convenience -z defaultextract' fi ;; esac link_all_deplibs=yes ;; sunos4*) if test "x$host_vendor" = xsequent; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. 
archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi hardcode_libdir_flag_spec='-L$libdir' hardcode_direct=yes hardcode_minus_L=yes hardcode_shlibpath_var=no ;; sysv4) case $host_vendor in sni) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' reload_cmds='$CC -r -o $output$reload_objs' hardcode_direct=no ;; motorola) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' hardcode_shlibpath_var=no ;; sysv4.3*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var=no export_dynamic_flag_spec='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes ld_shlibs=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag='${wl}-z,text' archive_cmds_need_lc=no hardcode_shlibpath_var=no runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols 
used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. no_undefined_flag='${wl}-z,text' allow_undefined_flag='${wl}-z,nodefs' archive_cmds_need_lc=no hardcode_shlibpath_var=no hardcode_libdir_flag_spec='${wl}-R,$libdir' hardcode_libdir_separator=':' link_all_deplibs=yes export_dynamic_flag_spec='${wl}-Bexport' runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-L$libdir' hardcode_shlibpath_var=no ;; *) ld_shlibs=no ;; esac if test x$host_vendor = xsni; then case $host in sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) export_dynamic_flag_spec='${wl}-Blargedynsym' ;; esac fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5 $as_echo "$ld_shlibs" >&6; } test "$ld_shlibs" = no && can_build_shared=no with_gnu_ld=$with_gnu_ld # # Do we need to explicitly link libc? # case "x$archive_cmds_need_lc" in x|xyes) # Assume -lc should be added archive_cmds_need_lc=yes if test "$enable_shared" = yes && test "$GCC" = yes; then case $archive_cmds in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. 
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5 $as_echo_n "checking whether -lc should be explicitly linked in... " >&6; } if ${lt_cv_archive_cmds_need_lc+:} false; then : $as_echo_n "(cached) " >&6 else $RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl pic_flag=$lt_prog_compiler_pic compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag allow_undefined_flag= if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5 (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } then lt_cv_archive_cmds_need_lc=no else lt_cv_archive_cmds_need_lc=yes fi allow_undefined_flag=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_archive_cmds_need_lc" >&5 $as_echo "$lt_cv_archive_cmds_need_lc" >&6; } archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc ;; esac fi ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking dynamic linker characteristics" >&5 $as_echo_n "checking dynamic linker characteristics... 
" >&6; } if test "$GCC" = yes; then case $host_os in darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; *) lt_awk_arg="/^libraries:/" ;; esac case $host_os in mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;; *) lt_sed_strip_eq="s,=/,/,g" ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` case $lt_search_path_spec in *\;*) # if the path contains ";" then we assume it to be the separator # otherwise default to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` ;; *) lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` ;; esac # Ok, now we have the path, separated by spaces, we can step through it # and add multilib dir if necessary. lt_tmp_lt_search_path_spec= lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` for lt_sys_path in $lt_search_path_spec; do if test -d "$lt_sys_path/$lt_multi_os_dir"; then lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" else test -d "$lt_sys_path" && \ lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" fi done lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' BEGIN {RS=" "; FS="/|\n";} { lt_foo=""; lt_count=0; for (lt_i = NF; lt_i > 0; lt_i--) { if ($lt_i != "" && $lt_i != ".") { if ($lt_i == "..") { lt_count++; } else { if (lt_count == 0) { lt_foo="/" $lt_i lt_foo; } else { lt_count--; } } } } if (lt_foo != "") { lt_freq[lt_foo]++; } if (lt_freq[lt_foo] == 1) { print lt_foo; } }'` # AWK program above erroneously prepends '/' to C:/dos/paths # for these hosts. 
case $host_os in mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ $SED 's,/\([A-Za-z]:\),\1,g'` ;; esac sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" fi library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=".so" postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='${libname}${release}${shared_ext}$major' ;; aix[4-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test "$host_cpu" = ia64; then # AIX 5 supports IA64 library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line `#! .'. This would cause the generated library to # depend on `.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. 
case $host_os in aix4 | aix4.[01] | aix4.[01].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # AIX (on Power*) has no versioning support, so currently we can not hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. if test "$aix_use_runtimelinking" = yes; then # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' else # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='${libname}${release}.a $libname.a' soname_spec='${libname}${release}${shared_ext}$major' fi shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. 
finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='${libname}${shared_ext}' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=".dll" need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api" ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl*) # Native MSVC libname_spec='$name' soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' library_names_spec='${libname}.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec="$LIB" if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH. 
sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC wrapper library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' soname_spec='${libname}${release}${major}$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. 
if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[23].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=yes sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. 
version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' if test "X$HPUX_IA64_MODE" = X32; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" fi sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[3-9]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test "$lt_cv_prog_gnu_ld" = yes; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; # This must be glibc/ELF. 
linux* | k*bsd*-gnu | kopensolaris*-gnu) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH if ${lt_cv_shlibpath_overrides_runpath+:} false; then : $as_echo_n "(cached) " >&6 else lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \ LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\"" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : if ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null; then : lt_cv_shlibpath_overrides_runpath=yes fi fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LDFLAGS=$save_LDFLAGS libdir=$save_libdir fi shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. 
Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. dynamic_linker='GNU/Linux ld.so' ;; netbsdelf*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='NetBSD ld.elf_so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no # Some older 
versions of OpenBSD (3.3 at least) *do* need versioned libs. case $host_os in openbsd3.3 | openbsd3.3.*) need_version=yes ;; *) need_version=no ;; esac library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then case $host_os in openbsd2.[89] | openbsd2.[89].*) shlibpath_overrides_runpath=no ;; *) shlibpath_overrides_runpath=yes ;; esac else shlibpath_overrides_runpath=yes fi ;; os2*) libname_spec='$name' shrext_cmds=".dll" need_lib_prefix=no library_names_spec='$libname${shared_ext} $libname.a' dynamic_linker='OS/2 ld.exe' shlibpath_var=LIBPATH ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test "$with_gnu_ld" 
= yes; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec ;then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' soname_spec='$libname${shared_ext}.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=freebsd-elf need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test "$with_gnu_ld" = yes; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5 $as_echo "$dynamic_linker" >&6; } test "$dynamic_linker" = no && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" fi if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to hardcode library paths into programs" >&5 $as_echo_n "checking how to hardcode library paths into programs... " >&6; } hardcode_action= if test -n "$hardcode_libdir_flag_spec" || test -n "$runpath_var" || test "X$hardcode_automatic" = "Xyes" ; then # We can hardcode non-existent directories. if test "$hardcode_direct" != no && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no && test "$hardcode_minus_L" != no; then # Linking always hardcodes the temporary library directory. 
hardcode_action=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. hardcode_action=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. hardcode_action=unsupported fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5 $as_echo "$hardcode_action" >&6; } if test "$hardcode_action" = relink || test "$inherit_rpath" = yes; then # Fast installation is not supported enable_fast_install=no elif test "$shlibpath_overrides_runpath" = yes || test "$enable_shared" = no; then # Fast installation is not necessary enable_fast_install=needless fi if test "x$enable_dlopen" != xyes; then enable_dlopen=unknown enable_dlopen_self=unknown enable_dlopen_self_static=unknown else lt_cv_dlopen=no lt_cv_dlopen_libs= case $host_os in beos*) lt_cv_dlopen="load_add_on" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ;; mingw* | pw32* | cegcc*) lt_cv_dlopen="LoadLibrary" lt_cv_dlopen_libs= ;; cygwin*) lt_cv_dlopen="dlopen" lt_cv_dlopen_libs= ;; darwin*) # if libdl is installed we need to link against it { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 $as_echo_n "checking for dlopen in -ldl... " >&6; } if ${ac_cv_lib_dl_dlopen+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dl_dlopen=yes else ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 $as_echo "$ac_cv_lib_dl_dlopen" >&6; } if test "x$ac_cv_lib_dl_dlopen" = xyes; then : lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" else lt_cv_dlopen="dyld" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes fi ;; *) ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load" if test "x$ac_cv_func_shl_load" = xyes; then : lt_cv_dlopen="shl_load" else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5 $as_echo_n "checking for shl_load in -ldld... " >&6; } if ${ac_cv_lib_dld_shl_load+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char shl_load (); int main () { return shl_load (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dld_shl_load=yes else ac_cv_lib_dld_shl_load=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5 $as_echo "$ac_cv_lib_dld_shl_load" >&6; } if test "x$ac_cv_lib_dld_shl_load" = xyes; then : lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld" else ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen" if test "x$ac_cv_func_dlopen" = xyes; then : lt_cv_dlopen="dlopen" else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5 $as_echo_n "checking for dlopen in -ldl... " >&6; } if ${ac_cv_lib_dl_dlopen+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dl_dlopen=yes else ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5 $as_echo "$ac_cv_lib_dl_dlopen" >&6; } if test "x$ac_cv_lib_dl_dlopen" = xyes; then : lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5 $as_echo_n "checking for dlopen in -lsvld... 
" >&6; } if ${ac_cv_lib_svld_dlopen+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lsvld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_svld_dlopen=yes else ac_cv_lib_svld_dlopen=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5 $as_echo "$ac_cv_lib_svld_dlopen" >&6; } if test "x$ac_cv_lib_svld_dlopen" = xyes; then : lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld" else { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5 $as_echo_n "checking for dld_link in -ldld... " >&6; } if ${ac_cv_lib_dld_dld_link+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char dld_link (); int main () { return dld_link (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_dld_dld_link=yes else ac_cv_lib_dld_dld_link=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5 $as_echo "$ac_cv_lib_dld_dld_link" >&6; } if test "x$ac_cv_lib_dld_dld_link" = xyes; then : lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld" fi fi fi fi fi fi ;; esac if test "x$lt_cv_dlopen" != xno; then enable_dlopen=yes else enable_dlopen=no fi case $lt_cv_dlopen in dlopen) save_CPPFLAGS="$CPPFLAGS" test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" save_LDFLAGS="$LDFLAGS" wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" save_LIBS="$LIBS" LIBS="$lt_cv_dlopen_libs $LIBS" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5 $as_echo_n "checking whether a program can dlopen itself... " >&6; } if ${lt_cv_dlopen_self+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : lt_cv_dlopen_self=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF #line $LINENO "configure" #include "confdefs.h" #if HAVE_DLFCN_H #include #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. 
*/ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif /* When -fvisbility=hidden is used, assume the code has been annotated correspondingly for the symbols needed. */ #if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) int fnord () __attribute__((visibility("default"))); #endif int fnord () { return 42; } int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else { if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; else puts (dlerror ()); } /* dlclose (self); */ } else puts (dlerror ()); return status; } _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then (./conftest; exit; ) >&5 2>/dev/null lt_status=$? case x$lt_status in x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; esac else : # compilation failed lt_cv_dlopen_self=no fi fi rm -fr conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5 $as_echo "$lt_cv_dlopen_self" >&6; } if test "x$lt_cv_dlopen_self" = xyes; then wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5 $as_echo_n "checking whether a statically linked program can dlopen itself... 
" >&6; } if ${lt_cv_dlopen_self_static+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : lt_cv_dlopen_self_static=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF #line $LINENO "configure" #include "confdefs.h" #if HAVE_DLFCN_H #include #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif /* When -fvisbility=hidden is used, assume the code has been annotated correspondingly for the symbols needed. */ #if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3)) int fnord () __attribute__((visibility("default"))); #endif int fnord () { return 42; } int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else { if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; else puts (dlerror ()); } /* dlclose (self); */ } else puts (dlerror ()); return status; } _LT_EOF if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5 (eval $ac_link) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then (./conftest; exit; ) >&5 2>/dev/null lt_status=$? 
case x$lt_status in x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; esac else : # compilation failed lt_cv_dlopen_self_static=no fi fi rm -fr conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self_static" >&5 $as_echo "$lt_cv_dlopen_self_static" >&6; } fi CPPFLAGS="$save_CPPFLAGS" LDFLAGS="$save_LDFLAGS" LIBS="$save_LIBS" ;; esac case $lt_cv_dlopen_self in yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; *) enable_dlopen_self=unknown ;; esac case $lt_cv_dlopen_self_static in yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; *) enable_dlopen_self_static=unknown ;; esac fi striplib= old_striplib= { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether stripping libraries is possible" >&5 $as_echo_n "checking whether stripping libraries is possible... " >&6; } if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" test -z "$striplib" && striplib="$STRIP --strip-unneeded" { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else # FIXME - insert some real tests, host_os isn't really good enough case $host_os in darwin*) if test -n "$STRIP" ; then striplib="$STRIP -x" old_striplib="$STRIP -S" { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi ;; *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } ;; esac fi # Report which library types will actually be built { $as_echo "$as_me:${as_lineno-$LINENO}: checking if libtool supports shared libraries" >&5 $as_echo_n "checking if libtool supports shared libraries... 
" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5 $as_echo "$can_build_shared" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build shared libraries" >&5 $as_echo_n "checking whether to build shared libraries... " >&6; } test "$can_build_shared" = "no" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test "$enable_shared" = yes && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[4-9]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5 $as_echo "$enable_shared" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build static libraries" >&5 $as_echo_n "checking whether to build static libraries... " >&6; } # Make sure either enable_shared or enable_static is yes. 
test "$enable_shared" = yes || enable_static=yes { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5 $as_echo "$enable_static" >&6; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu CC="$lt_save_CC" ac_config_commands="$ac_config_commands libtool" # Only expand once: ac_fn_c_find_intX_t "$LINENO" "64" "ac_cv_c_int64_t" case $ac_cv_c_int64_t in #( no|yes) ;; #( *) cat >>confdefs.h <<_ACEOF #define int64_t $ac_cv_c_int64_t _ACEOF ;; esac ac_fn_c_find_uintX_t "$LINENO" "64" "ac_cv_c_uint64_t" case $ac_cv_c_uint64_t in #( no|yes) ;; #( *) $as_echo "#define _UINT64_T 1" >>confdefs.h cat >>confdefs.h <<_ACEOF #define uint64_t $ac_cv_c_uint64_t _ACEOF ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking for long long int" >&5 $as_echo_n "checking for long long int... " >&6; } if ${ac_cv_type_long_long_int+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* For now, do not test the preprocessor; as of 2007 there are too many implementations with broken preprocessors. Perhaps this can be revisited in 2012. In the meantime, code should not expect #if to work with literals wider than 32 bits. */ /* Test literals. */ long long int ll = 9223372036854775807ll; long long int nll = -9223372036854775807LL; unsigned long long int ull = 18446744073709551615ULL; /* Test constant expressions. */ typedef int a[((-9223372036854775807LL < 0 && 0 < 9223372036854775807ll) ? 1 : -1)]; typedef int b[(18446744073709551615ULL <= (unsigned long long int) -1 ? 1 : -1)]; int i = 63; int main () { /* Test availability of runtime routines for shift and division. 
*/ long long int llmax = 9223372036854775807ll; unsigned long long int ullmax = 18446744073709551615ull; return ((ll << 63) | (ll >> 63) | (ll < i) | (ll > i) | (llmax / ll) | (llmax % ll) | (ull << 63) | (ull >> 63) | (ull << i) | (ull >> i) | (ullmax / ull) | (ullmax % ull)); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : if test "$cross_compiling" = yes; then : ac_cv_type_long_long_int=yes else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #ifndef LLONG_MAX # define HALF \ (1LL << (sizeof (long long int) * CHAR_BIT - 2)) # define LLONG_MAX (HALF - 1 + HALF) #endif int main () { long long int n = 1; int i; for (i = 0; ; i++) { long long int m = n << i; if (m >> i != n) return 1; if (LLONG_MAX / 2 < m) break; } return 0; ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_type_long_long_int=yes else ac_cv_type_long_long_int=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi else ac_cv_type_long_long_int=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_type_long_long_int" >&5 $as_echo "$ac_cv_type_long_long_int" >&6; } if test $ac_cv_type_long_long_int = yes; then $as_echo "#define HAVE_LONG_LONG_INT 1" >>confdefs.h fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for an ANSI C-conforming const" >&5 $as_echo_n "checking for an ANSI C-conforming const... " >&6; } if ${ac_cv_c_const+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __cplusplus /* Ultrix mips cc rejects this sort of thing. */ typedef int charset[2]; const charset cs = { 0, 0 }; /* SunOS 4.1.1 cc rejects this. */ char const *const *pcpcc; char **ppc; /* NEC SVR4.0.2 mips cc rejects this. 
*/ struct point {int x, y;}; static struct point const zero = {0,0}; /* AIX XL C 1.02.0.0 rejects this. It does not let you subtract one const X* pointer from another in an arm of an if-expression whose if-part is not a constant expression */ const char *g = "string"; pcpcc = &g + (g ? g-g : 0); /* HPUX 7.0 cc rejects these. */ ++pcpcc; ppc = (char**) pcpcc; pcpcc = (char const *const *) ppc; { /* SCO 3.2v4 cc rejects this sort of thing. */ char tx; char *t = &tx; char const *s = 0 ? (char *) 0 : (char const *) 0; *t++ = 0; if (s) return 0; } { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ int x[] = {25, 17}; const int *foo = &x[0]; ++foo; } { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ typedef const int *iptr; iptr p = 0; ++p; } { /* AIX XL C 1.02.0.0 rejects this sort of thing, saying "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ struct s { int j; const int *ap[3]; } bx; struct s *b = &bx; b->j = 5; } { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ const int foo = 10; if (!foo) return 0; } return !cs[0] && !zero.x; #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_c_const=yes else ac_cv_c_const=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5 $as_echo "$ac_cv_c_const" >&6; } if test $ac_cv_c_const = no; then $as_echo "#define const /**/" >>confdefs.h fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5 $as_echo_n "checking for inline... " >&6; } if ${ac_cv_c_inline+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_c_inline=no for ac_kw in inline __inline__ __inline; do cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #ifndef __cplusplus typedef int foo_t; static $ac_kw foo_t static_foo () {return 0; } $ac_kw foo_t foo () {return 0; } #endif _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_c_inline=$ac_kw fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext test "$ac_cv_c_inline" != no && break done fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5 $as_echo "$ac_cv_c_inline" >&6; } case $ac_cv_c_inline in inline | yes) ;; *) case $ac_cv_c_inline in no) ac_val=;; *) ac_val=$ac_cv_c_inline;; esac cat >>confdefs.h <<_ACEOF #ifndef __cplusplus #define inline $ac_val #endif _ACEOF ;; esac { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether time.h and sys/time.h may both be included" >&5 $as_echo_n "checking whether time.h and sys/time.h may both be included... " >&6; } if ${ac_cv_header_time+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #include int main () { if ((struct tm *) 0) return 0; ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_time=yes else ac_cv_header_time=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_time" >&5 $as_echo "$ac_cv_header_time" >&6; } if test $ac_cv_header_time = yes; then $as_echo "#define TIME_WITH_SYS_TIME 1" >>confdefs.h fi ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" if test "x$ac_cv_type_size_t" = xyes; then : else cat >>confdefs.h <<_ACEOF #define size_t unsigned int _ACEOF fi if test "x$enable_openmp" = xyes; then OPENMP_CFLAGS= # Check whether --enable-openmp was given. if test "${enable_openmp+set}" = set; then : enableval=$enable_openmp; fi if test "$enable_openmp" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5 $as_echo_n "checking for $CC option to support OpenMP... 
" >&6; } if ${ac_cv_prog_c_openmp+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef _OPENMP choke me #endif #include int main () { return omp_get_num_threads (); } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_prog_c_openmp='none needed' else ac_cv_prog_c_openmp='unsupported' for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \ -Popenmp --openmp; do ac_save_CFLAGS=$CFLAGS CFLAGS="$CFLAGS $ac_option" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifndef _OPENMP choke me #endif #include int main () { return omp_get_num_threads (); } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_prog_c_openmp=$ac_option fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext CFLAGS=$ac_save_CFLAGS if test "$ac_cv_prog_c_openmp" != unsupported; then break fi done fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5 $as_echo "$ac_cv_prog_c_openmp" >&6; } case $ac_cv_prog_c_openmp in #( "none needed" | unsupported) ;; #( *) OPENMP_CFLAGS=$ac_cv_prog_c_openmp ;; esac fi fi ######################## # Enable asm redc code # ######################## # If --(en|dis)able-asm-redc not specified, choose default value # Test if asm redc code is available for this cpu. # Point ASMPATH to the correct subdirectory. # asm_redc enabled by default for x86_64 and 64 bit PowerPC if test "x$enable_asm_redc" = x; then case $host in x86_64*-*-* | powerpc-apple-darwin* | powerpc64-*-linux*) enable_asm_redc=yes;; *) enable_asm_redc=no;; esac fi if test "x$enable_asm_redc" = xyes; then case $host in pentium4-*-* | pentium3-*-* | viac7-*-* | i786-*-*) ASMPATH=pentium4;; x86_64*-*-*) # In case GMP has been compiled with a 32-bit ABI... # Use AC_COMPILE_IFELSE instead of AC_PREPROC_IFELSE, otherwise # GMP's CFLAGS doesn't seem to be taken into account. 
cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #if defined(__i386__) #error #endif int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else { $as_echo "$as_me:${as_lineno-$LINENO}: 32-bit ABI (i386), disabling asm-redc" >&5 $as_echo "$as_me: 32-bit ABI (i386), disabling asm-redc" >&6;} enable_asm_redc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ASMPATH=x86_64;; # warning: with powerpc-apple-darwin* we can have ABI=32 # see bug #10646 on the bug tracker, where config.guess says # powerpc-apple-darwin8.11.0 (this a 64-bit machine, but most applications # are compiled in 32 bits). It works with --disable-asm-redc. powerpc-apple-darwin*) cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #if defined(__ppc__) #error #endif int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else { $as_echo "$as_me:${as_lineno-$LINENO}: 32-bit PowerPC, disabling asm-redc" >&5 $as_echo "$as_me: 32-bit PowerPC, disabling asm-redc" >&6;} enable_asm_redc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ASMPATH=powerpc64;; powerpc64-*-linux*) echo "include(<"$srcdir"/powerpc64/powerpc-defs.m4>)" >> $gmp_tmpconfigm4 ASMPATH=powerpc64;; i[56]86-*-* | k[78]*-*-* | athlon*-*-* | pentiumpro-*-* | \ pentium2-*-* | viac3*-*-* | i686-apple-darwin*) ASMPATH=athlon;; *) as_fn_error $? "asm redc not available on this machine $host" "$LINENO" 5;; esac fi if test "x$enable_asm_redc" = xyes; then # do the necessary definitions and includes $as_echo "#define USE_ASM_REDC 1" >>confdefs.h test "x$CCAS" != x || CCAS="$CC -c" { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suitable m4" >&5 $as_echo_n "checking for suitable m4... 
" >&6; } if ${gmp_cv_prog_m4+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$M4"; then gmp_cv_prog_m4="$M4" else cat >conftest.m4 <<\EOF define(dollarhash,``$#'')ifelse(dollarhash(x),1,`define(t1,Y)', ``bad: $# not supported (SunOS /usr/bin/m4) '')ifelse(eval(89),89,`define(t2,Y)', `bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4) ')ifelse(t1`'t2,YY,`good ') EOF echo "trying m4" >&5 gmp_tmp_val=`(m4 conftest.m4) 2>&5` echo "$gmp_tmp_val" >&5 if test "$gmp_tmp_val" = good; then gmp_cv_prog_m4="m4" else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH:/usr/5bin" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. echo "trying $ac_dir/m4" >&5 gmp_tmp_val=`($ac_dir/m4 conftest.m4) 2>&5` echo "$gmp_tmp_val" >&5 if test "$gmp_tmp_val" = good; then gmp_cv_prog_m4="$ac_dir/m4" break fi done IFS="$ac_save_ifs" if test -z "$gmp_cv_prog_m4"; then as_fn_error $? "No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." "$LINENO" 5 fi fi rm -f conftest.m4 fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_prog_m4" >&5 $as_echo "$gmp_cv_prog_m4" >&6; } M4="$gmp_cv_prog_m4" { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to text section" >&5 $as_echo_n "checking how to switch to text section... " >&6; } if ${gmp_cv_asm_text+:} false; then : $as_echo_n "(cached) " >&6 else case $host in *-*-aix*) gmp_cv_asm_text=".csect .text[PR]" ;; *-*-hpux*) gmp_cv_asm_text=".code" ;; *) gmp_cv_asm_text=".text" ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_text" >&5 $as_echo "$gmp_cv_asm_text" >&6; } echo "define(, <$gmp_cv_asm_text>)" >> $gmp_tmpconfigm4 { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to export a symbol" >&5 $as_echo_n "checking how to export a symbol... 
" >&6; } if ${gmp_cv_asm_globl+:} false; then : $as_echo_n "(cached) " >&6 else case $host in *-*-hpux*) gmp_cv_asm_globl=".export" ;; *) gmp_cv_asm_globl=".globl" ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_globl" >&5 $as_echo "$gmp_cv_asm_globl" >&6; } echo "define(, <$gmp_cv_asm_globl>)" >> $gmp_tmpconfigm4 { $as_echo "$as_me:${as_lineno-$LINENO}: checking what assembly label suffix to use" >&5 $as_echo_n "checking what assembly label suffix to use... " >&6; } if ${gmp_cv_asm_label_suffix+:} false; then : $as_echo_n "(cached) " >&6 else case $host in # Empty is only for the HP-UX hppa assembler; hppa gas requires a colon. *-*-hpux*) gmp_cv_asm_label_suffix= ;; *) gmp_cv_asm_label_suffix=: ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_label_suffix" >&5 $as_echo "$gmp_cv_asm_label_suffix" >&6; } echo "define(, <\$1$gmp_cv_asm_label_suffix>)" >> $gmp_tmpconfigm4 { $as_echo "$as_me:${as_lineno-$LINENO}: checking if globals are prefixed by underscore" >&5 $as_echo_n "checking if globals are prefixed by underscore... " >&6; } if ${gmp_cv_asm_underscore+:} false; then : $as_echo_n "(cached) " >&6 else cat >conftes1.c <conftes2.s <>conftes2.s <&5 (eval $gmp_compile) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then eval tmp_result$tmp_underscore=yes else eval tmp_result$tmp_underscore=no fi done if test $tmp_result_ = yes; then if test $tmp_result = yes; then as_fn_error $? "Test program unexpectedly links both with and without underscore." "$LINENO" 5 else gmp_cv_asm_underscore=yes fi else if test $tmp_result = yes; then gmp_cv_asm_underscore=no else as_fn_error $? "Test program links neither with nor without underscore." 
"$LINENO" 5 fi fi rm -f conftes1* conftes2* a.out fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_underscore" >&5 $as_echo "$gmp_cv_asm_underscore" >&6; } if test "$gmp_cv_asm_underscore" = "yes"; then echo 'define(, <_>)' >>$gmp_tmpconfigm4 else echo 'define(, <>)' >>$gmp_tmpconfigm4 fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to text section" >&5 $as_echo_n "checking how to switch to text section... " >&6; } if ${gmp_cv_asm_text+:} false; then : $as_echo_n "(cached) " >&6 else case $host in *-*-aix*) gmp_cv_asm_text=".csect .text[PR]" ;; *-*-hpux*) gmp_cv_asm_text=".code" ;; *) gmp_cv_asm_text=".text" ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_text" >&5 $as_echo "$gmp_cv_asm_text" >&6; } echo "define(, <$gmp_cv_asm_text>)" >> $gmp_tmpconfigm4 { $as_echo "$as_me:${as_lineno-$LINENO}: checking how to export a symbol" >&5 $as_echo_n "checking how to export a symbol... " >&6; } if ${gmp_cv_asm_globl+:} false; then : $as_echo_n "(cached) " >&6 else case $host in *-*-hpux*) gmp_cv_asm_globl=".export" ;; *) gmp_cv_asm_globl=".globl" ;; esac fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_globl" >&5 $as_echo "$gmp_cv_asm_globl" >&6; } echo "define(, <$gmp_cv_asm_globl>)" >> $gmp_tmpconfigm4 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler .type directive" >&5 $as_echo_n "checking for assembler .type directive... " >&6; } if ${gmp_cv_asm_type+:} false; then : $as_echo_n "(cached) " >&6 else gmp_cv_asm_type= for gmp_tmp_prefix in @ \# %; do cat >conftest.s <&5 (eval $gmp_assemble) 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 test $ac_status = 0; }; then cat conftest.out >&5 if grep "\.type pseudo-op used outside of \.def/\.endef ignored" conftest.out >/dev/null; then : ; else gmp_cv_asm_type=".type \$1,${gmp_tmp_prefix}\$2" break fi else cat conftest.out >&5 echo "configure: failed program was:" >&5 cat conftest.s >&5 : fi rm -f conftest* done rm -f conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gmp_cv_asm_type" >&5 $as_echo "$gmp_cv_asm_type" >&6; } echo "define(, <$gmp_cv_asm_type>)" >> $gmp_tmpconfigm4 case $host in *-*-mingw32) echo 'define(, <1>)' >>$gmp_tmpconfigm4 $as_echo "#define WINDOWS64_ABI 1" >>confdefs.h ;; *) ;; esac case $host in pentium3-*-*) echo "WARNING: Your processor is recognized as Pentium3." echo " The asm code uses SSE2, and therefore it might" echo " fail if your proc is indeed a P3, and not a" echo " Pentium M. If you have compilation problems," echo " consider using --disable-asm-redc." ;; *) esac fi if test "x$enable_asm_redc" = xyes; then ENABLE_ASM_REDC_TRUE= ENABLE_ASM_REDC_FALSE='#' else ENABLE_ASM_REDC_TRUE='#' ENABLE_ASM_REDC_FALSE= fi ############################ # Enable SSE2 instructions # ############################ # Test if we should use SSE2 instructions and if the cpu supports them if test "x$enable_sse2" = "x"; then case $host in pentium4-*-* | viac7-*-* | i686-*-* | i786-*-*) enable_sse2=yes ;; esac fi # If the necessary predefines (__GNUC__ or __ICL, and __i386__) # are not set, SSE2 will never be compiled in, and we switch off # the SSE2 flag if test "x$enable_sse2" = xyes; then cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #if !defined(__GNUC__) && !defined(__ICL) || !defined(__i386__) #error #IRIXdoesnotexitaterrordirective #endif int main () { ; return 0; } _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : else { $as_echo "$as_me:${as_lineno-$LINENO}: Not using GCC or ICC, or not a 32-bit x86. SSE2 disabled" >&5 $as_echo "$as_me: Not using GCC or ICC, or not a 32-bit x86. 
SSE2 disabled" >&6;} enable_sse2=no fi rm -f conftest.err conftest.i conftest.$ac_ext fi if test "x$enable_sse2" = xyes; then # See if we need -msse2 to enable SSE2 instructions { $as_echo "$as_me:${as_lineno-$LINENO}: checking for SSE2 support" >&5 $as_echo_n "checking for SSE2 support... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #if (defined(__GNUC__) || defined(__ICL)) && defined(__i386__) /* On some machines, a program without constraints may pass without -msse2 but those with constraints in spv.c fail, thus we test with constraints here. */ asm volatile ("pmuludq %%xmm2, %%xmm0" : : :"%xmm0"); #else #error #IRIXdoesnotexitaterrordirective #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else OLDCFLAGS="$CFLAGS" CFLAGS="$CFLAGS -msse2" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #if (defined(__GNUC__) || defined(__ICL)) && defined(__i386__) /* On some machines, a program without constraints may pass without -msse2 but those with constraints in spv.c fail, thus we test with constraints here. 
*/ asm volatile ("pmuludq %%xmm2, %%xmm0" : : :"%xmm0"); #else #error #IRIXdoesnotexitaterrordirective #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes, with -msse2" >&5 $as_echo "yes, with -msse2" >&6; } else CFLAGS="$OLDCFLAGS" enable_sse2=no { $as_echo "$as_me:${as_lineno-$LINENO}: result: not supported, SSE2 disabled" >&5 $as_echo "not supported, SSE2 disabled" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi if test "x$enable_sse2" = xyes; then $as_echo "#define HAVE_SSE2 1" >>confdefs.h fi ######################## # Add GWNUM if desired # ######################## if test "x$with_gwnum" != "x"; then if test "x$enable_openmp" = xyes; then as_fn_error $? "Woltman's GWNUM currently cannot be used together with OpenMP" "$LINENO" 5 fi as_ac_File=`$as_echo "ac_cv_file_$with_gwnum/gwnum.a" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $with_gwnum/gwnum.a" >&5 $as_echo_n "checking for $with_gwnum/gwnum.a... " >&6; } if eval \${$as_ac_File+:} false; then : $as_echo_n "(cached) " >&6 else test "$cross_compiling" = yes && as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 if test -r "$with_gwnum/gwnum.a"; then eval "$as_ac_File=yes" else eval "$as_ac_File=no" fi fi eval ac_res=\$$as_ac_File { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } if eval test \"x\$"$as_ac_File"\" = x"yes"; then : $as_echo "#define HAVE_GWNUM 1" >>confdefs.h GWLIB="$with_gwnum/gwnum.a -lpthread" CPPFLAGS="$CPPFLAGS -I$with_gwnum" else as_ac_File=`$as_echo "ac_cv_file_$with_gwnum/gwnum.lib" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $with_gwnum/gwnum.lib" >&5 $as_echo_n "checking for $with_gwnum/gwnum.lib... 
" >&6; } if eval \${$as_ac_File+:} false; then : $as_echo_n "(cached) " >&6 else test "$cross_compiling" = yes && as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5 if test -r "$with_gwnum/gwnum.lib"; then eval "$as_ac_File=yes" else eval "$as_ac_File=no" fi fi eval ac_res=\$$as_ac_File { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 $as_echo "$ac_res" >&6; } if eval test \"x\$"$as_ac_File"\" = x"yes"; then : $as_echo "#define HAVE_GWNUM 1" >>confdefs.h GWLIB="$with_gwnum/gwnum.lib -lpthread" CPPFLAGS="$CPPFLAGS -I$with_gwnum" else with_gwnum= as_fn_error $? "Woltman's GWNUM library not found" "$LINENO" 5 fi fi fi if test "x$with_gwnum" != "x"; then WITH_GWNUM_TRUE= WITH_GWNUM_FALSE='#' else WITH_GWNUM_TRUE='#' WITH_GWNUM_FALSE= fi # The Ultrix 4.2 mips builtin alloca declared by alloca.h only works # for constant arguments. Useless! { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5 $as_echo_n "checking for working alloca.h... " >&6; } if ${ac_cv_working_alloca_h+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { char *p = (char *) alloca (2 * sizeof (int)); if (p) return 0; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_working_alloca_h=yes else ac_cv_working_alloca_h=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_working_alloca_h" >&5 $as_echo "$ac_cv_working_alloca_h" >&6; } if test $ac_cv_working_alloca_h = yes; then $as_echo "#define HAVE_ALLOCA_H 1" >>confdefs.h fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca" >&5 $as_echo_n "checking for alloca... " >&6; } if ${ac_cv_func_alloca_works+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #ifdef __GNUC__ # define alloca __builtin_alloca #else # ifdef _MSC_VER # include # define alloca _alloca # else # ifdef HAVE_ALLOCA_H # include # else # ifdef _AIX #pragma alloca # else # ifndef alloca /* predefined by HP cc +Olibcalls */ void *alloca (size_t); # endif # endif # endif # endif #endif int main () { char *p = (char *) alloca (1); if (p) return 0; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_func_alloca_works=yes else ac_cv_func_alloca_works=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_alloca_works" >&5 $as_echo "$ac_cv_func_alloca_works" >&6; } if test $ac_cv_func_alloca_works = yes; then $as_echo "#define HAVE_ALLOCA 1" >>confdefs.h else # The SVR3 libPW and SVR4 libucb both contain incompatible functions # that cause trouble. Some versions do not even contain alloca or # contain a buggy version. If you still want to use their alloca, # use ar to extract alloca.o from them instead of compiling alloca.c. ALLOCA=\${LIBOBJDIR}alloca.$ac_objext $as_echo "#define C_ALLOCA 1" >>confdefs.h { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether \`alloca.c' needs Cray hooks" >&5 $as_echo_n "checking whether \`alloca.c' needs Cray hooks... " >&6; } if ${ac_cv_os_cray+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #if defined CRAY && ! 
defined CRAY2 webecray #else wenotbecray #endif _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "webecray" >/dev/null 2>&1; then : ac_cv_os_cray=yes else ac_cv_os_cray=no fi rm -f conftest* fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_os_cray" >&5 $as_echo "$ac_cv_os_cray" >&6; } if test $ac_cv_os_cray = yes; then for ac_func in _getb67 GETB67 getb67; do as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define CRAY_STACKSEG_END $ac_func _ACEOF break fi done fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking stack direction for C alloca" >&5 $as_echo_n "checking stack direction for C alloca... " >&6; } if ${ac_cv_c_stack_direction+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : ac_cv_c_stack_direction=0 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ $ac_includes_default int find_stack_direction (int *addr, int depth) { int dir, dummy = 0; if (! addr) addr = &dummy; *addr = addr < &dummy ? 1 : addr == &dummy ? 0 : -1; dir = depth ? find_stack_direction (addr, depth - 1) : 0; return dir + dummy; } int main (int argc, char **argv) { return find_stack_direction (0, argc + !argv + 20) < 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_c_stack_direction=1 else ac_cv_c_stack_direction=-1 fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_stack_direction" >&5 $as_echo "$ac_cv_c_stack_direction" >&6; } cat >>confdefs.h <<_ACEOF #define STACK_DIRECTION $ac_cv_c_stack_direction _ACEOF fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... 
" >&6; } if ${ac_cv_header_stdc+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #include #include int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_header_stdc=yes else ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : : else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : else ac_cv_header_stdc=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 $as_echo "$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then $as_echo "#define STDC_HEADERS 1" >>confdefs.h fi for ac_header in math.h limits.h malloc.h strings.h sys/time.h unistd.h io.h signal.h fcntl.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done for ac_header in windows.h do : ac_fn_c_check_header_mongrel "$LINENO" "windows.h" "ac_cv_header_windows_h" "$ac_includes_default" if test "x$ac_cv_header_windows_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_WINDOWS_H 1 _ACEOF fi done for ac_header in ctype.h sys/types.h sys/resource.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done { $as_echo "$as_me:${as_lineno-$LINENO}: checking for working strtod" >&5 $as_echo_n "checking for working strtod... " >&6; } if ${ac_cv_func_strtod+:} false; then : $as_echo_n "(cached) " >&6 else if test "$cross_compiling" = yes; then : ac_cv_func_strtod=no else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ $ac_includes_default #ifndef strtod double strtod (); #endif int main() { { /* Some versions of Linux strtod mis-parse strings with leading '+'. */ char *string = " +69"; char *term; double value; value = strtod (string, &term); if (value != 69 || term != (string + 4)) return 1; } { /* Under Solaris 2.4, strtod returns the wrong value for the terminating character under some conditions. */ char *string = "NaN"; char *term; strtod (string, &term); if (term != string && *(term - 1) == 0) return 1; } return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : ac_cv_func_strtod=yes else ac_cv_func_strtod=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_strtod" >&5 $as_echo "$ac_cv_func_strtod" >&6; } if test $ac_cv_func_strtod = no; then case " $LIBOBJS " in *" strtod.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS strtod.$ac_objext" ;; esac ac_fn_c_check_func "$LINENO" "pow" "ac_cv_func_pow" if test "x$ac_cv_func_pow" = xyes; then : fi if test $ac_cv_func_pow = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pow in -lm" >&5 $as_echo_n "checking for pow in -lm... " >&6; } if ${ac_cv_lib_m_pow+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char pow (); int main () { return pow (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_m_pow=yes else ac_cv_lib_m_pow=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_pow" >&5 $as_echo "$ac_cv_lib_m_pow" >&6; } if test "x$ac_cv_lib_m_pow" = xyes; then : POW_LIB=-lm else { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot find library containing definition of pow" >&5 $as_echo "$as_me: WARNING: cannot find library containing definition of pow" >&2;} fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pow in -lm" >&5 $as_echo_n "checking for pow in -lm... " >&6; } if ${ac_cv_lib_m_pow+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char pow (); int main () { return pow (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_m_pow=yes else ac_cv_lib_m_pow=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_pow" >&5 $as_echo "$ac_cv_lib_m_pow" >&6; } if test "x$ac_cv_lib_m_pow" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBM 1 _ACEOF LIBS="-lm $LIBS" else as_fn_error $? "required function missing" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for floor in -lm" >&5 $as_echo_n "checking for floor in -lm... 
" >&6; } if ${ac_cv_lib_m_floor+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char floor (); int main () { return floor (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_m_floor=yes else ac_cv_lib_m_floor=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_floor" >&5 $as_echo "$ac_cv_lib_m_floor" >&6; } if test "x$ac_cv_lib_m_floor" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBM 1 _ACEOF LIBS="-lm $LIBS" else as_fn_error $? "required function missing" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sqrt in -lm" >&5 $as_echo_n "checking for sqrt in -lm... " >&6; } if ${ac_cv_lib_m_sqrt+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char sqrt (); int main () { return sqrt (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_m_sqrt=yes else ac_cv_lib_m_sqrt=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_sqrt" >&5 $as_echo "$ac_cv_lib_m_sqrt" >&6; } if test "x$ac_cv_lib_m_sqrt" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBM 1 _ACEOF LIBS="-lm $LIBS" else as_fn_error $? 
"required function missing" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for fmod in -lm" >&5 $as_echo_n "checking for fmod in -lm... " >&6; } if ${ac_cv_lib_m_fmod+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char fmod (); int main () { return fmod (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_m_fmod=yes else ac_cv_lib_m_fmod=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_fmod" >&5 $as_echo "$ac_cv_lib_m_fmod" >&6; } if test "x$ac_cv_lib_m_fmod" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBM 1 _ACEOF LIBS="-lm $LIBS" else as_fn_error $? "required function missing" "$LINENO" 5 fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5 $as_echo_n "checking for cos in -lm... " >&6; } if ${ac_cv_lib_m_cos+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char cos (); int main () { return cos (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_m_cos=yes else ac_cv_lib_m_cos=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5 $as_echo "$ac_cv_lib_m_cos" >&6; } if test "x$ac_cv_lib_m_cos" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBM 1 _ACEOF LIBS="-lm $LIBS" fi GSL_LD_FLAGS= { $as_echo "$as_me:${as_lineno-$LINENO}: checking for cblas_dgemm in -lgslcblas" >&5 $as_echo_n "checking for cblas_dgemm in -lgslcblas... " >&6; } if ${ac_cv_lib_gslcblas_cblas_dgemm+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lgslcblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char cblas_dgemm (); int main () { return cblas_dgemm (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_gslcblas_cblas_dgemm=yes else ac_cv_lib_gslcblas_cblas_dgemm=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gslcblas_cblas_dgemm" >&5 $as_echo "$ac_cv_lib_gslcblas_cblas_dgemm" >&6; } if test "x$ac_cv_lib_gslcblas_cblas_dgemm" = xyes; then : { $as_echo "$as_me:${as_lineno-$LINENO}: checking for gsl_blas_dgemm in -lgsl" >&5 $as_echo_n "checking for gsl_blas_dgemm in -lgsl... " >&6; } if ${ac_cv_lib_gsl_gsl_blas_dgemm+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lgsl -lgslcblas $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char gsl_blas_dgemm (); int main () { return gsl_blas_dgemm (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_gsl_gsl_blas_dgemm=yes else ac_cv_lib_gsl_gsl_blas_dgemm=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gsl_gsl_blas_dgemm" >&5 $as_echo "$ac_cv_lib_gsl_gsl_blas_dgemm" >&6; } if test "x$ac_cv_lib_gsl_gsl_blas_dgemm" = xyes; then : $as_echo "#define HAVE_LIBGSL 1" >>confdefs.h $as_echo "#define HAVE_LIBGSLCBLAS 1" >>confdefs.h GSL_LD_FLAGS="-lgsl -lgslcblas" fi fi for ac_func in isascii memset strchr strlen strncasecmp strstr do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF else as_fn_error $? "required function missing" "$LINENO" 5 fi done for ac_func in access unlink do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF else as_fn_error $? "required function missing" "$LINENO" 5 fi done for ac_func in isspace isdigit isxdigit do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF else as_fn_error $? 
"required function missing" "$LINENO" 5 fi done for ac_func in time ctime do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF else as_fn_error $? "required function missing" "$LINENO" 5 fi done for ac_func in setpriority nice gethostname gettimeofday getrusage memmove signal fcntl fileno do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi done for ac_func in malloc_usable_size do : ac_fn_c_check_func "$LINENO" "malloc_usable_size" "ac_cv_func_malloc_usable_size" if test "x$ac_cv_func_malloc_usable_size" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_MALLOC_USABLE_SIZE 1 _ACEOF fi done if test "x$GCC" = xyes && test "x$user_redefine_cc" != xyes; then case $CFLAGS in "-pedantic "* | *" -pedantic "* | *" -pedantic") ;; *) CFLAGS="-pedantic $CFLAGS" esac case $CFLAGS in "-Wundef "* | *" -Wundef "* | *" -Wundef") ;; *) CFLAGS="-Wundef $CFLAGS" esac case $CFLAGS in "-Wall "* | *" -Wall "* | *" -Wall") ;; *) CFLAGS="-Wall $CFLAGS" esac case $CFLAGS in "-W "* | *" -W "* | *" -W") ;; *) CFLAGS="-W $CFLAGS" esac # CFLAGS="-Wall -W -Wundef -pedantic $CFLAGS" fi if test -d "$with_gmp_include"; then CPPFLAGS="-I$with_gmp_include $CPPFLAGS" else with_gmp_include= fi for ac_header in gmp.h do : ac_fn_c_check_header_mongrel "$LINENO" "gmp.h" "ac_cv_header_gmp_h" "$ac_includes_default" if test "x$ac_cv_header_gmp_h" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_GMP_H 1 _ACEOF else as_fn_error $? "required header file missing" "$LINENO" 5 fi done { $as_echo "$as_me:${as_lineno-$LINENO}: checking for recent GMP" >&5 $as_echo_n "checking for recent GMP... 
" >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #if (__GNU_MP_VERSION <= 4) #error #IRIXdoesnotexitaterrordirective #endif _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } as_fn_error $? "GMP 5.0.0 or newer is required" "$LINENO" 5 fi rm -f conftest.err conftest.i conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: checking if GMP is MPIR" >&5 $as_echo_n "checking if GMP is MPIR... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #ifndef __MPIR_VERSION #error #IRIXdoesnotexitaterrordirective #endif _ACEOF if ac_fn_c_try_cpp "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } $as_echo "#define HAVE_MPIR 1" >>confdefs.h else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi rm -f conftest.err conftest.i conftest.$ac_ext GMPLDFLAGS="" if test -d "$with_gmp_lib"; then GMPLDFLAGS="-L$with_gmp_lib" fi GMPLIB="-lgmp" if test "x$enable_shared" != xyes; then if test -r "$with_gmp_lib/libgmp.a"; then GMPLIB="$with_gmp_lib/libgmp.a" GMPLDFLAGS="" fi fi LDFLAGS="$LDFLAGS $GMPLDFLAGS" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we can link against GMP" >&5 $as_echo_n "checking whether we can link against GMP... " >&6; } LIBS_BACKUP="$LIBS" LIBS="$LIBS $GMPLIB" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #ifdef HAVE_GMP_H #include #endif int main () { mpz_t t; mpz_init(t); mpz_clear(t); return 0; ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } as_fn_error $? "Could not link against GMP library." 
"$LINENO" 5 fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext { $as_echo "$as_me:${as_lineno-$LINENO}: checking if gmp.h version and libgmp version are the same" >&5 $as_echo_n "checking if gmp.h version and libgmp version are the same... " >&6; } if test "$cross_compiling" = yes; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: cross-compiling: cannot test" >&5 $as_echo "cross-compiling: cannot test" >&6; } else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include #include int main () { char buffer[100]; if (__GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR <= 2 && __GNU_MP_VERSION_PATCHLEVEL == 0) sprintf (buffer, "%d.%d", __GNU_MP_VERSION, __GNU_MP_VERSION_MINOR); else sprintf (buffer, "%d.%d.%d", __GNU_MP_VERSION, __GNU_MP_VERSION_MINOR, __GNU_MP_VERSION_PATCHLEVEL); printf ("(%s/%s) ", buffer, gmp_version); return strcmp (buffer, gmp_version); ; return 0; } _ACEOF if ac_fn_c_try_run "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } as_fn_error $? "'gmp.h' and 'libgmp' have different versions, you have to reinstall GMP properly." 
"$LINENO" 5 fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ conftest.$ac_objext conftest.beam conftest.$ac_ext fi for ac_func in __gmpn_add_nc __gmpn_mod_34lsub1 __gmpn_redc_1 __gmpn_redc_2 do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi done for ac_func in __gmpn_mullo_n __gmpn_redc_n __gmpn_preinv_mod_1 do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" if eval test \"x\$"$as_ac_var"\" = x"yes"; then : cat >>confdefs.h <<_ACEOF #define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi done LIBS="$LIBS_BACKUP" { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler knows __attribute__((hot))" >&5 $as_echo_n "checking whether compiler knows __attribute__((hot))... " >&6; } ac_c_werror_flag=yes cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ void foo() __attribute__ ((hot)); void foo() {return;} int main () { foo() ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : $as_echo "#define ATTRIBUTE_HOT __attribute__ ((hot))" >>confdefs.h { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } else $as_echo "#define ATTRIBUTE_HOT " >>confdefs.h { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext # Extract the first word of "xsltproc", so it can be a program name with args. set dummy xsltproc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_XSLTPROC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$XSLTPROC"; then ac_cv_prog_XSLTPROC="$XSLTPROC" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_XSLTPROC="xsltproc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi XSLTPROC=$ac_cv_prog_XSLTPROC if test -n "$XSLTPROC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $XSLTPROC" >&5 $as_echo "$XSLTPROC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$XSLTPROC" != x; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for docbook.xsl" >&5 $as_echo_n "checking for docbook.xsl... " >&6; } if test "x$XSLDIR" = x; then if test -d "/usr/local/share/sgml/docbook/xsl-stylesheets"; then XSLDIR="/usr/local/share/sgml/docbook/xsl-stylesheets" elif test -d "/usr/share/sgml/docbook/xsl-stylesheets"; then XSLDIR="/usr/share/sgml/docbook/xsl-stylesheets" elif test -d "/usr/local/share/docbook/"; then XSLDIR="/usr/local/share/docbook/" elif test -d "/usr/share/docbook/"; then XSLDIR="/usr/share/docbook/" fi fi if test -r "$XSLDIR/manpages/docbook.xsl"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } make_manpage="yes" else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test "x$make_manpage" = xyes; then MAKE_MANPAGE_TRUE= MAKE_MANPAGE_FALSE='#' else MAKE_MANPAGE_TRUE='#' MAKE_MANPAGE_FALSE= fi # Extract the first word of "valgrind", so it can be a program name with args. set dummy valgrind; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_VALGRIND+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$VALGRIND"; then ac_cv_prog_VALGRIND="$VALGRIND" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_VALGRIND="valgrind -q --error-exitcode=1" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi VALGRIND=$ac_cv_prog_VALGRIND if test -n "$VALGRIND"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $VALGRIND" >&5 $as_echo "$VALGRIND" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi case $host in athlon*-*-*) config_arch="athlon" ;; x86_64*-*-*) config_arch="athlon64" ;; pentium3-*-*) config_arch="pentium3" ;; pentium4-*-*) config_arch="pentium4" ;; pentium-m-*-*) config_arch="pentium-m" ;; alphaev6*-*-*) config_arch="alpha-ev6" ;; alphaev56*-*-*) config_arch="alpha-ev56" ;; alphaev5*-*-*) config_arch="alpha-ev5" ;; powerpc7450-*-*) config_arch="powerpc7450" ;; powerpc-apple-darwin* | powerpc64-*-*) config_arch="powerpc970" ;; mips64el-*-*) config_arch="mips64el" ;; armv5tel-*-*) config_arch="armv5tel" ;; sparc64-*-*) config_arch="sparc64" ;; ia64-*-*) config_arch="ia64" ;; hppa2.0-*-*) config_arch="hppa2.0" ;; *) config_arch="default" ;; esac # See if this is a Core 2, if we have /proc/cpuinfo core2warn=no if test x"$config_arch" = xathlon64; then if test -f /proc/cpuinfo; then if $EGREP -q "Core\(TM\)2" /proc/cpuinfo; then config_arch=core2 elif $EGREP -q "Core\(TM\) i5" /proc/cpuinfo; then config_arch=corei5 fi; else # No /proc/cpuinfo, tell user about ecm-params.h.core2 core2warn=yes fi; fi # See if this is a Pentium 4, if we have /proc/cpuinfo pentium4warn=no if test x"$config_arch" = xdefault; then if test -f /proc/cpuinfo; then if $EGREP -q "Pentium\(R\) 4" /proc/cpuinfo; then config_arch=pentium4 fi; else # No /proc/cpuinfo, tell user about ecm-params.h.pentium4 pentium4warn=yes fi; fi LIBS="$LIBS $GWLIB" echo 
"creating $gmp_configm4" echo "d""nl $gmp_configm4. Generated automatically by configure." > $gmp_configm4 if test -f $gmp_tmpconfigm4; then echo "changequote(<,>)" >> $gmp_configm4 echo "ifdef(<__CONFIG_M4_INCLUDED__>,,<" >> $gmp_configm4 cat $gmp_tmpconfigm4 >> $gmp_configm4 echo ">)" >> $gmp_configm4 echo "changequote(\`,')" >> $gmp_configm4 rm $gmp_tmpconfigm4 fi echo "ifdef(\`__CONFIG_M4_INCLUDED__',,\`" >> $gmp_configm4 if test -f $gmp_tmpconfigm4i; then cat $gmp_tmpconfigm4i >> $gmp_configm4 rm $gmp_tmpconfigm4i fi if test -f $gmp_tmpconfigm4p; then cat $gmp_tmpconfigm4p >> $gmp_configm4 rm $gmp_tmpconfigm4p fi echo "')" >> $gmp_configm4 echo "define(\`__CONFIG_M4_INCLUDED__')" >> $gmp_configm4 ac_config_files="$ac_config_files Makefile athlon/Makefile pentium4/Makefile x86_64/Makefile powerpc64/Makefile build.vc10/Makefile build.vc10/assembler/Makefile build.vc10/ecm/Makefile build.vc10/libecm/Makefile build.vc10/tune/Makefile build.vc10/bench_mulredc/Makefile" ac_config_links="$ac_config_links ecm-params.h:ecm-params.h.$config_arch" MUL_FFT_PARAMS="mul_fft-params.h.$config_arch" if ! test -f "$MUL_FFT_PARAMS" then MUL_FFT_PARAMS="mul_fft-params.h.default" fi ac_config_links="$ac_config_links mul_fft-params.h:"$MUL_FFT_PARAMS"" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. 
_ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. ( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} if test ! 
-f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else case $cache_file in #( */* | ?:*) mv -f confcache "$cache_file"$$ && mv -f "$cache_file"$$ "$cache_file" ;; #( *) mv -f confcache "$cache_file" ;; esac fi fi else { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`$as_echo "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs if test -n "$EXEEXT"; then am__EXEEXT_TRUE= am__EXEEXT_FALSE='#' else am__EXEEXT_TRUE='#' am__EXEEXT_FALSE= fi if test -n "$EXEEXT"; then am__EXEEXT_TRUE= am__EXEEXT_FALSE='#' else am__EXEEXT_TRUE='#' am__EXEEXT_FALSE= fi if test -z "${MEMORY_DEBUG_TRUE}" && test -z "${MEMORY_DEBUG_FALSE}"; then as_fn_error $? "conditional \"MEMORY_DEBUG\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then as_fn_error $? "conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 fi if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${am__fastdepCCAS_TRUE}" && test -z "${am__fastdepCCAS_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCCAS\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${ENABLE_ASM_REDC_TRUE}" && test -z "${ENABLE_ASM_REDC_FALSE}"; then as_fn_error $? "conditional \"ENABLE_ASM_REDC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${WITH_GWNUM_TRUE}" && test -z "${WITH_GWNUM_FALSE}"; then as_fn_error $? "conditional \"WITH_GWNUM\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi if test -z "${MAKE_MANPAGE_TRUE}" && test -z "${MAKE_MANPAGE_FALSE}"; then as_fn_error $? "conditional \"MAKE_MANPAGE\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 $as_echo "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 ## -------------------- ## ## M4sh Initialization. 
## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. 
Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. 
as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? 
-eq 1` } fi # as_fn_arith if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. 
as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 ## ----------------------------------- ## ## Main body of $CONFIG_STATUS script. ## ## ----------------------------------- ## _ASEOF test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Save the log message, to keep $0 and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by ecm $as_me 6.4.4, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac case $ac_config_headers in *" "*) set x $ac_config_headers; shift; ac_config_headers=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" config_headers="$ac_config_headers" config_links="$ac_config_links" config_commands="$ac_config_commands" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. Usage: $0 [OPTION]... [TAG]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE --header=FILE[:TEMPLATE] instantiate the configuration header FILE Configuration files: $config_files Configuration headers: $config_headers Configuration links: $config_links Configuration commands: $config_commands Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ ecm config.status 6.4.4 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" Copyright (C) 2012 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." 
ac_pwd='$ac_pwd' srcdir='$srcdir' INSTALL='$INSTALL' MKDIR_P='$MKDIR_P' AWK='$AWK' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. ac_need_defaults=: while test $# != 0 do case $1 in --*=?*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; --*=) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg= ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --header | --heade | --head | --hea ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; esac as_fn_append CONFIG_HEADERS " '$ac_optarg'" ac_need_defaults=false;; --he | --h) # Conflict between --help and --header as_fn_error $? "ambiguous option: \`$1' Try \`$0 --help' for more information.";; --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) as_fn_error $? "unrecognized option: \`$1' Try \`$0 --help' for more information." 
;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX $as_echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # # INIT-COMMANDS # AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH sed_quote_subst='$sed_quote_subst' double_quote_subst='$double_quote_subst' delay_variable_subst='$delay_variable_subst' macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`' macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`' enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`' enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`' pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`' enable_fast_install='`$ECHO "$enable_fast_install" | $SED "$delay_single_quote_subst"`' SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`' ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`' PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`' host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`' host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`' host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`' build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`' 
build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`' build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`' SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`' Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`' GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`' EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`' FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`' LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`' NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`' LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`' max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`' ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`' exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`' lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`' lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`' lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`' lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED "$delay_single_quote_subst"`' lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED "$delay_single_quote_subst"`' reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`' reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`' OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`' deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED "$delay_single_quote_subst"`' file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`' file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`' want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`' DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`' sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED "$delay_single_quote_subst"`' AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`' AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`' 
archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED "$delay_single_quote_subst"`' STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`' RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`' old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED "$delay_single_quote_subst"`' old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED "$delay_single_quote_subst"`' old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED "$delay_single_quote_subst"`' lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED "$delay_single_quote_subst"`' CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`' CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`' compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`' GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_c_name_address='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address" | $SED "$delay_single_quote_subst"`' lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO "$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED "$delay_single_quote_subst"`' nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED "$delay_single_quote_subst"`' lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`' objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`' MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED "$delay_single_quote_subst"`' lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED "$delay_single_quote_subst"`' 
lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED "$delay_single_quote_subst"`' need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`' MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`' DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`' NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`' LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`' OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`' OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`' libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`' shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`' extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED "$delay_single_quote_subst"`' archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED "$delay_single_quote_subst"`' enable_shared_with_static_runtimes='`$ECHO "$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`' export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED "$delay_single_quote_subst"`' whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED "$delay_single_quote_subst"`' compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED "$delay_single_quote_subst"`' old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED "$delay_single_quote_subst"`' old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED "$delay_single_quote_subst"`' archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`' archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED "$delay_single_quote_subst"`' module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`' module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED "$delay_single_quote_subst"`' with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`' allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED "$delay_single_quote_subst"`' no_undefined_flag='`$ECHO 
"$no_undefined_flag" | $SED "$delay_single_quote_subst"`' hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED "$delay_single_quote_subst"`' hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED "$delay_single_quote_subst"`' hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`' hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED "$delay_single_quote_subst"`' hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED "$delay_single_quote_subst"`' hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED "$delay_single_quote_subst"`' hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED "$delay_single_quote_subst"`' inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`' link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED "$delay_single_quote_subst"`' always_export_symbols='`$ECHO "$always_export_symbols" | $SED "$delay_single_quote_subst"`' export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED "$delay_single_quote_subst"`' exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`' include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`' prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`' postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`' file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`' variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED "$delay_single_quote_subst"`' need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`' need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`' version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`' runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`' shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`' shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED 
"$delay_single_quote_subst"`' libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`' library_names_spec='`$ECHO "$library_names_spec" | $SED "$delay_single_quote_subst"`' soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`' install_override_mode='`$ECHO "$install_override_mode" | $SED "$delay_single_quote_subst"`' postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED "$delay_single_quote_subst"`' postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED "$delay_single_quote_subst"`' finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`' finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`' hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED "$delay_single_quote_subst"`' sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED "$delay_single_quote_subst"`' sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED "$delay_single_quote_subst"`' hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`' enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`' enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED "$delay_single_quote_subst"`' enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED "$delay_single_quote_subst"`' old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`' striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`' LTCC='$LTCC' LTCFLAGS='$LTCFLAGS' compiler='$compiler_DEFAULT' # A function that is used when there is no print builtin or printf. func_fallback_echo () { eval 'cat <<_LTECHO_EOF \$1 _LTECHO_EOF' } # Quote evaled strings. 
for var in SHELL \ ECHO \ PATH_SEPARATOR \ SED \ GREP \ EGREP \ FGREP \ LD \ NM \ LN_S \ lt_SP2NL \ lt_NL2SP \ reload_flag \ OBJDUMP \ deplibs_check_method \ file_magic_cmd \ file_magic_glob \ want_nocaseglob \ DLLTOOL \ sharedlib_from_linklib_cmd \ AR \ AR_FLAGS \ archiver_list_spec \ STRIP \ RANLIB \ CC \ CFLAGS \ compiler \ lt_cv_sys_global_symbol_pipe \ lt_cv_sys_global_symbol_to_cdecl \ lt_cv_sys_global_symbol_to_c_name_address \ lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \ nm_file_list_spec \ lt_prog_compiler_no_builtin_flag \ lt_prog_compiler_pic \ lt_prog_compiler_wl \ lt_prog_compiler_static \ lt_cv_prog_compiler_c_o \ need_locks \ MANIFEST_TOOL \ DSYMUTIL \ NMEDIT \ LIPO \ OTOOL \ OTOOL64 \ shrext_cmds \ export_dynamic_flag_spec \ whole_archive_flag_spec \ compiler_needs_object \ with_gnu_ld \ allow_undefined_flag \ no_undefined_flag \ hardcode_libdir_flag_spec \ hardcode_libdir_separator \ exclude_expsyms \ include_expsyms \ file_list_spec \ variables_saved_for_relink \ libname_spec \ library_names_spec \ soname_spec \ install_override_mode \ finish_eval \ old_striplib \ striplib; do case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in *[\\\\\\\`\\"\\\$]*) eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\"" ;; *) eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\"" ;; esac done # Double-quote double-evaled strings. 
# Same idea as the single-eval quoting loop, but for the command strings
# listed below: each NAME gets an lt_NAME copy wrapped in double quotes.
# Values containing special characters go through three SED expressions
# (double_quote_subst, sed_quote_subst, delay_variable_subst) instead of one.
# NOTE(review): this text appears to sit inside an unquoted here-document
# (see the terminator further down) -- keep dollar signs, backquotes and
# backslashes out of any comment added here.
for var in reload_cmds \
old_postinstall_cmds \
old_postuninstall_cmds \
old_archive_cmds \
extract_expsyms_cmds \
old_archive_from_new_cmds \
old_archive_from_expsyms_cmds \
archive_cmds \
archive_expsym_cmds \
module_cmds \
module_expsym_cmds \
export_symbols_cmds \
prelink_cmds \
postlink_cmds \
postinstall_cmds \
postuninstall_cmds \
finish_cmds \
sys_lib_search_path_spec \
sys_lib_dlsearch_path_spec; do
    # Look at the current value of the variable whose name is in var.
    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
    *[\\\\\\\`\\"\\\$]*)
      # Special characters present: apply the three substitutions, then
      # store the double-quoted result in lt_NAME.
      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
      ;;
    *)
      # Nothing to escape: store the double-quoted value in lt_NAME.
      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
      ;;
    esac
done

# Settings recorded for later use; the libtool output step further down in
# this script reads ac_aux_dir (to locate ltmain.sh) and xsi_shell.
ac_aux_dir='$ac_aux_dir'
xsi_shell='$xsi_shell'
lt_shell_append='$lt_shell_append'

# See if we are running on zsh, and set the options which allow our
# commands through without removal of \ escapes INIT.
if test -n "\${ZSH_VERSION+set}" ; then
   setopt NO_GLOB_SUBST
fi


    # Package identification and file names; the libtool output step uses
    # these for the header of the generated file, for RM and for ofile.
    PACKAGE='$PACKAGE'
    VERSION='$VERSION'
    TIMESTAMP='$TIMESTAMP'
    RM='$RM'
    ofile='$ofile'



_ACEOF

# The delimiter of the next here-document is quoted (backslash before
# _ACEOF), so the text that follows is appended to CONFIG_STATUS verbatim,
# without expansion.
cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1

# Handling of arguments.
# Sort every requested target into the list that matches its kind:
# headers go to CONFIG_HEADERS, command tags to CONFIG_COMMANDS, generated
# files to CONFIG_FILES and links to CONFIG_LINKS.  A name that is not in
# the table is reported through as_fn_error.
for ac_config_target in $ac_config_targets
do
  case $ac_config_target in
    "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
    "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
    "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
    "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
    "athlon/Makefile") CONFIG_FILES="$CONFIG_FILES athlon/Makefile" ;;
    "pentium4/Makefile") CONFIG_FILES="$CONFIG_FILES pentium4/Makefile" ;;
    "x86_64/Makefile") CONFIG_FILES="$CONFIG_FILES x86_64/Makefile" ;;
    "powerpc64/Makefile") CONFIG_FILES="$CONFIG_FILES powerpc64/Makefile" ;;
    "build.vc10/Makefile") CONFIG_FILES="$CONFIG_FILES build.vc10/Makefile" ;;
    "build.vc10/assembler/Makefile") CONFIG_FILES="$CONFIG_FILES build.vc10/assembler/Makefile" ;;
    "build.vc10/ecm/Makefile") CONFIG_FILES="$CONFIG_FILES build.vc10/ecm/Makefile" ;;
    "build.vc10/libecm/Makefile") CONFIG_FILES="$CONFIG_FILES build.vc10/libecm/Makefile" ;;
    "build.vc10/tune/Makefile") CONFIG_FILES="$CONFIG_FILES build.vc10/tune/Makefile" ;;
    "build.vc10/bench_mulredc/Makefile") CONFIG_FILES="$CONFIG_FILES build.vc10/bench_mulredc/Makefile" ;;
    # Link entries have the form destination:source.
    "ecm-params.h") CONFIG_LINKS="$CONFIG_LINKS ecm-params.h:ecm-params.h.$config_arch" ;;
    # NOTE(review): the inner double quotes close and reopen the string, so
    # MUL_FFT_PARAMS is expanded unquoted here; a value containing blanks
    # would be split into several words -- confirm that cannot happen.
    "mul_fft-params.h") CONFIG_LINKS="$CONFIG_LINKS mul_fft-params.h:"$MUL_FFT_PARAMS"" ;;

  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
  esac
done


# If the user did not use the arguments to specify the items to instantiate,
# then the envvar interface is used.  Set only those that are not.
# We use the long form for the default assignment because of an extremely
# bizarre bug on SunOS 4.1.3.
if $ac_need_defaults; then
  # Each list keeps a value that is already set (even an empty one) and
  # otherwise falls back to the corresponding config_* default.
  test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
  test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
  test "${CONFIG_LINKS+set}" = set || CONFIG_LINKS=$config_links
  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
fi

# Have a temporary directory for convenience.
Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= ac_tmp= trap 'exit_status=$? : "${ac_tmp:=$tmp}" { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status ' 0 trap 'as_fn_exit 1' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=`echo X | tr X '\015'` # On cygwin, bash can eat \r inside `` if the user requested igncr. # But we know of no other shell where ac_cr would be empty at this # point, so we can use a bashism as a fallback. if test "x$ac_cr" = x; then eval ac_cr=\$\'\\r\' fi ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$ac_tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 _ACEOF # VPATH may cause trouble with some makes, so we remove sole $(srcdir), # ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). 
if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ h s/// s/^/:/ s/[ ]*$/:/ s/:\$(srcdir):/:/g s/:\${srcdir}:/:/g s/:@srcdir@:/:/g s/^:*// s/:*$// x s/\(=[ ]*\).*/\1/ G s/\n// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" # Set up the scripts for CONFIG_HEADERS section. # No need to generate them if there are no CONFIG_HEADERS. # This happens for instance with `./config.status Makefile'. if test -n "$CONFIG_HEADERS"; then cat >"$ac_tmp/defines.awk" <<\_ACAWK || BEGIN { _ACEOF # Transform confdefs.h into an awk script `defines.awk', embedded as # here-document in config.status, that substitutes the proper values into # config.h.in to produce config.h. # Create a delimiter string that does not exist in confdefs.h, to ease # handling of long lines. ac_delim='%!_!# ' for ac_last_try in false false :; do ac_tt=`sed -n "/$ac_delim/p" confdefs.h` if test -z "$ac_tt"; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done # For the awk script, D is an array of macro values keyed by name, # likewise P contains macro parameters if any. Preserve backslash # newline sequences. 
ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* sed -n ' s/.\{148\}/&'"$ac_delim"'/g t rset :rset s/^[ ]*#[ ]*define[ ][ ]*/ / t def d :def s/\\$// t bsnl s/["\\]/\\&/g s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ D["\1"]=" \3"/p s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p d :bsnl s/["\\]/\\&/g s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ D["\1"]=" \3\\\\\\n"\\/p t cont s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p t cont d :cont n s/.\{148\}/&'"$ac_delim"'/g t clear :clear s/\\$// t bsnlc s/["\\]/\\&/g; s/^/"/; s/$/"/p d :bsnlc s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p b cont ' >$CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 for (key in D) D_is_set[key] = 1 FS = "" } /^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { line = \$ 0 split(line, arg, " ") if (arg[1] == "#") { defundef = arg[2] mac1 = arg[3] } else { defundef = substr(arg[1], 2) mac1 = arg[2] } split(mac1, mac2, "(") #) macro = mac2[1] prefix = substr(line, 1, index(line, defundef) - 1) if (D_is_set[macro]) { # Preserve the white space surrounding the "#". print prefix "define", macro P[macro] D[macro] next } else { # Replace #undef with comments. This is necessary, for example, # in the case of _POSIX_SOURCE, which is predefined and required # on some systems where configure will not decide to define it. if (defundef == "undef") { print "/*", prefix defundef, macro, "*/" next } } } { print } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 fi # test -n "$CONFIG_HEADERS" eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS :L $CONFIG_LINKS :C $CONFIG_COMMANDS" shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 $as_echo "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`$as_echo "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$ac_tmp/stdin" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || $as_echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir="$ac_dir"; as_fn_mkdir_p ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # case $INSTALL in [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; esac ac_MKDIR_P=$MKDIR_P case $MKDIR_P in [\\/$]* | ?:[\\/]* ) ;; */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; esac _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. 
ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 $as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t s&@INSTALL@&$ac_INSTALL&;t t s&@MKDIR_P@&$ac_MKDIR_P&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" case $ac_file in -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; :H) # # CONFIG_HEADER # if test x"$ac_file" != x-; then { $as_echo "/* $configure_input */" \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" } >"$ac_tmp/config.h" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 $as_echo "$as_me: $ac_file is unchanged" >&6;} else rm -f "$ac_file" mv "$ac_tmp/config.h" "$ac_file" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 fi else $as_echo "/* $configure_input */" \ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ || as_fn_error $? "could not create -" "$LINENO" 5 fi # Compute "$ac_file"'s index in $config_headers. _am_arg="$ac_file" _am_stamp_count=1 for _am_header in $config_headers :; do case $_am_header in $_am_arg | $_am_arg:* ) break ;; * ) _am_stamp_count=`expr $_am_stamp_count + 1` ;; esac done echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" || $as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$_am_arg" : 'X\(//\)[^/]' \| \ X"$_am_arg" : 'X\(//\)$' \| \ X"$_am_arg" : 'X\(/\)' \| . 
2>/dev/null || $as_echo X"$_am_arg" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'`/stamp-h$_am_stamp_count ;; :L) # # CONFIG_LINK # if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then : else # Prefer the file from the source tree if names are identical. if test "$ac_source" = "$ac_file" || test ! -r "$ac_source"; then ac_source=$srcdir/$ac_source fi { $as_echo "$as_me:${as_lineno-$LINENO}: linking $ac_source to $ac_file" >&5 $as_echo "$as_me: linking $ac_source to $ac_file" >&6;} if test ! -r "$ac_source"; then as_fn_error $? "$ac_source: file not found" "$LINENO" 5 fi rm -f "$ac_file" # Try a relative symlink, then a hard link, then a copy. case $ac_source in [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;; *) ac_rel_source=$ac_top_build_prefix$ac_source ;; esac ln -s "$ac_rel_source" "$ac_file" 2>/dev/null || ln "$ac_source" "$ac_file" 2>/dev/null || cp -p "$ac_source" "$ac_file" || as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5 fi ;; :C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5 $as_echo "$as_me: executing $ac_file commands" >&6;} ;; esac case $ac_file$ac_mode in "depfiles":C) test x"$AMDEP_TRUE" != x"" || { # Autoconf 2.62 quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. case $CONFIG_FILES in *\'*) eval set x "$CONFIG_FILES" ;; *) set x $CONFIG_FILES ;; esac shift for mf do # Strip MF so we end up with the name of the file. mf=`echo "$mf" | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile or not. # We used to match only the files named `Makefile.in', but # some people rename them; so instead we look at the file content. # Grep'ing the first line is not enough: some people post-process # each Makefile.in and add a new line on top of each file to say so. 
# Grep'ing the whole file is not good either: AIX grep has a line # limit of 2048, but all sed's we know have understand at least 4000. if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then dirpart=`$as_dirname -- "$mf" || $as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$mf" : 'X\(//\)[^/]' \| \ X"$mf" : 'X\(//\)$' \| \ X"$mf" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$mf" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` else continue fi # Extract the definition of DEPDIR, am__include, and am__quote # from the Makefile without running `make'. DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` test -z "$DEPDIR" && continue am__include=`sed -n 's/^am__include = //p' < "$mf"` test -z "am__include" && continue am__quote=`sed -n 's/^am__quote = //p' < "$mf"` # When using ansi2knr, U may be empty or an underscore; expand it U=`sed -n 's/^U = //p' < "$mf"` # Find all dependency output files, they are included files with # $(DEPDIR) in their names. We invoke sed twice because it is the # simplest approach to changing $(DEPDIR) to its actual value in the # expansion. for file in `sed -n " s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do # Make sure the directory exists. test -f "$dirpart/$file" && continue fdir=`$as_dirname -- "$file" || $as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$file" : 'X\(//\)[^/]' \| \ X"$file" : 'X\(//\)$' \| \ X"$file" : 'X\(/\)' \| . 
2>/dev/null || $as_echo X"$file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir=$dirpart/$fdir; as_fn_mkdir_p # echo "creating $dirpart/$file" echo '# dummy' > "$dirpart/$file" done done } ;; "libtool":C) # See if we are running on zsh, and set the options which allow our # commands through without removal of \ escapes. if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi cfgfile="${ofile}T" trap "$RM \"$cfgfile\"; exit 1" 1 2 15 $RM "$cfgfile" cat <<_LT_EOF >> "$cfgfile" #! $SHELL # `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. # Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # NOTE: Changes made to this file will be lost: look at ltmain.sh. # # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, # 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # Written by Gordon Matzigkeit, 1996 # # This file is part of GNU Libtool. # # GNU Libtool is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. # # As a special exception to the GNU General Public License, # if you distribute this file as part of a program or library that # is built using GNU Libtool, you may include this file under the # same distribution terms that you use for the rest of that program. # # GNU Libtool is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with GNU Libtool; see the file COPYING. 
If not, a copy # can be downloaded from http://www.gnu.org/licenses/gpl.html, or # obtained by writing to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # The names of the tagged configurations supported by this script. available_tags="" # ### BEGIN LIBTOOL CONFIG # Which release of libtool.m4 was used? macro_version=$macro_version macro_revision=$macro_revision # Whether or not to build shared libraries. build_libtool_libs=$enable_shared # Whether or not to build static libraries. build_old_libs=$enable_static # What type of objects to build. pic_mode=$pic_mode # Whether or not to optimize for fast installation. fast_install=$enable_fast_install # Shell to use when invoking shell scripts. SHELL=$lt_SHELL # An echo program that protects backslashes. ECHO=$lt_ECHO # The PATH separator for the build system. PATH_SEPARATOR=$lt_PATH_SEPARATOR # The host system. host_alias=$host_alias host=$host host_os=$host_os # The build system. build_alias=$build_alias build=$build build_os=$build_os # A sed program that does not truncate output. SED=$lt_SED # Sed that helps us avoid accidentally triggering echo(1) options like -n. Xsed="\$SED -e 1s/^X//" # A grep program that handles long lines. GREP=$lt_GREP # An ERE matcher. EGREP=$lt_EGREP # A literal string matcher. FGREP=$lt_FGREP # A BSD- or MS-compatible name lister. NM=$lt_NM # Whether we need soft or hard links. LN_S=$lt_LN_S # What is the maximum length of a command? max_cmd_len=$max_cmd_len # Object file suffix (normally "o"). objext=$ac_objext # Executable file suffix (normally ""). exeext=$exeext # whether the shell understands "unset". lt_unset=$lt_unset # turn spaces into newlines. SP2NL=$lt_lt_SP2NL # turn newlines into spaces. NL2SP=$lt_lt_NL2SP # convert \$build file names to \$host format. to_host_file_cmd=$lt_cv_to_host_file_cmd # convert \$build files to toolchain format. to_tool_file_cmd=$lt_cv_to_tool_file_cmd # An object symbol dumper. 
OBJDUMP=$lt_OBJDUMP # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method # Command to use when deplibs_check_method = "file_magic". file_magic_cmd=$lt_file_magic_cmd # How to find potential files when deplibs_check_method = "file_magic". file_magic_glob=$lt_file_magic_glob # Find potential files using nocaseglob when deplibs_check_method = "file_magic". want_nocaseglob=$lt_want_nocaseglob # DLL creation program. DLLTOOL=$lt_DLLTOOL # Command to associate shared and link libraries. sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd # The archiver. AR=$lt_AR # Flags to create an archive. AR_FLAGS=$lt_AR_FLAGS # How to feed a file listing to the archiver. archiver_list_spec=$lt_archiver_list_spec # A symbol stripping program. STRIP=$lt_STRIP # Commands used to install an old-style archive. RANLIB=$lt_RANLIB old_postinstall_cmds=$lt_old_postinstall_cmds old_postuninstall_cmds=$lt_old_postuninstall_cmds # Whether to use a lock for old archive extraction. lock_old_archive_extraction=$lock_old_archive_extraction # A C compiler. LTCC=$lt_CC # LTCC compiler flags. LTCFLAGS=$lt_CFLAGS # Take the output of nm and produce a listing of raw symbols and C names. global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe # Transform the output of nm in a proper C declaration. global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl # Transform the output of nm in a C name address pair. global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address # Transform the output of nm in a C name address pair when lib prefix is needed. global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix # Specify filename containing input files for \$NM. nm_file_list_spec=$lt_nm_file_list_spec # The root where to search for dependent libraries,and in which our libraries should be installed. lt_sysroot=$lt_sysroot # The name of the directory that contains temporary libtool files. 
objdir=$objdir # Used to examine libraries when file_magic_cmd begins with "file". MAGIC_CMD=$MAGIC_CMD # Must we lock files when doing compilation? need_locks=$lt_need_locks # Manifest tool. MANIFEST_TOOL=$lt_MANIFEST_TOOL # Tool to manipulate archived DWARF debug symbol files on Mac OS X. DSYMUTIL=$lt_DSYMUTIL # Tool to change global to local symbols on Mac OS X. NMEDIT=$lt_NMEDIT # Tool to manipulate fat objects and archives on Mac OS X. LIPO=$lt_LIPO # ldd/readelf like tool for Mach-O binaries on Mac OS X. OTOOL=$lt_OTOOL # ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4. OTOOL64=$lt_OTOOL64 # Old archive suffix (normally "a"). libext=$libext # Shared library suffix (normally ".so"). shrext_cmds=$lt_shrext_cmds # The commands to extract the exported symbol list from a shared archive. extract_expsyms_cmds=$lt_extract_expsyms_cmds # Variables whose values should be saved in libtool wrapper scripts and # restored at link time. variables_saved_for_relink=$lt_variables_saved_for_relink # Do we need the "lib" prefix for modules? need_lib_prefix=$need_lib_prefix # Do we need a version for libraries? need_version=$need_version # Library versioning type. version_type=$version_type # Shared library runtime path variable. runpath_var=$runpath_var # Shared library path variable. shlibpath_var=$shlibpath_var # Is shlibpath searched before the hard-coded library search path? shlibpath_overrides_runpath=$shlibpath_overrides_runpath # Format of library name prefix. libname_spec=$lt_libname_spec # List of archive names. First name is the real one, the rest are links. # The last name is the one that the linker finds with -lNAME library_names_spec=$lt_library_names_spec # The coded name of the library, if different from the real name. soname_spec=$lt_soname_spec # Permission mode override for installation of shared libraries. install_override_mode=$lt_install_override_mode # Command to use after installation of a shared archive. 
postinstall_cmds=$lt_postinstall_cmds # Command to use after uninstallation of a shared archive. postuninstall_cmds=$lt_postuninstall_cmds # Commands used to finish a libtool library installation in a directory. finish_cmds=$lt_finish_cmds # As "finish_cmds", except a single script fragment to be evaled but # not shown. finish_eval=$lt_finish_eval # Whether we should hardcode library paths into libraries. hardcode_into_libs=$hardcode_into_libs # Compile-time system search path for libraries. sys_lib_search_path_spec=$lt_sys_lib_search_path_spec # Run-time system search path for libraries. sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec # Whether dlopen is supported. dlopen_support=$enable_dlopen # Whether dlopen of programs is supported. dlopen_self=$enable_dlopen_self # Whether dlopen of statically linked programs is supported. dlopen_self_static=$enable_dlopen_self_static # Commands to strip libraries. old_striplib=$lt_old_striplib striplib=$lt_striplib # The linker used to build libraries. LD=$lt_LD # How to create reloadable object files. reload_flag=$lt_reload_flag reload_cmds=$lt_reload_cmds # Commands used to build an old-style archive. old_archive_cmds=$lt_old_archive_cmds # A language specific compiler. CC=$lt_compiler # Is the compiler the GNU compiler? with_gcc=$GCC # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc # Whether or not to disallow shared libs when runtime libs are static. 
allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec # Whether the compiler copes with passing no objects directly. compiler_needs_object=$lt_compiler_needs_object # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds # Commands used to build a shared archive. archive_cmds=$lt_archive_cmds archive_expsym_cmds=$lt_archive_expsym_cmds # Commands used to build a loadable module if different from building # a shared archive. module_cmds=$lt_module_cmds module_expsym_cmds=$lt_module_expsym_cmds # Whether we are building with GNU ld or not. with_gnu_ld=$lt_with_gnu_ld # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag # Flag that enforces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec # Whether we need a single "-rpath" flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator # Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes # DIR into the resulting binary. hardcode_direct=$hardcode_direct # Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes # DIR into the resulting binary and the resulting library dependency is # "absolute",i.e impossible to change by setting \${shlibpath_var} if the # library is relocated. 
hardcode_direct_absolute=$hardcode_direct_absolute # Set to "yes" if using the -LDIR flag during linking hardcodes DIR # into the resulting binary. hardcode_minus_L=$hardcode_minus_L # Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR # into the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var # Set to "yes" if building a shared library automatically hardcodes DIR # into the library and all subsequent libraries and executables linked # against it. hardcode_automatic=$hardcode_automatic # Set to yes if linker adds runtime paths of dependent libraries # to runtime path list. inherit_rpath=$inherit_rpath # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs # Set to "yes" if exported symbols are required. always_export_symbols=$always_export_symbols # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms # Symbols that must always be exported. include_expsyms=$lt_include_expsyms # Commands necessary for linking programs (against libraries) with templates. prelink_cmds=$lt_prelink_cmds # Commands necessary for finishing linking programs. postlink_cmds=$lt_postlink_cmds # Specify filename containing input files. file_list_spec=$lt_file_list_spec # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action # ### END LIBTOOL CONFIG _LT_EOF case $host_os in aix3*) cat <<\_LT_EOF >> "$cfgfile" # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi _LT_EOF ;; esac ltmain="$ac_aux_dir/ltmain.sh" # We use sed instead of cat because bash on DJGPP gets confused if # if finds mixed CR/LF and LF-only lines. 
Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? sed '$q' "$ltmain" >> "$cfgfile" \ || (rm -f "$cfgfile"; exit 1) if test x"$xsi_shell" = xyes; then sed -e '/^func_dirname ()$/,/^} # func_dirname /c\ func_dirname ()\ {\ \ case ${1} in\ \ */*) func_dirname_result="${1%/*}${2}" ;;\ \ * ) func_dirname_result="${3}" ;;\ \ esac\ } # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_basename ()$/,/^} # func_basename /c\ func_basename ()\ {\ \ func_basename_result="${1##*/}"\ } # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\ func_dirname_and_basename ()\ {\ \ case ${1} in\ \ */*) func_dirname_result="${1%/*}${2}" ;;\ \ * ) func_dirname_result="${3}" ;;\ \ esac\ \ func_basename_result="${1##*/}"\ } # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? 
|| _lt_function_replace_fail=: sed -e '/^func_stripname ()$/,/^} # func_stripname /c\ func_stripname ()\ {\ \ # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\ \ # positional parameters, so assign one to ordinary parameter first.\ \ func_stripname_result=${3}\ \ func_stripname_result=${func_stripname_result#"${1}"}\ \ func_stripname_result=${func_stripname_result%"${2}"}\ } # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\ func_split_long_opt ()\ {\ \ func_split_long_opt_name=${1%%=*}\ \ func_split_long_opt_arg=${1#*=}\ } # Extended-shell func_split_long_opt implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\ func_split_short_opt ()\ {\ \ func_split_short_opt_arg=${1#??}\ \ func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\ } # Extended-shell func_split_short_opt implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\ func_lo2o ()\ {\ \ case ${1} in\ \ *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\ \ *) func_lo2o_result=${1} ;;\ \ esac\ } # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? 
|| _lt_function_replace_fail=: sed -e '/^func_xform ()$/,/^} # func_xform /c\ func_xform ()\ {\ func_xform_result=${1%.*}.lo\ } # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_arith ()$/,/^} # func_arith /c\ func_arith ()\ {\ func_arith_result=$(( $* ))\ } # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_len ()$/,/^} # func_len /c\ func_len ()\ {\ func_len_result=${#1}\ } # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: fi if test x"$lt_shell_append" = xyes; then sed -e '/^func_append ()$/,/^} # func_append /c\ func_append ()\ {\ eval "${1}+=\\${2}"\ } # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\ func_append_quoted ()\ {\ \ func_quote_for_eval "${2}"\ \ eval "${1}+=\\\\ \\$func_quote_for_eval_result"\ } # Extended-shell func_append_quoted implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? 
|| _lt_function_replace_fail=: # Save a `func_append' function call where possible by direct use of '+=' sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: else # Save a `func_append' function call even when '+=' is not available sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: fi if test x"$_lt_function_replace_fail" = x":"; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute extended shell functions in $ofile" >&5 $as_echo "$as_me: WARNING: Unable to substitute extended shell functions in $ofile" >&2;} fi mv -f "$cfgfile" "$ofile" || (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") chmod +x "$ofile" ;; esac done # for ac_tag as_fn_exit 0 _ACEOF ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. 
if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi { $as_echo "$as_me:${as_lineno-$LINENO}: Configuration:" >&5 $as_echo "$as_me: Configuration:" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Build for host type $host" >&5 $as_echo "$as_me: Build for host type $host" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: CC=$CC, CFLAGS=$CFLAGS" >&5 $as_echo "$as_me: CC=$CC, CFLAGS=$CFLAGS" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Linking GMP with $GMPLIB" >&5 $as_echo "$as_me: Linking GMP with $GMPLIB" >&6;} if test "x$enable_asm_redc" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: Using asm redc code from directory $ASMPATH" >&5 $as_echo "$as_me: Using asm redc code from directory $ASMPATH" >&6;} else { $as_echo "$as_me:${as_lineno-$LINENO}: Not using asm redc code" >&5 $as_echo "$as_me: Not using asm redc code" >&6;} fi if test "x$enable_sse2" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: Using SSE2 instructions in NTT code" >&5 $as_echo "$as_me: Using SSE2 instructions in NTT code" >&6;} else { $as_echo "$as_me:${as_lineno-$LINENO}: Not using SSE2 instructions in NTT code" >&5 $as_echo "$as_me: Not using SSE2 instructions in NTT code" >&6;} fi if test "x$with_gwnum" != "x"; then { $as_echo "$as_me:${as_lineno-$LINENO}: Linking with George Woltman's GWNUM" >&5 $as_echo "$as_me: Linking with George Woltman's GWNUM" >&6;} fi if test 
"x$enable_assert" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: Assertions enabled" >&5 $as_echo "$as_me: Assertions enabled" >&6;} else { $as_echo "$as_me:${as_lineno-$LINENO}: Assertions disabled" >&5 $as_echo "$as_me: Assertions disabled" >&6;} fi if test "x$enable_shellcmd" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: Shell command execution enabled" >&5 $as_echo "$as_me: Shell command execution enabled" >&6;} else { $as_echo "$as_me:${as_lineno-$LINENO}: Shell command execution disabled" >&5 $as_echo "$as_me: Shell command execution disabled" >&6;} fi if test "x$enable_openmp" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: OpenMP enabled" >&5 $as_echo "$as_me: OpenMP enabled" >&6;} else { $as_echo "$as_me:${as_lineno-$LINENO}: OpenMP disabled" >&5 $as_echo "$as_me: OpenMP disabled" >&6;} fi if test "x$enable_memory_debug" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: Memory debugging enabled" >&5 $as_echo "$as_me: Memory debugging enabled" >&6;} else { $as_echo "$as_me:${as_lineno-$LINENO}: Memory debugging disabled" >&5 $as_echo "$as_me: Memory debugging disabled" >&6;} fi if test x"$core2warn" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: Your cpu was detected as x86_64; if it is a Core 2, please either use the ecm-params.h.core2 file by executing the commands:" >&5 $as_echo "$as_me: Your cpu was detected as x86_64; if it is a Core 2, please either use the ecm-params.h.core2 file by executing the commands:" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: rm ecm-params.h" >&5 $as_echo "$as_me: rm ecm-params.h" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: ln -s ecm-params.h.core2 ecm-params.h" >&5 $as_echo "$as_me: ln -s ecm-params.h.core2 ecm-params.h" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: or generate a custom ecm-params.h file for your system as described in INSTALL." >&5 $as_echo "$as_me: or generate a custom ecm-params.h file for your system as described in INSTALL." 
>&6;} fi if test x"$pentium4warn" = xyes; then { $as_echo "$as_me:${as_lineno-$LINENO}: Your cpu was detected as default; if it is a Pentium 4, please either use the ecm-params.h.pentium4 file by executing the commands:" >&5 $as_echo "$as_me: Your cpu was detected as default; if it is a Pentium 4, please either use the ecm-params.h.pentium4 file by executing the commands:" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: rm ecm-params.h" >&5 $as_echo "$as_me: rm ecm-params.h" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: ln -s ecm-params.h.pentium4 ecm-params.h" >&5 $as_echo "$as_me: ln -s ecm-params.h.pentium4 ecm-params.h" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: or generate a custom ecm-params.h file for your system as described in INSTALL." >&5 $as_echo "$as_me: or generate a custom ecm-params.h file for your system as described in INSTALL." >&6;} fi ecm-6.4.4/polyeval.c0000644023561000001540000002362212106741274011223 00000000000000/* Implements algorithm polyeval and remainder tree using middle product. Copyright 2003, 2004, 2005, 2006, 2007, 2008, 2009 Laurent Fousse, Alexander Kruppa, Paul Zimmermann. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include #include /* for strlen */ #include "ecm-impl.h" #ifdef HAVE_UNISTD_H # include /* for unlink */ #endif #ifndef MAX #define MAX(a,b) (((a) > (b)) ? (a) : (b)) #endif /* #define DEBUG_TREEDATA */ extern unsigned int Fermat; /* algorithm polyeval from section 3.7 of Peter Montgomery's dissertation. Input: G - an array of k elements of R, G[i], 0 <= i < k representing the coefficients of a polynomial G(x) of degree < k Tree - the product tree produced by PolyFromRoots Tree[0][0..k-1] (degree k/2) Tree[1][0..k-1] (degree k/4), ..., Tree[lgk-1][0..k-1] (degree 1) Output: the sequence of values of G(a[i]) are stored in G[i] for 0 <= i < k Remark: we need an auxiliary (k+1)-th cell G[k] in G. The memory used is M(k) = max(3*floor(k/2)+list_mul_mem(floor(k/2)), k+list_mul_mem(ceil(k/2)), floor(k/2) + M(ceil(k/2))). Since list_mul_mem(k) >= 2*k, the maximum is the 1st. */ void polyeval (listz_t G, unsigned int k, listz_t *Tree, listz_t T, mpz_t n, unsigned int sh) { unsigned int l, m; listz_t T0; if (k == 1) return; T0 = Tree[0] + sh; m = k / 2; l = k - m; /* divide G[0]+G[1]*x+...+G[k-1]*x^(k-1) by T0[l]+...+T0[k-1]*x^(m-1)+x^m, quotient in {T+m,l-1}, remainder in {T,m} */ if (k == 2 * m) { /* FIXME: avoid the copy here by giving different 2nd and 3rd arguments to RecursiveDivision */ list_set (T, G, k); /* the following needs k+m+list_mul_mem(m) in T */ RecursiveDivision (T + k, T, T0 + l, m, T + k + m, n, 1); } else /* k = 2m+1: subtract G[k-1]*x^(l-1) * T0 from G */ { /* G - G[k-1] * (x^m + {T0+l,m}) * x^m */ list_set (T, G, m); list_mul_z (T + m, T0 + l, G[k - 1], m, n); list_sub (T + m, G + m, T + m, m); /* the following needs 3m+list_mul_mem(m) in T */ RecursiveDivision (T + 2 * m, T, T0 + l, m, T + 3 * m, n, 1); } /* in both cases we need 3*(k/2)+list_mul_mem(k/2) */ /* right remainder is in {T,m} */ /* k = 2l or k = 2l-1 */ /* divide G[0]+G[1]*x+...+G[k-1]*x^(k-1) by T0[0]+...+T0[l-1]*x^(l-1)+x^l: quotient in {T+m,m-1}, remainder in {G,l} */ if (k < 2 * 
l) mpz_set_ui (G[k], 0); /* the following needs k+list_mul_mem(l) in T */ RecursiveDivision (T + m, G, T0, l, T + k, n, 1); /* left remainder is in {G,l} */ polyeval (G, l, Tree + 1, T + m, n, sh); /* copy right remainder in {G+l,m} */ list_set (G + l, T, m); polyeval (G + l, m, Tree + 1, T, n, sh + l); } #if defined(DEBUG) || defined(DEBUG_TREEDATA) void print_vect (listz_t t, unsigned int l) { unsigned int i; fprintf (ECM_STDOUT, "["); for (i = 0; i < l; i++) { mpz_out_str (ECM_STDOUT, 10, t[i]); if (i != l - 1) fprintf (ECM_STDOUT, ", "); else fprintf (ECM_STDOUT, "]"); } } #endif /* Computes TUpTree as described in ref[1]. k is the degree of the * polynomial at the root of the tree. sh is the shift we need to * apply to find the actual coefficients of the polynomial at the root * of the tree. */ void TUpTree (listz_t b, listz_t *Tree, unsigned int k, listz_t tmp, int dolvl, unsigned int sh, mpz_t n, FILE *TreeFile) { unsigned int m, l; m = k / 2; l = k - m; if (k == 1) return; #ifdef DEBUG fprintf (ECM_STDOUT, "In TupTree, k = %d.\n", k); fprintf (ECM_STDOUT, "b = "); print_vect (b, k); fprintf (ECM_STDOUT, "\nThe polynomials at that level are: "); print_vect (Tree[0] + sh, k); fprintf (ECM_STDOUT, "\n"); #endif if (dolvl == 0 || dolvl == -1) { if (TreeFile != NULL) { list_inp_raw (tmp + k, TreeFile, l); #ifdef DEBUG_TREEDATA printf ("Read from file: "); print_vect (tmp + k, l); #endif TMulGen (tmp + l, m - 1, tmp + k, l - 1, b, k - 1, tmp + k + l, n); list_inp_raw (tmp + k, TreeFile, m); #ifdef DEBUG_TREEDATA print_vect (tmp + k, m); printf ("\n"); #endif TMulGen (tmp, l - 1, tmp + k, m - 1, b, k - 1, tmp + k + m, n); } else { #ifdef DEBUG_TREEDATA printf ("Got from Tree: "); print_vect (Tree[0] + sh, l); print_vect (Tree[0] + sh + l, m); printf ("\n"); #endif TMulGen (tmp + l, m - 1, Tree[0] + sh, l - 1, b, k - 1, tmp + k, n); TMulGen (tmp, l - 1, Tree[0] + sh + l, m - 1, b, k - 1, tmp + k, n); } #if defined(DEBUG) || defined (DEBUG_TREEDATA) fprintf 
(ECM_STDOUT, "And the result at that level (before correction) is:"); print_vect (tmp, k); fprintf (ECM_STDOUT, "\n"); #endif /* GMP-ECM specific: leading coefficients in the product tree * are implicit ones, so we need some extra work here. */ list_add (tmp, tmp, b + m, l); list_add (tmp + l, tmp + l, b + l, m); list_mod (b, tmp, k, n); /* reduce both parts simultaneously */ #ifdef DEBUG fprintf (ECM_STDOUT, "And the result at this level is:"); print_vect (b, k); fprintf (ECM_STDOUT, "\n"); #endif } if (dolvl > 0 || dolvl == -1) { if (dolvl > 0) dolvl--; TUpTree (b, Tree + 1, l, tmp, dolvl, sh, n, TreeFile); TUpTree (b + l, Tree + 1, m, tmp, dolvl, sh + l, n, TreeFile); } } static unsigned int TUpTree_space (unsigned int k) { unsigned int m, l; unsigned int r1, r2; m = k / 2; l = k - m; if (k == 1) return 0; r1 = TMulGen_space (l - 1, m - 1, k - 1) + l; if (m != l) { r2 = TMulGen_space (m - 1, l - 1, k - 1) + k; r1 = MAX (r1, r2); } r2 = TUpTree_space (l); r1 = MAX (r1, r2); if (m != l) { r2 = TUpTree_space (m); r1 = MAX (r1, r2); } return r1; } /* Same as polyeval. Needs invF as extra argument. Return non-zero iff an error occurred. 
*/ int polyeval_tellegen (listz_t b, unsigned int k, listz_t *Tree, listz_t tmp, unsigned int sizeT, listz_t invF, mpz_t n, char *TreeFilename) { unsigned int tupspace; unsigned int tkspace; int allocated = 0, r = 0; /* return value, 0 = no error */ listz_t T; ASSERT(Tree != NULL || TreeFilename != NULL); tupspace = TUpTree_space (k) + k; #ifndef USE_SHORT_PRODUCT tkspace = TMulGen_space (k - 1, k - 1, k - 1) + k; #else tkspace = 2 * k - 1 + list_mul_mem (k); #endif tupspace = MAX (tupspace, tkspace); if (TreeFilename != NULL) tupspace += (k + 1) / 2; if (sizeT >= tupspace) T = tmp; else { outputf (OUTPUT_DEVVERBOSE, "polyeval_tellegen: allocating extra temp" " space, want %d but T has only %d\n", tupspace, sizeT); MEMORY_TAG; T = init_list (tupspace); MEMORY_UNTAG; if (T == NULL) return ECM_ERROR; allocated = 1; } #ifdef TELLEGEN_DEBUG fprintf (ECM_STDOUT, "In polyeval_tellegen, k = %d.\n", k); fprintf (ECM_STDOUT, "Required memory: %d.\n", TMulGen_space (k - 1, k - 1, k - 1)); #endif if (Fermat) { /* Schoenhage-Strassen can't do a half product faster than a full */ F_mul (T, invF, b, k, DEFAULT, Fermat, T + 2 * k); list_mod (T, T + k - 1, k, n); } else { #ifdef USE_SHORT_PRODUCT /* need space 2k-1+list_mul_mem(k) in T */ list_mul_high (T, invF, b, k, T + 2 * k - 1); list_mod (T, T + k - 1, k, n); #else /* revert invF for call to TMulGen below */ list_revert (invF, k); TMulGen (T, k - 1, invF, k - 1, b, k - 1, T + k, n); #endif } list_revert (T, k); if (TreeFilename != NULL) { unsigned int lgk, i; FILE *TreeFile; char *fullname = (char *) malloc (strlen (TreeFilename) + 1 + 2 + 1); if (fullname == NULL) { fprintf (stderr, "Cannot allocate memory in polyeval_tellegen\n"); exit (1); } lgk = ceil_log2 (k); for (i = 0; i < lgk; i++) { sprintf (fullname, "%s.%d", TreeFilename, i); TreeFile = fopen (fullname, "rb"); if (TreeFile == NULL) { outputf (OUTPUT_ERROR, "Error opening file %s for product tree of F\n", fullname); r = ECM_ERROR; goto clear_T; } TUpTree (T, NULL, 
k, T + k, i, 0, n, TreeFile); fclose (TreeFile); unlink (fullname); } free (fullname); } else TUpTree (T, Tree, k, T + k, -1, 0, n, NULL); list_swap (b, T, k); /* more efficient than list_set, since T is not needed anymore */ clear_T: if (allocated) clear_list (T, tupspace); return r; } ecm-6.4.4/mul_fft-params.h.default0000644023561000001540000000017612106741273013733 00000000000000/* Empty file so that #include won't produce an error message. With no parameters defined, mul_fft.c will use defaults. */ ecm-6.4.4/m4/0000755023561000001540000000000012113421640007605 500000000000000ecm-6.4.4/m4/ltversion.m40000644023561000001540000000126212106744307012027 00000000000000# ltversion.m4 -- version numbers -*- Autoconf -*- # # Copyright (C) 2004 Free Software Foundation, Inc. # Written by Scott James Remnant, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # @configure_input@ # serial 3337 ltversion.m4 # This file is part of GNU Libtool m4_define([LT_PACKAGE_VERSION], [2.4.2]) m4_define([LT_PACKAGE_REVISION], [1.3337]) AC_DEFUN([LTVERSION_VERSION], [macro_version='2.4.2' macro_revision='1.3337' _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) _LT_DECL(, macro_revision, 0) ]) ecm-6.4.4/m4/libtool.m40000644023561000001540000106043412106744307011455 00000000000000# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- # # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, # 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # Written by Gordon Matzigkeit, 1996 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. 
m4_define([_LT_COPYING], [dnl # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, # 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # Written by Gordon Matzigkeit, 1996 # # This file is part of GNU Libtool. # # GNU Libtool is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. # # As a special exception to the GNU General Public License, # if you distribute this file as part of a program or library that # is built using GNU Libtool, you may include this file under the # same distribution terms that you use for the rest of that program. # # GNU Libtool is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with GNU Libtool; see the file COPYING. If not, a copy # can be downloaded from http://www.gnu.org/licenses/gpl.html, or # obtained by writing to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ]) # serial 57 LT_INIT # LT_PREREQ(VERSION) # ------------------ # Complain and exit if this libtool version is less than VERSION. 
m4_defun([LT_PREREQ], [m4_if(m4_version_compare(m4_defn([LT_PACKAGE_VERSION]), [$1]), -1, [m4_default([$3], [m4_fatal([Libtool version $1 or higher is required], 63)])], [$2])]) # _LT_CHECK_BUILDDIR # ------------------ # Complain if the absolute build directory name contains unusual characters m4_defun([_LT_CHECK_BUILDDIR], [case `pwd` in *\ * | *\ *) AC_MSG_WARN([Libtool does not cope well with whitespace in `pwd`]) ;; esac ]) # LT_INIT([OPTIONS]) # ------------------ AC_DEFUN([LT_INIT], [AC_PREREQ([2.58])dnl We use AC_INCLUDES_DEFAULT AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl AC_BEFORE([$0], [LT_LANG])dnl AC_BEFORE([$0], [LT_OUTPUT])dnl AC_BEFORE([$0], [LTDL_INIT])dnl m4_require([_LT_CHECK_BUILDDIR])dnl dnl Autoconf doesn't catch unexpanded LT_ macros by default: m4_pattern_forbid([^_?LT_[A-Z_]+$])dnl m4_pattern_allow([^(_LT_EOF|LT_DLGLOBAL|LT_DLLAZY_OR_NOW|LT_MULTI_MODULE)$])dnl dnl aclocal doesn't pull ltoptions.m4, ltsugar.m4, or ltversion.m4 dnl unless we require an AC_DEFUNed macro: AC_REQUIRE([LTOPTIONS_VERSION])dnl AC_REQUIRE([LTSUGAR_VERSION])dnl AC_REQUIRE([LTVERSION_VERSION])dnl AC_REQUIRE([LTOBSOLETE_VERSION])dnl m4_require([_LT_PROG_LTMAIN])dnl _LT_SHELL_INIT([SHELL=${CONFIG_SHELL-/bin/sh}]) dnl Parse OPTIONS _LT_SET_OPTIONS([$0], [$1]) # This can be used to rebuild libtool when needed LIBTOOL_DEPS="$ltmain" # Always use our own libtool. LIBTOOL='$(SHELL) $(top_builddir)/libtool' AC_SUBST(LIBTOOL)dnl _LT_SETUP # Only expand once: m4_define([LT_INIT]) ])# LT_INIT # Old names: AU_ALIAS([AC_PROG_LIBTOOL], [LT_INIT]) AU_ALIAS([AM_PROG_LIBTOOL], [LT_INIT]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_PROG_LIBTOOL], []) dnl AC_DEFUN([AM_PROG_LIBTOOL], []) # _LT_CC_BASENAME(CC) # ------------------- # Calculate cc_basename. Skip known compiler wrappers and cross-prefix. 
m4_defun([_LT_CC_BASENAME], [for cc_temp in $1""; do case $cc_temp in compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"` ]) # _LT_FILEUTILS_DEFAULTS # ---------------------- # It is okay to use these file commands and assume they have been set # sensibly after `m4_require([_LT_FILEUTILS_DEFAULTS])'. m4_defun([_LT_FILEUTILS_DEFAULTS], [: ${CP="cp -f"} : ${MV="mv -f"} : ${RM="rm -f"} ])# _LT_FILEUTILS_DEFAULTS # _LT_SETUP # --------- m4_defun([_LT_SETUP], [AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl AC_REQUIRE([_LT_PREPARE_SED_QUOTE_VARS])dnl AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])dnl _LT_DECL([], [PATH_SEPARATOR], [1], [The PATH separator for the build system])dnl dnl _LT_DECL([], [host_alias], [0], [The host system])dnl _LT_DECL([], [host], [0])dnl _LT_DECL([], [host_os], [0])dnl dnl _LT_DECL([], [build_alias], [0], [The build system])dnl _LT_DECL([], [build], [0])dnl _LT_DECL([], [build_os], [0])dnl dnl AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([LT_PATH_LD])dnl AC_REQUIRE([LT_PATH_NM])dnl dnl AC_REQUIRE([AC_PROG_LN_S])dnl test -z "$LN_S" && LN_S="ln -s" _LT_DECL([], [LN_S], [1], [Whether we need soft or hard links])dnl dnl AC_REQUIRE([LT_CMD_MAX_LEN])dnl _LT_DECL([objext], [ac_objext], [0], [Object file suffix (normally "o")])dnl _LT_DECL([], [exeext], [0], [Executable file suffix (normally "")])dnl dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_CHECK_SHELL_FEATURES])dnl m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl m4_require([_LT_CMD_RELOAD])dnl m4_require([_LT_CHECK_MAGIC_METHOD])dnl m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl m4_require([_LT_CMD_OLD_ARCHIVE])dnl m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl m4_require([_LT_WITH_SYSROOT])dnl _LT_CONFIG_LIBTOOL_INIT([ # See if we are running on zsh, and set the options which allow our # commands through without removal of 
\ escapes INIT. if test -n "\${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi ]) if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi _LT_CHECK_OBJDIR m4_require([_LT_TAG_COMPILER])dnl case $host_os in aix3*) # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi ;; esac # Global variables: ofile=libtool can_build_shared=yes # All known linkers require a `.a' archive for static linking (except MSVC, # which needs '.lib'). libext=a with_gnu_ld="$lt_cv_prog_gnu_ld" old_CC="$CC" old_CFLAGS="$CFLAGS" # Set sane defaults for various variables test -z "$CC" && CC=cc test -z "$LTCC" && LTCC=$CC test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS test -z "$LD" && LD=ld test -z "$ac_objext" && ac_objext=o _LT_CC_BASENAME([$compiler]) # Only perform the check for file, if the check method requires it test -z "$MAGIC_CMD" && MAGIC_CMD=file case $deplibs_check_method in file_magic*) if test "$file_magic_cmd" = '$MAGIC_CMD'; then _LT_PATH_MAGIC fi ;; esac # Use C for the default configuration in the libtool script LT_SUPPORTED_TAG([CC]) _LT_LANG_C_CONFIG _LT_LANG_DEFAULT_CONFIG _LT_CONFIG_COMMANDS ])# _LT_SETUP # _LT_PREPARE_SED_QUOTE_VARS # -------------------------- # Define a few sed substitution that help us do robust quoting. m4_defun([_LT_PREPARE_SED_QUOTE_VARS], [# Backslashify metacharacters that are still active within # double-quoted strings. sed_quote_subst='s/\([["`$\\]]\)/\\\1/g' # Same as above, but do not quote variable references. double_quote_subst='s/\([["`\\]]\)/\\\1/g' # Sed substitution to delay expansion of an escaped shell variable in a # double_quote_subst'ed string. delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' # Sed substitution to delay expansion of an escaped single quote. 
delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g' # Sed substitution to avoid accidental globbing in evaled expressions no_glob_subst='s/\*/\\\*/g' ]) # _LT_PROG_LTMAIN # --------------- # Note that this code is called both from `configure', and `config.status' # now that we use AC_CONFIG_COMMANDS to generate libtool. Notably, # `config.status' has no value for ac_aux_dir unless we are using Automake, # so we pass a copy along to make sure it has a sensible value anyway. m4_defun([_LT_PROG_LTMAIN], [m4_ifdef([AC_REQUIRE_AUX_FILE], [AC_REQUIRE_AUX_FILE([ltmain.sh])])dnl _LT_CONFIG_LIBTOOL_INIT([ac_aux_dir='$ac_aux_dir']) ltmain="$ac_aux_dir/ltmain.sh" ])# _LT_PROG_LTMAIN ## ------------------------------------- ## ## Accumulate code for creating libtool. ## ## ------------------------------------- ## # So that we can recreate a full libtool script including additional # tags, we accumulate the chunks of code to send to AC_CONFIG_COMMANDS # in macros and then make a single call at the end using the `libtool' # label. # _LT_CONFIG_LIBTOOL_INIT([INIT-COMMANDS]) # ---------------------------------------- # Register INIT-COMMANDS to be passed to AC_CONFIG_COMMANDS later. m4_define([_LT_CONFIG_LIBTOOL_INIT], [m4_ifval([$1], [m4_append([_LT_OUTPUT_LIBTOOL_INIT], [$1 ])])]) # Initialize. m4_define([_LT_OUTPUT_LIBTOOL_INIT]) # _LT_CONFIG_LIBTOOL([COMMANDS]) # ------------------------------ # Register COMMANDS to be passed to AC_CONFIG_COMMANDS later. m4_define([_LT_CONFIG_LIBTOOL], [m4_ifval([$1], [m4_append([_LT_OUTPUT_LIBTOOL_COMMANDS], [$1 ])])]) # Initialize. 
m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS])


# _LT_CONFIG_SAVE_COMMANDS([COMMANDS], [INIT_COMMANDS])
# -----------------------------------------------------
# Convenience wrapper: hand COMMANDS to _LT_CONFIG_LIBTOOL and
# INIT_COMMANDS to _LT_CONFIG_LIBTOOL_INIT in a single call.
m4_defun([_LT_CONFIG_SAVE_COMMANDS],
[_LT_CONFIG_LIBTOOL([$1])
_LT_CONFIG_LIBTOOL_INIT([$2])
])


# _LT_FORMAT_COMMENT([COMMENT])
# -----------------------------
# Add leading comment marks to the start of each line, and a trailing
# full-stop to the whole comment if one is not present already.
# Expands to nothing when COMMENT is empty.  The characters ' ` $ and \
# in COMMENT are backslash-escaped, and no full-stop is added when
# COMMENT already ends in one of ! ? or a full-stop.
m4_define([_LT_FORMAT_COMMENT],
[m4_ifval([$1], [
m4_bpatsubst([m4_bpatsubst([$1], [^ *], [# ])],
              [['`$\]], [\\\&])]m4_bmatch([$1], [[!?.]$], [], [.])
)])



## ------------------------ ##
## FIXME: Eliminate VARNAME ##
## ------------------------ ##


# _LT_DECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION], [IS-TAGGED?])
# -------------------------------------------------------------------
# CONFIGNAME is the name given to the value in the libtool script.
# VARNAME is the (base) name used in the configure script.
# VALUE may be 0, 1 or 2 for a computed quote escaped value based on
# VARNAME.  Any other value will be used directly.
# The declaration is stored in the lt_decl_dict dictionary, keyed by
# VARNAME, under these sub-keys:
#   libtool_name  CONFIGNAME, or VARNAME when CONFIGNAME is empty
#   value         VALUE
#   description   DESCRIPTION (only stored when DESCRIPTION is non-empty)
#   tagged?       yes when IS-TAGGED? is non-empty, otherwise no
# VARNAME itself is added to the lt_decl_varnames list through
# lt_if_append_uniq (presumably only on its first declaration; that helper
# is defined elsewhere).
m4_define([_LT_DECL],
[lt_if_append_uniq([lt_decl_varnames], [$2], [, ],
    [lt_dict_add_subkey([lt_decl_dict], [$2], [libtool_name],
        [m4_ifval([$1], [$1], [$2])])
    lt_dict_add_subkey([lt_decl_dict], [$2], [value], [$3])
    m4_ifval([$4],
        [lt_dict_add_subkey([lt_decl_dict], [$2], [description], [$4])])
    lt_dict_add_subkey([lt_decl_dict], [$2],
        [tagged?], [m4_ifval([$5], [yes], [no])])])
])


# _LT_TAGDECL([CONFIGNAME], VARNAME, VALUE, [DESCRIPTION])
# --------------------------------------------------------
# Same as _LT_DECL, with IS-TAGGED? forced to yes.
m4_define([_LT_TAGDECL], [_LT_DECL([$1], [$2], [$3], [$4], [yes])])


# lt_decl_tag_varnames([SEPARATOR], [VARNAME1...])
# ------------------------------------------------
# Filter for the variables whose `tagged?' sub-key is yes.
m4_define([lt_decl_tag_varnames],
[_lt_decl_filter([tagged?], [yes], $@)])


# _lt_decl_filter(SUBKEY, VALUE, [SEPARATOR], [VARNAME1..])
# ---------------------------------------------------------
# Call lt_dict_filter on lt_decl_dict for SUBKEY and VALUE.  Fewer than
# two arguments is a fatal error.  With two arguments the separator is
# empty and the full lt_decl_varnames list is filtered; with three, the
# given SEPARATOR is used on the full list; with more, all arguments are
# forwarded unchanged.
m4_define([_lt_decl_filter],
[m4_case([$#],
  [0], [m4_fatal([$0: too few arguments: $#])],
  [1], [m4_fatal([$0: too few arguments: $#: $1])],
  [2], [lt_dict_filter([lt_decl_dict], [$1], [$2], [], lt_decl_varnames)],
  [3], [lt_dict_filter([lt_decl_dict], [$1], [$2], [$3], lt_decl_varnames)],
  [lt_dict_filter([lt_decl_dict], $@)])[]dnl
])


# lt_decl_quote_varnames([SEPARATOR], [VARNAME1...])
# --------------------------------------------------
# Filter for the variables declared with VALUE 1.
m4_define([lt_decl_quote_varnames],
[_lt_decl_filter([value], [1], $@)])


# lt_decl_dquote_varnames([SEPARATOR], [VARNAME1...])
# ---------------------------------------------------
# Filter for the variables declared with VALUE 2.
m4_define([lt_decl_dquote_varnames],
[_lt_decl_filter([value], [2], $@)])


# lt_decl_varnames_tagged([SEPARATOR], [VARNAME1...])
# ---------------------------------------------------
# Accepts at most two arguments.  SEPARATOR defaults to `, ' and the
# variable list defaults to lt_decl_tag_varnames.  The list is passed,
# together with the words of _LT_TAGS, to lt_combine with `_' as the
# infix; nothing is produced when _LT_TAGS yields no words.
m4_define([lt_decl_varnames_tagged],
[m4_assert([$# <= 2])dnl
_$0(m4_quote(m4_default([$1], [[, ]])),
    m4_ifval([$2], [[$2]], [m4_dquote(lt_decl_tag_varnames)]),
    m4_split(m4_normalize(m4_quote(_LT_TAGS)), [ ]))])
m4_define([_lt_decl_varnames_tagged],
[m4_ifval([$3], [lt_combine([$1], [$2], [_], $3)])])


#
# lt_decl_all_varnames([SEPARATOR], [VARNAME1...])
# ------------------------------------------------
# Join the given variable names (all of lt_decl_varnames when none are
# given) with the output of lt_decl_varnames_tagged for the tagged ones
# among them.  SEPARATOR defaults to `, '.
m4_define([lt_decl_all_varnames],
[_$0(m4_quote(m4_default([$1], [[, ]])),
     m4_if([$2], [],
           m4_quote(lt_decl_varnames),
        m4_quote(m4_shift($@))))[]dnl
])
m4_define([_lt_decl_all_varnames],
[lt_join($@, lt_decl_varnames_tagged([$1],
                        lt_decl_tag_varnames([[, ]], m4_shift($@))))dnl
])


# _LT_CONFIG_STATUS_DECLARE([VARNAME])
# ------------------------------------
# Quote a variable value, and forward it to `config.status' so that its
# declaration there will have the same value as in `configure'.  VARNAME
# must have a single quote delimited value for this to work.
m4_define([_LT_CONFIG_STATUS_DECLARE],
[$1='`$ECHO "$][$1" | $SED "$delay_single_quote_subst"`'])


# _LT_CONFIG_STATUS_DECLARATIONS
# ------------------------------
# We delimit libtool config variables with single quotes, so when
# we write them to config.status, we have to be sure to quote all
# embedded single quotes properly.  In configure, this macro expands
# each variable declared with _LT_DECL (and _LT_TAGDECL) into:
#
#    <var>='`$ECHO "$<var>" | $SED "$delay_single_quote_subst"`'
m4_defun([_LT_CONFIG_STATUS_DECLARATIONS],
[m4_foreach([_lt_var], m4_quote(lt_decl_all_varnames),
    [m4_n([_LT_CONFIG_STATUS_DECLARE(_lt_var)])])])


# _LT_LIBTOOL_TAGS
# ----------------
# Output comment and list of tags supported by the script
m4_defun([_LT_LIBTOOL_TAGS],
[_LT_FORMAT_COMMENT([The names of the tagged configurations supported by this script])dnl
available_tags="_LT_TAGS"dnl
])


# _LT_LIBTOOL_DECLARE(VARNAME, [TAG])
# -----------------------------------
# Extract the dictionary values for VARNAME (optionally with TAG) and
# expand to a commented shell variable setting:
#
#    # Some comment about what VAR is for.
#    visible_name=$lt_internal_name
#
# The right-hand side depends on the `value' stored for VARNAME:
# 0 gives $VARNAME, 1 and 2 both give $lt_VARNAME, and any other value
# is emitted as it stands.  When TAG is given, _TAG is appended to the
# end of the expansion.
m4_define([_LT_LIBTOOL_DECLARE],
[_LT_FORMAT_COMMENT(m4_quote(lt_dict_fetch([lt_decl_dict], [$1],
                                           [description])))[]dnl
m4_pushdef([_libtool_name],
    m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [libtool_name])))[]dnl
m4_case(m4_quote(lt_dict_fetch([lt_decl_dict], [$1], [value])),
    [0], [_libtool_name=[$]$1],
    [1], [_libtool_name=$lt_[]$1],
    [2], [_libtool_name=$lt_[]$1],
    [_libtool_name=lt_dict_fetch([lt_decl_dict], [$1], [value])])[]dnl
m4_ifval([$2], [_$2])[]m4_popdef([_libtool_name])[]dnl
])


# _LT_LIBTOOL_CONFIG_VARS
# -----------------------
# Produce commented declarations of non-tagged libtool config variables
# suitable for insertion in the LIBTOOL CONFIG section of the `libtool'
# script.  Tagged libtool config variables (even for the LIBTOOL CONFIG
# section) are produced by _LT_LIBTOOL_TAG_VARS.
m4_defun([_LT_LIBTOOL_CONFIG_VARS],
[m4_foreach([_lt_var],
    m4_quote(_lt_decl_filter([tagged?], [no], [], lt_decl_varnames)),
    [m4_n([_LT_LIBTOOL_DECLARE(_lt_var)])])])


# _LT_LIBTOOL_TAG_VARS(TAG)
# -------------------------
# Produce one _LT_LIBTOOL_DECLARE declaration, passing TAG, for every
# variable listed by lt_decl_tag_varnames.
m4_define([_LT_LIBTOOL_TAG_VARS],
[m4_foreach([_lt_var], m4_quote(lt_decl_tag_varnames),
    [m4_n([_LT_LIBTOOL_DECLARE(_lt_var, [$1])])])])


# _LT_TAGVAR(VARNAME, [TAGNAME])
# ------------------------------
# Expand to VARNAME_TAGNAME when TAGNAME is non-empty, else to VARNAME.
m4_define([_LT_TAGVAR], [m4_ifval([$2], [$1_$2], [$1])])


# _LT_CONFIG_COMMANDS
# -------------------
# Send accumulated output to $CONFIG_STATUS.  Thanks to the lists of
# variables for single and double quote escaping we saved from calls
# to _LT_DECL, we can put quote escaped variables declarations
# into `config.status', and then the shell code to quote escape them in
# for loops in `config.status'.  Finally, any additional code accumulated
# from calls to _LT_CONFIG_LIBTOOL_INIT is expanded.
m4_defun([_LT_CONFIG_COMMANDS],
[AC_PROVIDE_IFELSE([LT_OUTPUT],
        dnl If the libtool generation code has been placed in $CONFIG_LT,
        dnl instead of duplicating it all over again into config.status,
        dnl then we will have config.status run $CONFIG_LT later, so it
        dnl needs to know what name is stored there:
        [AC_CONFIG_COMMANDS([libtool],
            [$SHELL $CONFIG_LT || AS_EXIT(1)], [CONFIG_LT='$CONFIG_LT'])],
    dnl If the libtool generation code is destined for config.status,
    dnl expand the accumulated commands and init code now:
    [AC_CONFIG_COMMANDS([libtool],
        [_LT_OUTPUT_LIBTOOL_COMMANDS], [_LT_OUTPUT_LIBTOOL_COMMANDS_INIT])])
])#_LT_CONFIG_COMMANDS


# Initialize.
# _LT_OUTPUT_LIBTOOL_COMMANDS_INIT holds the shell prologue that is
# expanded before the accumulated libtool commands: it unsets CDPATH,
# forwards the sed quoting scripts and the declared variables, defines
# func_fallback_echo, builds an lt_VAR copy of every quoted and
# double-quoted variable, and ends with _LT_OUTPUT_LIBTOOL_INIT.
m4_define([_LT_OUTPUT_LIBTOOL_COMMANDS_INIT],
[

# The HP-UX ksh and POSIX shell print the target directory to stdout
# if CDPATH is set.
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH

sed_quote_subst='$sed_quote_subst'
double_quote_subst='$double_quote_subst'
delay_variable_subst='$delay_variable_subst'
_LT_CONFIG_STATUS_DECLARATIONS
LTCC='$LTCC'
LTCFLAGS='$LTCFLAGS'
compiler='$compiler_DEFAULT'

# A function that is used when there is no print builtin or printf.
func_fallback_echo ()
{
  eval 'cat <<_LTECHO_EOF
\$[]1
_LTECHO_EOF'
}

# Quote evaled strings.
for var in lt_decl_all_varnames([[ \
]], lt_decl_quote_varnames); do
    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
    *[[\\\\\\\`\\"\\\$]]*)
      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED \\"\\\$sed_quote_subst\\"\\\`\\\\\\""
      ;;
    *)
      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
      ;;
    esac
done

# Double-quote double-evaled strings.
for var in lt_decl_all_varnames([[ \
]], lt_decl_dquote_varnames); do
    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
    *[[\\\\\\\`\\"\\\$]]*)
      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e \\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e \\"\\\$delay_variable_subst\\"\\\`\\\\\\""
      ;;
    *)
      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
      ;;
    esac
done

_LT_OUTPUT_LIBTOOL_INIT
])

# _LT_GENERATED_FILE_INIT(FILE, [COMMENT])
# ------------------------------------
# Generate a child script FILE with all initialization necessary to
# reuse the environment learned by the parent script, and make the
# file executable.  If COMMENT is supplied, it is inserted after the
# `#!' sequence but before initialization text begins.  After this
# macro, additional text can be appended to FILE to form the body of
# the child script.  The macro ends with non-zero status if the
# file could not be fully written (such as if the disk is full).
# When AS_INIT_GENERATED is defined it is used directly; the second
# definition below is the fallback for when it is not.
m4_ifdef([AS_INIT_GENERATED],
[m4_defun([_LT_GENERATED_FILE_INIT],[AS_INIT_GENERATED($@)])],
[m4_defun([_LT_GENERATED_FILE_INIT],
[m4_require([AS_PREPARE])]dnl
[m4_pushdef([AS_MESSAGE_LOG_FD])]dnl
[lt_write_fail=0
cat >$1 <<_ASEOF || lt_write_fail=1
#! $SHELL
# Generated by $as_me.
$2
SHELL=\${CONFIG_SHELL-$SHELL}
export SHELL
_ASEOF
cat >>$1 <<\_ASEOF || lt_write_fail=1
AS_SHELL_SANITIZE
_AS_PREPARE
exec AS_MESSAGE_FD>&1
_ASEOF
test $lt_write_fail = 0 && chmod +x $1[]dnl
m4_popdef([AS_MESSAGE_LOG_FD])])])# _LT_GENERATED_FILE_INIT

# LT_OUTPUT
# ---------
# This macro allows early generation of the libtool script (before
# AC_OUTPUT is called), in case it is used in configure for compilation
# tests.
# LT_OUTPUT writes the script named by $CONFIG_LT (default ./config.lt),
# makes it executable, runs it with $SHELL (adding --quiet when configure
# itself is silent), and exits configure with status 1 if that run fails.
AC_DEFUN([LT_OUTPUT],
[: ${CONFIG_LT=./config.lt}
AC_MSG_NOTICE([creating $CONFIG_LT])
_LT_GENERATED_FILE_INIT(["$CONFIG_LT"],
[# Run this file to recreate a libtool stub with the current configuration.])

cat >>"$CONFIG_LT" <<\_LTEOF
lt_cl_silent=false
exec AS_MESSAGE_LOG_FD>>config.log
{
  echo
  AS_BOX([Running $as_me.])
} >&AS_MESSAGE_LOG_FD

lt_cl_help="\
\`$as_me' creates a local libtool stub from the current configuration,
for use in further configure time tests before the real libtool is
generated.

Usage: $[0] [[OPTIONS]]

  -h, --help      print this help, then exit
  -V, --version   print version number, then exit
  -q, --quiet     do not print progress messages
  -d, --debug     don't remove temporary files

Report bugs to <bug-libtool@gnu.org>."

lt_cl_version="\
m4_ifset([AC_PACKAGE_NAME], [AC_PACKAGE_NAME ])config.lt[]dnl
m4_ifset([AC_PACKAGE_VERSION], [ AC_PACKAGE_VERSION])
configured by $[0], generated by m4_PACKAGE_STRING.

Copyright (C) 2011 Free Software Foundation, Inc.
This config.lt script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it."

while test $[#] != 0
do
  case $[1] in
    --version | --v* | -V )
      echo "$lt_cl_version"; exit 0 ;;
    --help | --h* | -h )
      echo "$lt_cl_help"; exit 0 ;;
    --debug | --d* | -d )
      debug=: ;;
    --quiet | --q* | --silent | --s* | -q )
      lt_cl_silent=: ;;

    -*) AC_MSG_ERROR([unrecognized option: $[1]
Try \`$[0] --help' for more information.]) ;;

    *) AC_MSG_ERROR([unrecognized argument: $[1]
Try \`$[0] --help' for more information.]) ;;
  esac
  shift
done

if $lt_cl_silent; then
  exec AS_MESSAGE_FD>/dev/null
fi
_LTEOF

cat >>"$CONFIG_LT" <<_LTEOF
_LT_OUTPUT_LIBTOOL_COMMANDS_INIT
_LTEOF

cat >>"$CONFIG_LT" <<\_LTEOF
AC_MSG_NOTICE([creating $ofile])
_LT_OUTPUT_LIBTOOL_COMMANDS
AS_EXIT(0)
_LTEOF
chmod +x "$CONFIG_LT"

# configure is writing to config.log, but config.lt does its own redirection,
# appending to config.log, which fails on DOS, as config.log is still kept
# open by configure.  Here we exec the FD to /dev/null, effectively closing
# config.log, so it can be properly (re)opened and appended to by config.lt.
lt_cl_success=:
test "$silent" = yes &&
  lt_config_lt_args="$lt_config_lt_args --quiet"
exec AS_MESSAGE_LOG_FD>/dev/null
$SHELL "$CONFIG_LT" $lt_config_lt_args || lt_cl_success=false
exec AS_MESSAGE_LOG_FD>>config.log
$lt_cl_success || AS_EXIT(1)
])# LT_OUTPUT


# _LT_CONFIG(TAG)
# ---------------
# If TAG is the built-in tag, create an initial libtool script with a
# default configuration from the untagged config vars.  Otherwise add code
# to config.status for appending the configuration named by TAG from the
# matching tagged config vars.
# An empty TAG is treated as the built-in tag C.  For the built-in tag
# the script is assembled in the temporary file ${ofile}T and then moved
# (or, failing that, copied) over $ofile.
m4_defun([_LT_CONFIG],
[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
_LT_CONFIG_SAVE_COMMANDS([
  m4_define([_LT_TAG], m4_if([$1], [], [C], [$1]))dnl
  m4_if(_LT_TAG, [C], [
    # See if we are running on zsh, and set the options which allow our
    # commands through without removal of \ escapes.
    if test -n "${ZSH_VERSION+set}" ; then
      setopt NO_GLOB_SUBST
    fi

    cfgfile="${ofile}T"
    trap "$RM \"$cfgfile\"; exit 1" 1 2 15
    $RM "$cfgfile"

    cat <<_LT_EOF >> "$cfgfile"
#! $SHELL

# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
# NOTE: Changes made to this file will be lost: look at ltmain.sh.
#
_LT_COPYING
_LT_LIBTOOL_TAGS

# ### BEGIN LIBTOOL CONFIG
_LT_LIBTOOL_CONFIG_VARS
_LT_LIBTOOL_TAG_VARS
# ### END LIBTOOL CONFIG

_LT_EOF

  case $host_os in
  aix3*)
    cat <<\_LT_EOF >> "$cfgfile"
# AIX sometimes has problems with the GCC collect2 program.  For some
# reason, if we set the COLLECT_NAMES environment variable, the problems
# vanish in a puff of smoke.
if test "X${COLLECT_NAMES+set}" != Xset; then
  COLLECT_NAMES=
  export COLLECT_NAMES
fi
_LT_EOF
    ;;
  esac

  _LT_PROG_LTMAIN

  # We use sed instead of cat because bash on DJGPP gets confused if
  # it finds mixed CR/LF and LF-only lines.  Since sed operates in
  # text mode, it properly converts lines to CR/LF.  This bash problem
  # is reportedly fixed, but why not run on old versions too?
  sed '$q' "$ltmain" >> "$cfgfile" \
     || (rm -f "$cfgfile"; exit 1)

  _LT_PROG_REPLACE_SHELLFNS

   mv -f "$cfgfile" "$ofile" ||
    (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
  chmod +x "$ofile"
],
[cat <<_LT_EOF >> "$ofile"

dnl Unfortunately we have to use $1 here, since _LT_TAG is not expanded
dnl in a comment (ie after a #).
# ### BEGIN LIBTOOL TAG CONFIG: $1
_LT_LIBTOOL_TAG_VARS(_LT_TAG)
# ### END LIBTOOL TAG CONFIG: $1
_LT_EOF
])dnl /m4_if
],
[m4_if([$1], [], [
    PACKAGE='$PACKAGE'
    VERSION='$VERSION'
    TIMESTAMP='$TIMESTAMP'
    RM='$RM'
    ofile='$ofile'], [])
])dnl /_LT_CONFIG_SAVE_COMMANDS
])# _LT_CONFIG


# LT_SUPPORTED_TAG(TAG)
# ---------------------
# Trace this macro to discover what tags are supported by the libtool
# --tag option, using:
#    autoconf --trace 'LT_SUPPORTED_TAG:$1'
AC_DEFUN([LT_SUPPORTED_TAG], [])


# C support is built-in for now
m4_define([_LT_LANG_C_enabled], [])
m4_define([_LT_TAGS], [])


# LT_LANG(LANG)
# -------------
# Enable libtool support for the given language if not already enabled.
# The names C, C++, Go, Java, Fortran 77, Fortran and Windows Resource
# map to the tags C, CXX, GO, GCJ, F77, FC and RC.  Any other LANG is
# accepted only when a macro named _LT_LANG_ followed by LANG and _CONFIG
# is defined; otherwise expansion stops with m4_fatal.
AC_DEFUN([LT_LANG],
[AC_BEFORE([$0], [LT_OUTPUT])dnl
m4_case([$1],
  [C],                  [_LT_LANG(C)],
  [C++],                [_LT_LANG(CXX)],
  [Go],                 [_LT_LANG(GO)],
  [Java],               [_LT_LANG(GCJ)],
  [Fortran 77],         [_LT_LANG(F77)],
  [Fortran],            [_LT_LANG(FC)],
  [Windows Resource],   [_LT_LANG(RC)],
  [m4_ifdef([_LT_LANG_]$1[_CONFIG],
    [_LT_LANG($1)],
    [m4_fatal([$0: unsupported language: "$1"])])])dnl
])# LT_LANG


# _LT_LANG(LANGNAME)
# ------------------
# Unless LANGNAME is already marked enabled: announce it through
# LT_SUPPORTED_TAG, append it to _LT_TAGS, mark it enabled, and expand
# its _LT_LANG_ ... _CONFIG macro.
m4_defun([_LT_LANG],
[m4_ifdef([_LT_LANG_]$1[_enabled], [],
  [LT_SUPPORTED_TAG([$1])dnl
  m4_append([_LT_TAGS], [$1 ])dnl
  m4_define([_LT_LANG_]$1[_enabled], [])dnl
  _LT_LANG_$1_CONFIG($1)])dnl
])# _LT_LANG


m4_ifndef([AC_PROG_GO], [
############################################################
# NOTE: This macro has been submitted for inclusion into   #
#  GNU Autoconf as AC_PROG_GO.  When it is available in    #
#  a released version of Autoconf we should remove this    #
#  macro and use it instead.                               #
############################################################
m4_defun([AC_PROG_GO],
[AC_LANG_PUSH(Go)dnl
AC_ARG_VAR([GOC],     [Go compiler command])dnl
AC_ARG_VAR([GOFLAGS], [Go compiler flags])dnl
_AC_ARG_VAR_LDFLAGS()dnl
AC_CHECK_TOOL(GOC, gccgo)
if test -z "$GOC"; then
  if test -n "$ac_tool_prefix"; then
    AC_CHECK_PROG(GOC, [${ac_tool_prefix}gccgo], [${ac_tool_prefix}gccgo])
  fi
fi
if test -z "$GOC"; then
  AC_CHECK_PROG(GOC, gccgo, gccgo, false)
fi
])#m4_defun
])#m4_ifndef


# _LT_LANG_DEFAULT_CONFIG
# -----------------------
# For each optional language (CXX, F77, FC, GCJ, GO, RC): if its
# compiler-detection macro has already been provided, enable the language
# now; otherwise redefine that macro so that LT_LANG for the language is
# appended to it and runs if the macro is expanded later.
m4_defun([_LT_LANG_DEFAULT_CONFIG],
[AC_PROVIDE_IFELSE([AC_PROG_CXX],
  [LT_LANG(CXX)],
  [m4_define([AC_PROG_CXX], defn([AC_PROG_CXX])[LT_LANG(CXX)])])

AC_PROVIDE_IFELSE([AC_PROG_F77],
  [LT_LANG(F77)],
  [m4_define([AC_PROG_F77], defn([AC_PROG_F77])[LT_LANG(F77)])])

AC_PROVIDE_IFELSE([AC_PROG_FC],
  [LT_LANG(FC)],
  [m4_define([AC_PROG_FC], defn([AC_PROG_FC])[LT_LANG(FC)])])

dnl The call to [A][M_PROG_GCJ] is quoted like that to stop aclocal
dnl pulling things in needlessly.
AC_PROVIDE_IFELSE([AC_PROG_GCJ],
  [LT_LANG(GCJ)],
  [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],
    [LT_LANG(GCJ)],
    [AC_PROVIDE_IFELSE([LT_PROG_GCJ],
      [LT_LANG(GCJ)],
      [m4_ifdef([AC_PROG_GCJ],
        [m4_define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[LT_LANG(GCJ)])])
       m4_ifdef([A][M_PROG_GCJ],
        [m4_define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[LT_LANG(GCJ)])])
       m4_ifdef([LT_PROG_GCJ],
        [m4_define([LT_PROG_GCJ], defn([LT_PROG_GCJ])[LT_LANG(GCJ)])])])])])

AC_PROVIDE_IFELSE([AC_PROG_GO],
  [LT_LANG(GO)],
  [m4_define([AC_PROG_GO], defn([AC_PROG_GO])[LT_LANG(GO)])])

AC_PROVIDE_IFELSE([LT_PROG_RC],
  [LT_LANG(RC)],
  [m4_define([LT_PROG_RC], defn([LT_PROG_RC])[LT_LANG(RC)])])
])# _LT_LANG_DEFAULT_CONFIG

# Obsolete macros:
AU_DEFUN([AC_LIBTOOL_CXX], [LT_LANG(C++)])
AU_DEFUN([AC_LIBTOOL_F77], [LT_LANG(Fortran 77)])
AU_DEFUN([AC_LIBTOOL_FC], [LT_LANG(Fortran)])
AU_DEFUN([AC_LIBTOOL_GCJ], [LT_LANG(Java)])
AU_DEFUN([AC_LIBTOOL_RC], [LT_LANG(Windows Resource)])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_CXX], [])
dnl AC_DEFUN([AC_LIBTOOL_F77], [])
dnl AC_DEFUN([AC_LIBTOOL_FC], [])
dnl AC_DEFUN([AC_LIBTOOL_GCJ], [])
dnl AC_DEFUN([AC_LIBTOOL_RC], [])


# _LT_TAG_COMPILER
# ----------------
# Declare the LTCC, LTCFLAGS, CC and with_gcc config variables, default
# LTCC and LTCFLAGS to $CC and $CFLAGS when unset, and record $CC in
# $compiler.
m4_defun([_LT_TAG_COMPILER],
[AC_REQUIRE([AC_PROG_CC])dnl

_LT_DECL([LTCC], [CC], [1], [A C compiler])dnl
_LT_DECL([LTCFLAGS], [CFLAGS], [1], [LTCC compiler flags])dnl
_LT_TAGDECL([CC], [compiler], [1], [A language specific compiler])dnl
_LT_TAGDECL([with_gcc], [GCC], [0], [Is the compiler the GNU compiler?])dnl

# If no C compiler was specified, use CC.
LTCC=${LTCC-"$CC"}

# If no C compiler flags were specified, use CFLAGS.
LTCFLAGS=${LTCFLAGS-"$CFLAGS"}

# Allow CC to be a program name with arguments.
compiler=$CC
])# _LT_TAG_COMPILER


# _LT_COMPILER_BOILERPLATE
# ------------------------
# Check for compiler boilerplate output or warnings with
# the simple compiler test code.
# The filtered compiler diagnostics (empty lines and lines starting with
# optional blanks and a `+' removed) are kept in $_lt_compiler_boilerplate.
m4_defun([_LT_COMPILER_BOILERPLATE],
[m4_require([_LT_DECL_SED])dnl
ac_outfile=conftest.$ac_objext
echo "$lt_simple_compile_test_code" >conftest.$ac_ext
eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
_lt_compiler_boilerplate=`cat conftest.err`
$RM conftest*
])# _LT_COMPILER_BOILERPLATE


# _LT_LINKER_BOILERPLATE
# ----------------------
# Check for linker boilerplate output or warnings with
# the simple link test code.
# The filtered linker diagnostics are kept in $_lt_linker_boilerplate.
m4_defun([_LT_LINKER_BOILERPLATE],
[m4_require([_LT_DECL_SED])dnl
ac_outfile=conftest.$ac_objext
echo "$lt_simple_link_test_code" >conftest.$ac_ext
eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
_lt_linker_boilerplate=`cat conftest.err`
$RM -r conftest*
])# _LT_LINKER_BOILERPLATE

# _LT_REQUIRED_DARWIN_CHECKS
# -------------------------
# On rhapsody and darwin hosts only: locate the dsymutil, nmedit, lipo,
# otool and otool64 tools, probe the linker for the -single_module,
# -exported_symbols_list and -force_load flags (results cached in
# lt_cv_apple_cc_single_mod, lt_cv_ld_exported_symbols_list and
# lt_cv_ld_force_load), and set the _lt_dar_allow_undefined,
# _lt_dar_single_mod, _lt_dar_export_syms and _lt_dsymutil shell
# variables from the outcome.
m4_defun_once([_LT_REQUIRED_DARWIN_CHECKS],[
  case $host_os in
    rhapsody* | darwin*)
    AC_CHECK_TOOL([DSYMUTIL], [dsymutil], [:])
    AC_CHECK_TOOL([NMEDIT], [nmedit], [:])
    AC_CHECK_TOOL([LIPO], [lipo], [:])
    AC_CHECK_TOOL([OTOOL], [otool], [:])
    AC_CHECK_TOOL([OTOOL64], [otool64], [:])
    _LT_DECL([], [DSYMUTIL], [1],
      [Tool to manipulate archived DWARF debug symbol files on Mac OS X])
    _LT_DECL([], [NMEDIT], [1],
      [Tool to change global to local symbols on Mac OS X])
    _LT_DECL([], [LIPO], [1],
      [Tool to manipulate fat objects and archives on Mac OS X])
    _LT_DECL([], [OTOOL], [1],
      [ldd/readelf like tool for Mach-O binaries on Mac OS X])
    _LT_DECL([], [OTOOL64], [1],
      [ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4])

    AC_CACHE_CHECK([for -single_module linker flag],[lt_cv_apple_cc_single_mod],
      [lt_cv_apple_cc_single_mod=no
      if test -z "${LT_MULTI_MODULE}"; then
        # By default we will add the -single_module flag. You can override
        # by either setting the environment variable LT_MULTI_MODULE
        # non-empty at configure time, or by adding -multi_module to the
        # link flags.
        rm -rf libconftest.dylib*
        echo "int foo(void){return 1;}" > conftest.c
        echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
-dynamiclib -Wl,-single_module conftest.c" >&AS_MESSAGE_LOG_FD
        $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
          -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
        _lt_result=$?
        # If there is a non-empty error log, and "single_module"
        # appears in it, assume the flag caused a linker warning
        if test -s conftest.err && $GREP single_module conftest.err; then
          cat conftest.err >&AS_MESSAGE_LOG_FD
        # Otherwise, if the output was created with a 0 exit code from
        # the compiler, it worked.
        elif test -f libconftest.dylib && test $_lt_result -eq 0; then
          lt_cv_apple_cc_single_mod=yes
        else
          cat conftest.err >&AS_MESSAGE_LOG_FD
        fi
        rm -rf libconftest.dylib*
        rm -f conftest.*
      fi])

    AC_CACHE_CHECK([for -exported_symbols_list linker flag],
      [lt_cv_ld_exported_symbols_list],
      [lt_cv_ld_exported_symbols_list=no
      save_LDFLAGS=$LDFLAGS
      echo "_main" > conftest.sym
      LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
      AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])],
        [lt_cv_ld_exported_symbols_list=yes],
        [lt_cv_ld_exported_symbols_list=no])
        LDFLAGS="$save_LDFLAGS"
    ])

    AC_CACHE_CHECK([for -force_load linker flag],[lt_cv_ld_force_load],
      [lt_cv_ld_force_load=no
      cat > conftest.c << _LT_EOF
int forced_loaded() { return 2;}
_LT_EOF
      echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&AS_MESSAGE_LOG_FD
      $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&AS_MESSAGE_LOG_FD
      echo "$AR cru libconftest.a conftest.o" >&AS_MESSAGE_LOG_FD
      $AR cru libconftest.a conftest.o 2>&AS_MESSAGE_LOG_FD
      echo "$RANLIB libconftest.a" >&AS_MESSAGE_LOG_FD
      $RANLIB libconftest.a 2>&AS_MESSAGE_LOG_FD
      cat > conftest.c << _LT_EOF
int main() { return 0;}
_LT_EOF
      echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a" >&AS_MESSAGE_LOG_FD
      $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c -Wl,-force_load,./libconftest.a 2>conftest.err
      _lt_result=$?
      if test -s conftest.err && $GREP force_load conftest.err; then
        cat conftest.err >&AS_MESSAGE_LOG_FD
      elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load conftest >/dev/null 2>&1 ; then
        lt_cv_ld_force_load=yes
      else
        cat conftest.err >&AS_MESSAGE_LOG_FD
      fi
        rm -f conftest.err libconftest.a conftest conftest.c
        rm -rf conftest.dSYM
    ])
    case $host_os in
    rhapsody* | darwin1.[[012]])
      _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
    darwin1.*)
      _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
    darwin*) # darwin 5.x on
      # if running on 10.5 or later, the deployment target defaults
      # to the OS version, if on x86, and 10.4, the deployment
      # target defaults to 10.4. Don't you love it?
      case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
        10.0,*86*-darwin8*|10.0,*-darwin[[91]]*)
          _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
        10.[[012]]*)
          _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;;
        10.*)
          _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
      esac
    ;;
  esac
    if test "$lt_cv_apple_cc_single_mod" = "yes"; then
      _lt_dar_single_mod='$single_module'
    fi
    if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
      _lt_dar_export_syms=' ${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
    else
      _lt_dar_export_syms='~$NMEDIT -s $output_objdir/${libname}-symbols.expsym ${lib}'
    fi
    if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
      _lt_dsymutil='~$DSYMUTIL $lib || :'
    else
      _lt_dsymutil=
    fi
    ;;
  esac
])


# _LT_DARWIN_LINKER_FEATURES([TAG])
# ---------------------------------
# Checks for linker and compiler features on darwin
# Sets the per-TAG link variables from the results of
# _LT_REQUIRED_DARWIN_CHECKS.  Shared libraries are treated as buildable
# when the compiler basename matches ifort* or when $GCC is yes;
# otherwise ld_shlibs for TAG is set to no.
m4_defun([_LT_DARWIN_LINKER_FEATURES],
[
  m4_require([_LT_REQUIRED_DARWIN_CHECKS])
  _LT_TAGVAR(archive_cmds_need_lc, $1)=no
  _LT_TAGVAR(hardcode_direct, $1)=no
  _LT_TAGVAR(hardcode_automatic, $1)=yes
  _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
  if test "$lt_cv_ld_force_load" = "yes"; then
    _LT_TAGVAR(whole_archive_flag_spec, $1)='`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; func_echo_all \"$new_convenience\"`'
    m4_case([$1], [F77], [_LT_TAGVAR(compiler_needs_object, $1)=yes],
                  [FC], [_LT_TAGVAR(compiler_needs_object, $1)=yes])
  else
    _LT_TAGVAR(whole_archive_flag_spec, $1)=''
  fi
  _LT_TAGVAR(link_all_deplibs, $1)=yes
  _LT_TAGVAR(allow_undefined_flag, $1)="$_lt_dar_allow_undefined"
  case $cc_basename in
     ifort*) _lt_dar_can_shared=yes ;;
     *) _lt_dar_can_shared=$GCC ;;
  esac
  if test "$_lt_dar_can_shared" = "yes"; then
    output_verbose_link_cmd=func_echo_all
    _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod${_lt_dsymutil}"
    _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dsymutil}"
    _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring ${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
    _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
    m4_if([$1], [CXX],
[   if test "$lt_cv_apple_cc_single_mod" != "yes"; then
      _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dsymutil}"
      _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's,^,_,' < \$export_symbols > \$output_objdir/\${libname}-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \${lib}-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \${lib}-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring${_lt_dar_export_syms}${_lt_dsymutil}"
    fi
],[])
  else
  _LT_TAGVAR(ld_shlibs, $1)=no
  fi
])

# _LT_SYS_MODULE_PATH_AIX([TAGNAME])
# ----------------------------------
# Links a minimal program and checks the executable
# for the system default hardcoded library path. In most cases,
# this is /usr/lib:/lib, but when the MPI compilers are used
# the location of the communication and MPI libs are included too.
# If we don't find anything, use the default library path according
# to the aix ld manual.
# Store the results from the different compilers for each TAGNAME.
# Allow to override them for all tags through lt_cv_aix_libpath.
# The result is left in $aix_libpath.
# NOTE(review): the run of blanks after the leading 0 in the sed
# substitution below may have lost a space when this file's whitespace
# was collapsed; confirm against a pristine libtool.m4.
m4_defun([_LT_SYS_MODULE_PATH_AIX],
[m4_require([_LT_DECL_SED])dnl
if test "${lt_cv_aix_libpath+set}" = set; then
  aix_libpath=$lt_cv_aix_libpath
else
  AC_CACHE_VAL([_LT_TAGVAR([lt_cv_aix_libpath_], [$1])],
  [AC_LINK_IFELSE([AC_LANG_PROGRAM],[
  lt_aix_libpath_sed='[
      /Import File Strings/,/^$/ {
          /^0/ {
              s/^0 *\([^ ]*\) *$/\1/
              p
          }
      }]'
  _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
  # Check for a 64-bit object if we didn't find anything.
  if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
    _LT_TAGVAR([lt_cv_aix_libpath_], [$1])=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"`
  fi],[])
  if test -z "$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])"; then
    _LT_TAGVAR([lt_cv_aix_libpath_], [$1])="/usr/lib:/lib"
  fi
  ])
  aix_libpath=$_LT_TAGVAR([lt_cv_aix_libpath_], [$1])
fi
])# _LT_SYS_MODULE_PATH_AIX


# _LT_SHELL_INIT(ARG)
# -------------------
# Place ARG, followed by a newline, in the M4SH-INIT diversion.
m4_define([_LT_SHELL_INIT],
[m4_divert_text([M4SH-INIT], [$1
])])# _LT_SHELL_INIT



# _LT_PROG_ECHO_BACKSLASH
# -----------------------
# Find how we can fake an echo command that does not interpret backslash.
# In particular, with Autoconf 2.60 or later we add some code to the start # of the generated configure script which will find a shell with a builtin # printf (which we can use as an echo command). m4_defun([_LT_PROG_ECHO_BACKSLASH], [ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO AC_MSG_CHECKING([how to print strings]) # Test print first, because it will be a builtin if present. if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \ test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='print -r --' elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then ECHO='printf %s\n' else # Use this function as a fallback that always works. func_fallback_echo () { eval 'cat <<_LTECHO_EOF $[]1 _LTECHO_EOF' } ECHO='func_fallback_echo' fi # func_echo_all arg... # Invoke $ECHO with all args, space-separated. func_echo_all () { $ECHO "$*" } case "$ECHO" in printf*) AC_MSG_RESULT([printf]) ;; print*) AC_MSG_RESULT([print -r]) ;; *) AC_MSG_RESULT([cat]) ;; esac m4_ifdef([_AS_DETECT_SUGGESTED], [_AS_DETECT_SUGGESTED([ test -n "${ZSH_VERSION+set}${BASH_VERSION+set}" || ( ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO PATH=/empty FPATH=/empty; export PATH FPATH test "X`printf %s $ECHO`" = "X$ECHO" \ || test "X`print -r -- $ECHO`" = "X$ECHO" )])]) _LT_DECL([], [SHELL], [1], [Shell to use when invoking shell scripts]) _LT_DECL([], [ECHO], [1], [An echo program that protects backslashes]) ])# _LT_PROG_ECHO_BACKSLASH # _LT_WITH_SYSROOT # ---------------- AC_DEFUN([_LT_WITH_SYSROOT], [AC_MSG_CHECKING([for sysroot]) AC_ARG_WITH([sysroot], [ --with-sysroot[=DIR] Search for dependent libraries within DIR (or the compiler's sysroot if not specified).], [], [with_sysroot=no]) dnl lt_sysroot will always be passed unquoted. 
We quote it here dnl in case the user passed a directory name. lt_sysroot= case ${with_sysroot} in #( yes) if test "$GCC" = yes; then lt_sysroot=`$CC --print-sysroot 2>/dev/null` fi ;; #( /*) lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"` ;; #( no|'') ;; #( *) AC_MSG_RESULT([${with_sysroot}]) AC_MSG_ERROR([The sysroot must be an absolute path.]) ;; esac AC_MSG_RESULT([${lt_sysroot:-no}]) _LT_DECL([], [lt_sysroot], [0], [The root where to search for ]dnl [dependent libraries, and in which our libraries should be installed.])]) # _LT_ENABLE_LOCK # --------------- m4_defun([_LT_ENABLE_LOCK], [AC_ARG_ENABLE([libtool-lock], [AS_HELP_STRING([--disable-libtool-lock], [avoid locking (might break parallel builds)])]) test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes # Some flags need to be propagated to the compiler or linker for good # libtool support. case $host in ia64-*-hpux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `/usr/bin/file conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE="32" ;; *ELF-64*) HPUX_IA64_MODE="64" ;; esac fi rm -rf conftest* ;; *-*-irix6*) # Find out which ABI we are using. echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then if test "$lt_cv_prog_gnu_ld" = yes; then case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; *N32*) LD="${LD-ld} -melf32bmipn32" ;; *64-bit*) LD="${LD-ld} -melf64bmip" ;; esac else case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; *N32*) LD="${LD-ld} -n32" ;; *64-bit*) LD="${LD-ld} -64" ;; esac fi fi rm -rf conftest* ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ s390*-*linux*|s390*-*tpf*|sparc*-*linux*) # Find out which ABI we are using. 
echo 'int i;' > conftest.$ac_ext
  if AC_TRY_EVAL(ac_compile); then
    # A 32-bit object on a 64-bit capable host selects the 32-bit linker
    # emulation, and a 64-bit object selects the 64-bit one.
    case `/usr/bin/file conftest.o` in
      *32-bit*)
        case $host in
          x86_64-*kfreebsd*-gnu)
            LD="${LD-ld} -m elf_i386_fbsd"
            ;;
          x86_64-*linux*)
            LD="${LD-ld} -m elf_i386"
            ;;
          ppc64-*linux*|powerpc64-*linux*)
            LD="${LD-ld} -m elf32ppclinux"
            ;;
          s390x-*linux*)
            LD="${LD-ld} -m elf_s390"
            ;;
          sparc64-*linux*)
            LD="${LD-ld} -m elf32_sparc"
            ;;
        esac
        ;;
      *64-bit*)
        case $host in
          x86_64-*kfreebsd*-gnu)
            LD="${LD-ld} -m elf_x86_64_fbsd"
            ;;
          x86_64-*linux*)
            LD="${LD-ld} -m elf_x86_64"
            ;;
          ppc*-*linux*|powerpc*-*linux*)
            LD="${LD-ld} -m elf64ppc"
            ;;
          s390*-*linux*|s390*-*tpf*)
            LD="${LD-ld} -m elf64_s390"
            ;;
          sparc*-*linux*)
            LD="${LD-ld} -m elf64_sparc"
            ;;
        esac
        ;;
    esac
  fi
  rm -rf conftest*
  ;;

*-*-sco3.2v5*)
  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
  SAVE_CFLAGS="$CFLAGS"
  CFLAGS="$CFLAGS -belf"
  AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
    [AC_LANG_PUSH(C)
     AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
     AC_LANG_POP])
  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
    CFLAGS="$SAVE_CFLAGS"
  fi
  ;;
*-*solaris*)
  # Find out which ABI we are using.
  echo 'int i;' > conftest.$ac_ext
  if AC_TRY_EVAL(ac_compile); then
    case `/usr/bin/file conftest.o` in
    *64-bit*)
      case $lt_cv_prog_gnu_ld in
      yes*)
        case $host in
        i?86-*-solaris*)
          LD="${LD-ld} -m elf_x86_64"
          ;;
        sparc*-*-solaris*)
          LD="${LD-ld} -m elf64_sparc"
          ;;
        esac
        # GNU ld 2.21 introduced _sol2 emulations.  Use them if available.
        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
          LD="${LD-ld}_sol2"
        fi
        ;;
      *)
        if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
          LD="${LD-ld} -64"
        fi
        ;;
      esac
      ;;
    esac
  fi
  rm -rf conftest*
  ;;
esac

need_locks="$enable_libtool_lock"
])# _LT_ENABLE_LOCK


# _LT_PROG_AR
# -----------
# Locate the archiver, default AR to ar and AR_FLAGS to cru, and probe
# whether the archiver accepts an @FILE argument list.  The probe only
# counts as a success when a second run, made after the listed object has
# been removed, fails; archiver_list_spec is then set to @.
m4_defun([_LT_PROG_AR],
[AC_CHECK_TOOLS(AR, [ar], false)
: ${AR=ar}
: ${AR_FLAGS=cru}
_LT_DECL([], [AR], [1], [The archiver])
_LT_DECL([], [AR_FLAGS], [1], [Flags to create an archive])

AC_CACHE_CHECK([for archiver @FILE support], [lt_cv_ar_at_file],
  [lt_cv_ar_at_file=no
   AC_COMPILE_IFELSE([AC_LANG_PROGRAM],
     [echo conftest.$ac_objext > conftest.lst
      lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&AS_MESSAGE_LOG_FD'
      AC_TRY_EVAL([lt_ar_try])
      if test "$ac_status" -eq 0; then
        # Ensure the archiver fails upon bogus file names.
        rm -f conftest.$ac_objext libconftest.a
        AC_TRY_EVAL([lt_ar_try])
        if test "$ac_status" -ne 0; then
          lt_cv_ar_at_file=@
        fi
      fi
      rm -f conftest.* libconftest.a
     ])
  ])

if test "x$lt_cv_ar_at_file" = xno; then
  archiver_list_spec=
else
  archiver_list_spec=$lt_cv_ar_at_file
fi
_LT_DECL([], [archiver_list_spec], [1],
  [How to feed a file listing to the archiver])
])# _LT_PROG_AR


# _LT_CMD_OLD_ARCHIVE
# -------------------
# Pull in _LT_PROG_AR and locate strip and ranlib, falling back to the
# no-op command : for each tool that is not found.
m4_defun([_LT_CMD_OLD_ARCHIVE],
[_LT_PROG_AR

AC_CHECK_TOOL(STRIP, strip, :)
test -z "$STRIP" && STRIP=:
_LT_DECL([], [STRIP], [1], [A symbol stripping program])

AC_CHECK_TOOL(RANLIB, ranlib, :)
test -z "$RANLIB" && RANLIB=:
_LT_DECL([], [RANLIB], [1],
    [Commands used to install an old-style archive])

# Determine commands to create old-style static archives.
old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
old_postinstall_cmds='chmod 644 $oldlib'
old_postuninstall_cmds=

# Commands within one of these variables are separated by a tilde.
if test -n "$RANLIB"; then
  case $host_os in
  openbsd*)
    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
    ;;
  *)
    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
    ;;
  esac
  old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
fi

case $host_os in
  darwin*)
    lock_old_archive_extraction=yes ;;
  *)
    lock_old_archive_extraction=no ;;
esac
_LT_DECL([], [old_postinstall_cmds], [2])
_LT_DECL([], [old_postuninstall_cmds], [2])
_LT_TAGDECL([], [old_archive_cmds], [2],
    [Commands used to build an old-style archive])
_LT_DECL([], [lock_old_archive_extraction], [0],
    [Whether to use a lock for old archive extraction])
])# _LT_CMD_OLD_ARCHIVE


# _LT_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
#               [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE])
# ----------------------------------------------------------------
# Check whether the given compiler option works
#
# The cache variable VARIABLE-NAME becomes yes only when the compile exits
# successfully, leaves a non-empty output file, and its stderr is either
# empty or identical to the compiler boilerplate output (both compared
# with blank lines removed).  ACTION-SUCCESS or ACTION-FAILURE is then run;
# an omitted action defaults to the no-op command.
AC_DEFUN([_LT_COMPILER_OPTION],
[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
m4_require([_LT_DECL_SED])dnl
AC_CACHE_CHECK([$1], [$2],
  [$2=no
   m4_if([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4])
   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
   lt_compiler_flag="$3"
   # Insert the option either (1) after the last *FLAGS variable, or
   # (2) before a word containing "conftest.", or (3) at the end.
   # Note that $ac_compile itself does not contain backslashes and begins
   # with a dollar sign (not a hyphen), so the echo should work correctly.
   # The option is referenced via a variable to avoid confusing sed.
   lt_compile=`echo "$ac_compile" | $SED \
   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
   -e 's:$: $lt_compiler_flag:'`
   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
   (eval "$lt_compile" 2>conftest.err)
   ac_status=$?
   cat conftest.err >&AS_MESSAGE_LOG_FD
   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
   if (exit $ac_status) && test -s "$ac_outfile"; then
     # The compiler can only warn and ignore the option if not recognized
     # So say no if there are warnings other than the usual output.
     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
       $2=yes
     fi
   fi
   $RM conftest*
])

if test x"[$]$2" = xyes; then
    m4_if([$5], , :, [$5])
else
    m4_if([$6], , :, [$6])
fi
])# _LT_COMPILER_OPTION

# Old name:
AU_ALIAS([AC_LIBTOOL_COMPILER_OPTION], [_LT_COMPILER_OPTION])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], [])


# _LT_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
#                  [ACTION-SUCCESS], [ACTION-FAILURE])
# ----------------------------------------------------
# Check whether the given linker option works
#
# FLAGS is appended to LDFLAGS for the duration of the test link.
AC_DEFUN([_LT_LINKER_OPTION],
[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
m4_require([_LT_DECL_SED])dnl
AC_CACHE_CHECK([$1], [$2],
  [$2=no
   save_LDFLAGS="$LDFLAGS"
   LDFLAGS="$LDFLAGS $3"
   echo "$lt_simple_link_test_code" > conftest.$ac_ext
   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
     # The linker can only warn and ignore the option if not recognized
     # So say no if there are warnings
     if test -s conftest.err; then
       # Append any errors to the config.log.
cat conftest.err 1>&AS_MESSAGE_LOG_FD
       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
       if diff conftest.exp conftest.er2 >/dev/null; then
         $2=yes
       fi
     else
       $2=yes
     fi
   fi
   $RM -r conftest*
   LDFLAGS="$save_LDFLAGS"
])

if test x"[$]$2" = xyes; then
    m4_if([$4], , :, [$4])
else
    m4_if([$5], , :, [$5])
fi
])# _LT_LINKER_OPTION

# Old name:
AU_ALIAS([AC_LIBTOOL_LINKER_OPTION], [_LT_LINKER_OPTION])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], [])


# LT_CMD_MAX_LEN
#---------------
# Determine max_cmd_len, the longest command line libtool may build.
# Most build systems get a hard-coded or system-reported value (sysctl,
# sysconfig, the stune file, or getconf ARG_MAX), usually scaled down as a
# safety margin.  Only when getconf reports nothing is the limit probed by
# repeatedly doubling a test string passed to env echo.  The value -1
# means no limit.
AC_DEFUN([LT_CMD_MAX_LEN],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
# find the maximum length of command line arguments
AC_MSG_CHECKING([the maximum length of command line arguments])
AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
  i=0
  teststring="ABCD"

  case $build_os in
  msdosdjgpp*)
    # On DJGPP, this test can blow up pretty badly due to problems in libc
    # (any single argument exceeding 2000 bytes causes a buffer overrun
    # during glob expansion).  Even if it were fixed, the result of this
    # check would be larger than it should be.
    lt_cv_sys_max_cmd_len=12288;    # 12K is about right
    ;;

  gnu*)
    # Under GNU Hurd, this test is not required because there is
    # no limit to the length of command line arguments.
    # Libtool will interpret -1 as no limit whatsoever
    lt_cv_sys_max_cmd_len=-1;
    ;;

  cygwin* | mingw* | cegcc*)
    # On Win9x/ME, this test blows up -- it succeeds, but takes
    # about 5 minutes as the teststring grows exponentially.
    # Worse, since 9x/ME are not pre-emptively multitasking,
    # you end up with a "frozen" computer, even though with patience
    # the test eventually succeeds (with a max line length of 256k).
    # Instead, let's just punt: use the minimum linelength reported by
    # all of the supported platforms: 8192 (on NT/2K/XP).
    lt_cv_sys_max_cmd_len=8192;
    ;;

  mint*)
    # On MiNT this can take a long time and run out of memory.
    lt_cv_sys_max_cmd_len=8192;
    ;;

  amigaos*)
    # On AmigaOS with pdksh, this test takes hours, literally.
    # So we just punt and use a minimum line length of 8192.
    lt_cv_sys_max_cmd_len=8192;
    ;;

  netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
    # This has been around since 386BSD, at least.  Likely further.
    if test -x /sbin/sysctl; then
      lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
    elif test -x /usr/sbin/sysctl; then
      lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
    else
      lt_cv_sys_max_cmd_len=65536	# usable default for all BSDs
    fi
    # And add a safety zone
    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
    ;;

  interix*)
    # We know the value 262144 and hardcode it with a safety zone (like BSD)
    lt_cv_sys_max_cmd_len=196608
    ;;

  os2*)
    # The test takes a long time on OS/2.
    lt_cv_sys_max_cmd_len=8192
    ;;

  osf*)
    # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
    # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
    # nice to cause kernel panics so lets avoid the loop below.
    # First set a reasonable default.
    lt_cv_sys_max_cmd_len=16384
    #
    if test -x /sbin/sysconfig; then
      case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
        *1*) lt_cv_sys_max_cmd_len=-1 ;;
      esac
    fi
    ;;
  sco3.2v5*)
    lt_cv_sys_max_cmd_len=102400
    ;;
  sysv5* | sco5v6* | sysv4.2uw2*)
    kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
    if test -n "$kargmax"; then
      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'`
    else
      lt_cv_sys_max_cmd_len=32768
    fi
    ;;
  *)
    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
    if test -n "$lt_cv_sys_max_cmd_len"; then
      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
    else
      # Make teststring a little bigger before we do anything with it.
      # a 1K string should be a reasonable start.
      for i in 1 2 3 4 5 6 7 8 ; do
        teststring=$teststring$teststring
      done
      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
      # If test is not a shell built-in, we'll probably end up computing a
      # maximum length that is only half of the actual maximum length, but
      # we can't tell.
      while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
                 = "X$teststring$teststring"; } >/dev/null 2>&1 &&
              test $i != 17 # 1/2 MB should be enough
      do
        i=`expr $i + 1`
        teststring=$teststring$teststring
      done
      # Only check the string length outside the loop.
      lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
      teststring=
      # Add a significant safety factor because C++ compilers can tack on
      # massive amounts of additional arguments before passing them to the
      # linker.  It appears as though 1/2 is a usable value.
      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
    fi
    ;;
  esac
])
# The variable must be quoted here: unquoted, an empty value would leave
# a bare one-argument test, which is always true, and the none branch
# could never be reached.
if test -n "$lt_cv_sys_max_cmd_len"; then
  AC_MSG_RESULT($lt_cv_sys_max_cmd_len)
else
  AC_MSG_RESULT(none)
fi
max_cmd_len=$lt_cv_sys_max_cmd_len
_LT_DECL([], [max_cmd_len], [0],
    [What is the maximum length of a command?])
])# LT_CMD_MAX_LEN

# Old name:
AU_ALIAS([AC_LIBTOOL_SYS_MAX_CMD_LEN], [LT_CMD_MAX_LEN])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], [])


# _LT_HEADER_DLFCN
# ----------------
# Check for the dlfcn.h header.
m4_defun([_LT_HEADER_DLFCN],
[AC_CHECK_HEADERS([dlfcn.h], [], [], [AC_INCLUDES_DEFAULT])dnl
])# _LT_HEADER_DLFCN


# _LT_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
#                      ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
# ----------------------------------------------------------------
# Build and run a small program that calls dlopen on itself and looks up
# its own symbol fnord, with and without a leading underscore.  The exit
# status of that program selects which ACTION is run.  The program
# includes dlfcn.h (when HAVE_DLFCN_H is set) for the dl* functions and
# stdio.h for puts.
m4_defun([_LT_TRY_DLOPEN_SELF],
[m4_require([_LT_HEADER_DLFCN])dnl
if test "$cross_compiling" = yes; then :
  [$4]
else
  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
  lt_status=$lt_dlunknown
  cat > conftest.$ac_ext <<_LT_EOF
[#line $LINENO "configure"
#include "confdefs.h"

#if HAVE_DLFCN_H
#include <dlfcn.h>
#endif

#include <stdio.h>

#ifdef RTLD_GLOBAL
#  define LT_DLGLOBAL		RTLD_GLOBAL
#else
#  ifdef DL_GLOBAL
#    define LT_DLGLOBAL		DL_GLOBAL
#  else
#    define LT_DLGLOBAL		0
#  endif
#endif

/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
   find out it does not work in some platform. */
#ifndef LT_DLLAZY_OR_NOW
#  ifdef RTLD_LAZY
#    define LT_DLLAZY_OR_NOW		RTLD_LAZY
#  else
#    ifdef DL_LAZY
#      define LT_DLLAZY_OR_NOW		DL_LAZY
#    else
#      ifdef RTLD_NOW
#        define LT_DLLAZY_OR_NOW	RTLD_NOW
#      else
#        ifdef DL_NOW
#          define LT_DLLAZY_OR_NOW	DL_NOW
#        else
#          define LT_DLLAZY_OR_NOW	0
#        endif
#      endif
#    endif
#  endif
#endif

/* When -fvisbility=hidden is used, assume the code has been annotated
   correspondingly for the symbols needed.  */
#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || (__GNUC__ > 3))
int fnord () __attribute__((visibility("default")));
#endif

int fnord () { return 42; }
int main ()
{
  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
  int status = $lt_dlunknown;

  if (self)
    {
      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
      else
        {
          if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
          else puts (dlerror ());
        }
      /* dlclose (self); */
    }
  else
    puts (dlerror ());

  return status;
}]
_LT_EOF
  if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then
    (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null
    lt_status=$?
case x$lt_status in
      x$lt_dlno_uscore) $1 ;;
      x$lt_dlneed_uscore) $2 ;;
      x$lt_dlunknown|x*) $3 ;;
    esac
  else :
    # compilation failed
    $3
  fi
fi
rm -fr conftest*
])# _LT_TRY_DLOPEN_SELF


# LT_SYS_DLOPEN_SELF
# ------------------
# Unless enable_dlopen is yes, all three enable_dlopen* results are set to
# unknown.  Otherwise a per-host loading entry point is chosen and recorded
# in lt_cv_dlopen and lt_cv_dlopen_libs.  When that entry point is dlopen,
# _LT_TRY_DLOPEN_SELF is run to see whether a program can dlopen itself,
# and, if it can, run again with the static link flag added.
AC_DEFUN([LT_SYS_DLOPEN_SELF],
[m4_require([_LT_HEADER_DLFCN])dnl
if test "x$enable_dlopen" != xyes; then
  enable_dlopen=unknown
  enable_dlopen_self=unknown
  enable_dlopen_self_static=unknown
else
  lt_cv_dlopen=no
  lt_cv_dlopen_libs=

  case $host_os in
  beos*)
    lt_cv_dlopen="load_add_on"
    lt_cv_dlopen_libs=
    lt_cv_dlopen_self=yes
    ;;

  mingw* | pw32* | cegcc*)
    lt_cv_dlopen="LoadLibrary"
    lt_cv_dlopen_libs=
    ;;

  cygwin*)
    lt_cv_dlopen="dlopen"
    lt_cv_dlopen_libs=
    ;;

  darwin*)
  # if libdl is installed we need to link against it
    AC_CHECK_LIB([dl], [dlopen],
                [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[
    lt_cv_dlopen="dyld"
    lt_cv_dlopen_libs=
    lt_cv_dlopen_self=yes
    ])
    ;;

  *)
    AC_CHECK_FUNC([shl_load],
          [lt_cv_dlopen="shl_load"],
      [AC_CHECK_LIB([dld], [shl_load],
            [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"],
        [AC_CHECK_FUNC([dlopen],
              [lt_cv_dlopen="dlopen"],
          [AC_CHECK_LIB([dl], [dlopen],
                [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
            [AC_CHECK_LIB([svld], [dlopen],
                  [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
              [AC_CHECK_LIB([dld], [dld_link],
                    [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"])
              ])
            ])
          ])
        ])
      ])
    ;;
  esac

  if test "x$lt_cv_dlopen" != xno; then
    enable_dlopen=yes
  else
    enable_dlopen=no
  fi

  case $lt_cv_dlopen in
  dlopen)
    # CPPFLAGS, LDFLAGS and LIBS are saved here and restored after the
    # self-dlopen checks.
    save_CPPFLAGS="$CPPFLAGS"
    test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"

    save_LDFLAGS="$LDFLAGS"
    wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"

    save_LIBS="$LIBS"
    LIBS="$lt_cv_dlopen_libs $LIBS"

    AC_CACHE_CHECK([whether a program can dlopen itself],
          lt_cv_dlopen_self, [dnl
          _LT_TRY_DLOPEN_SELF(
            lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes,
            lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross)
    ])

    if test "x$lt_cv_dlopen_self" = xyes; then
      wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
      AC_CACHE_CHECK([whether a statically linked program can dlopen itself],
          lt_cv_dlopen_self_static, [dnl
          _LT_TRY_DLOPEN_SELF(
            lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes,
            lt_cv_dlopen_self_static=no,  lt_cv_dlopen_self_static=cross)
      ])
    fi

    CPPFLAGS="$save_CPPFLAGS"
    LDFLAGS="$save_LDFLAGS"
    LIBS="$save_LIBS"
    ;;
  esac

  # Any cached value other than yes or no, such as cross, is reported as
  # unknown.
  case $lt_cv_dlopen_self in
  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
  *) enable_dlopen_self=unknown ;;
  esac

  case $lt_cv_dlopen_self_static in
  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
  *) enable_dlopen_self_static=unknown ;;
  esac
fi
_LT_DECL([dlopen_support], [enable_dlopen], [0],
         [Whether dlopen is supported])
_LT_DECL([dlopen_self], [enable_dlopen_self], [0],
         [Whether dlopen of programs is supported])
_LT_DECL([dlopen_self_static], [enable_dlopen_self_static], [0],
         [Whether dlopen of statically linked programs is supported])
])# LT_SYS_DLOPEN_SELF

# Old name:
AU_ALIAS([AC_LIBTOOL_DLOPEN_SELF], [LT_SYS_DLOPEN_SELF])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], [])


# _LT_COMPILER_C_O([TAGNAME])
# ---------------------------
# Check to see if options -c and -o are simultaneously supported by compiler.
# This macro does not hard code the compiler like AC_PROG_CC_C_O.
# The test runs inside a scratch directory named conftest and asks the
# compiler to write its object into the out subdirectory.
m4_defun([_LT_COMPILER_C_O],
[m4_require([_LT_DECL_SED])dnl
m4_require([_LT_FILEUTILS_DEFAULTS])dnl
m4_require([_LT_TAG_COMPILER])dnl
AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext],
  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)],
  [_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no
   $RM -r conftest 2>/dev/null
   mkdir conftest
   cd conftest
   mkdir out
   echo "$lt_simple_compile_test_code" > conftest.$ac_ext

   lt_compiler_flag="-o out/conftest2.$ac_objext"
   # Insert the option either (1) after the last *FLAGS variable, or
   # (2) before a word containing "conftest.", or (3) at the end.
# Note that $ac_compile itself does not contain backslashes and begins
   # with a dollar sign (not a hyphen), so the echo should work correctly.
   lt_compile=`echo "$ac_compile" | $SED \
   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
   -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
   -e 's:$: $lt_compiler_flag:'`
   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
   (eval "$lt_compile" 2>out/conftest.err)
   ac_status=$?
   cat out/conftest.err >&AS_MESSAGE_LOG_FD
   echo "$as_me:$LINENO: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
   if (exit $ac_status) && test -s out/conftest2.$ac_objext
   then
     # The compiler can only warn and ignore the option if not recognized
     # So say no if there are warnings
     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
       _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
     fi
   fi
   chmod u+w . 2>&AS_MESSAGE_LOG_FD
   $RM conftest*
   # SGI C++ compiler will create directory out/ii_files/ for
   # template instantiation
   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
   $RM out/* && rmdir out
   cd ..
   $RM -r conftest
   $RM conftest*
])
_LT_TAGDECL([compiler_c_o], [lt_cv_prog_compiler_c_o], [1],
        [Does compiler simultaneously support -c and -o options?])
])# _LT_COMPILER_C_O


# _LT_COMPILER_FILE_LOCKS([TAGNAME])
# ----------------------------------
# Check to see if we can do hard links to lock some files if needed
#
# The hard link probe only runs when the compiler lacks -c -o support and
# need_locks is not already no.  If hard links turn out to be unusable,
# need_locks becomes warn; if they work, need_locks keeps its value.  In
# every other case need_locks is set to no.
m4_defun([_LT_COMPILER_FILE_LOCKS],
[m4_require([_LT_ENABLE_LOCK])dnl
m4_require([_LT_FILEUTILS_DEFAULTS])dnl
_LT_COMPILER_C_O([$1])

hard_links="nottested"
if test "$_LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then
  # do not overwrite the value of need_locks provided by the user
  AC_MSG_CHECKING([if we can lock with hard links])
  hard_links=yes
  $RM conftest*
  # Linking a missing source must fail, linking an existing file must
  # succeed, and linking onto an existing target must fail again.
  ln conftest.a conftest.b 2>/dev/null && hard_links=no
  touch conftest.a
  ln conftest.a conftest.b 2>&5 || hard_links=no
  ln conftest.a conftest.b 2>/dev/null && hard_links=no
  AC_MSG_RESULT([$hard_links])
  if test "$hard_links" = no; then
    AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe])
    need_locks=warn
  fi
else
  need_locks=no
fi
_LT_DECL([], [need_locks], [1], [Must we lock files when doing compilation?])
])# _LT_COMPILER_FILE_LOCKS


# _LT_CHECK_OBJDIR
# ----------------
# Use .libs as the object directory when a directory of that name can be
# created, and _libs otherwise.  The result is also exported to C code as
# LT_OBJDIR with a trailing slash.
m4_defun([_LT_CHECK_OBJDIR],
[AC_CACHE_CHECK([for objdir], [lt_cv_objdir],
[rm -f .libs 2>/dev/null
mkdir .libs 2>/dev/null
if test -d .libs; then
  lt_cv_objdir=.libs
else
  # MS-DOS does not allow filenames that begin with a dot.
  lt_cv_objdir=_libs
fi
rmdir .libs 2>/dev/null])
objdir=$lt_cv_objdir
_LT_DECL([], [objdir], [0],
         [The name of the directory that contains temporary libtool files])dnl
m4_pattern_allow([LT_OBJDIR])dnl
AC_DEFINE_UNQUOTED(LT_OBJDIR, "$lt_cv_objdir/",
  [Define to the sub-directory in which libtool stores uninstalled libraries.])
])# _LT_CHECK_OBJDIR


# _LT_LINKER_HARDCODE_LIBPATH([TAGNAME])
# --------------------------------------
# Check hardcoding attributes.
m4_defun([_LT_LINKER_HARDCODE_LIBPATH],
[AC_MSG_CHECKING([how to hardcode library paths into programs])
dnl hardcode_action ends up as relink, immediate or unsupported, and
dnl enable_fast_install may be downgraded to no or needless afterwards.
_LT_TAGVAR(hardcode_action, $1)=
if test -n "$_LT_TAGVAR(hardcode_libdir_flag_spec, $1)" ||
   test -n "$_LT_TAGVAR(runpath_var, $1)" ||
   test "X$_LT_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then

  # We can hardcode non-existent directories.
  if test "$_LT_TAGVAR(hardcode_direct, $1)" != no &&
     # If the only mechanism to avoid hardcoding is shlibpath_var, we
     # have to relink, otherwise we might link with an installed library
     # when we should be linking with a yet-to-be-installed one
     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, $1)" != no &&
     test "$_LT_TAGVAR(hardcode_minus_L, $1)" != no; then
    # Linking always hardcodes the temporary library directory.
    _LT_TAGVAR(hardcode_action, $1)=relink
  else
    # We can link without hardcoding, and we can hardcode nonexisting dirs.
    _LT_TAGVAR(hardcode_action, $1)=immediate
  fi
else
  # We cannot hardcode anything, or else we can only hardcode existing
  # directories.
  _LT_TAGVAR(hardcode_action, $1)=unsupported
fi
AC_MSG_RESULT([$_LT_TAGVAR(hardcode_action, $1)])

if test "$_LT_TAGVAR(hardcode_action, $1)" = relink ||
   test "$_LT_TAGVAR(inherit_rpath, $1)" = yes; then
  # Fast installation is not supported
  enable_fast_install=no
elif test "$shlibpath_overrides_runpath" = yes ||
     test "$enable_shared" = no; then
  # Fast installation is not necessary
  enable_fast_install=needless
fi
_LT_TAGDECL([], [hardcode_action], [0],
    [How to hardcode a shared library path into an executable])
])# _LT_LINKER_HARDCODE_LIBPATH


# _LT_CMD_STRIPLIB
# ----------------
# Decide how libraries may be stripped.  GNU strip gets --strip-debug for
# old-style archives and --strip-unneeded for shared libraries; on darwin
# any available strip gets -S and -x respectively; otherwise both command
# variables stay empty and the answer is no.
m4_defun([_LT_CMD_STRIPLIB],
[m4_require([_LT_DECL_EGREP])
striplib=
old_striplib=
AC_MSG_CHECKING([whether stripping libraries is possible])
if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
  test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
  test -z "$striplib" && striplib="$STRIP --strip-unneeded"
  AC_MSG_RESULT([yes])
else
# FIXME - insert some real tests, host_os isn't really good enough
  case $host_os in
  darwin*)
    if test -n "$STRIP" ; then
      striplib="$STRIP -x"
      old_striplib="$STRIP -S"
      AC_MSG_RESULT([yes])
    else
      AC_MSG_RESULT([no])
    fi
    ;;
  *)
    AC_MSG_RESULT([no])
    ;;
  esac
fi
_LT_DECL([], [old_striplib], [1], [Commands to strip libraries])
_LT_DECL([], [striplib], [1])
])# _LT_CMD_STRIPLIB


# _LT_SYS_DYNAMIC_LINKER([TAG])
# -----------------------------
# PORTME Fill in your ld.so characteristics
m4_defun([_LT_SYS_DYNAMIC_LINKER],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
m4_require([_LT_DECL_EGREP])dnl
m4_require([_LT_FILEUTILS_DEFAULTS])dnl
m4_require([_LT_DECL_OBJDUMP])dnl
m4_require([_LT_DECL_SED])dnl
m4_require([_LT_CHECK_SHELL_FEATURES])dnl
AC_MSG_CHECKING([dynamic linker characteristics])
m4_if([$1],
        [], [
if test "$GCC" = yes; then
  case $host_os in
    darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
    *) lt_awk_arg="/^libraries:/" ;;
  esac
  case $host_os in
    mingw* | cegcc*) lt_sed_strip_eq="s,=\([[A-Za-z]]:\),\1,g" ;;
    *) lt_sed_strip_eq="s,=/,/,g" ;;
  esac
lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e $lt_sed_strip_eq` case $lt_search_path_spec in *\;*) # if the path contains ";" then we assume it to be the separator # otherwise default to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'` ;; *) lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED "s/$PATH_SEPARATOR/ /g"` ;; esac # Ok, now we have the path, separated by spaces, we can step through it # and add multilib dir if necessary. lt_tmp_lt_search_path_spec= lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` for lt_sys_path in $lt_search_path_spec; do if test -d "$lt_sys_path/$lt_multi_os_dir"; then lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" else test -d "$lt_sys_path" && \ lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" fi done lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk ' BEGIN {RS=" "; FS="/|\n";} { lt_foo=""; lt_count=0; for (lt_i = NF; lt_i > 0; lt_i--) { if ($lt_i != "" && $lt_i != ".") { if ($lt_i == "..") { lt_count++; } else { if (lt_count == 0) { lt_foo="/" $lt_i lt_foo; } else { lt_count--; } } } } if (lt_foo != "") { lt_freq[[lt_foo]]++; } if (lt_freq[[lt_foo]] == 1) { print lt_foo; } }'` # AWK program above erroneously prepends '/' to C:/dos/paths # for these hosts. 
case $host_os in mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\ $SED 's,/\([[A-Za-z]]:\),\1,g'` ;; esac sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" fi]) library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=".so" postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='${libname}${release}${shared_ext}$major' ;; aix[[4-9]]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no hardcode_into_libs=yes if test "$host_cpu" = ia64; then # AIX 5 supports IA64 library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line `#! .'. This would cause the generated library to # depend on `.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. 
case $host_os in aix4 | aix4.[[01]] | aix4.[[01]].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then : else can_build_shared=no fi ;; esac # AIX (on Power*) has no versioning support, so currently we can not hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. if test "$aix_use_runtimelinking" = yes; then # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' else # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='${libname}${release}.a $libname.a' soname_spec='${libname}${release}${shared_ext}$major' fi shlibpath_var=LIBPATH fi ;; amigaos*) case $host_cpu in powerpc) # Since July 2007 AmigaOS4 officially supports .so libraries. # When compiling the executable, add -use-dynld -Lsobjs: to the compileline. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' ;; m68k) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. 
finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; esac ;; beos*) library_names_spec='${libname}${shared_ext}' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[[45]]*) version_type=linux # correct to gnu/linux during the next big refactor need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32* | cegcc*) version_type=windows shrext_cmds=".dll" need_version=no need_lib_prefix=no case $GCC,$cc_basename in yes,*) # gcc library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname~ if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then eval '\''$striplib \$dldir/$dlname'\'' || exit \$?; fi' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"]) ;; mingw* | cegcc*) # MinGW DLLs use traditional 'lib' prefix soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' ;; esac dynamic_linker='Win32 ld.exe' ;; *,cl*) # Native MSVC libname_spec='$name' soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' library_names_spec='${libname}.dll.lib' case $build_os in mingw*) sys_lib_search_path_spec= lt_save_ifs=$IFS IFS=';' for lt_path in $LIB do IFS=$lt_save_ifs # Let DOS variable expansion print the short 8.3 style file name. lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do @echo %~si"` sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path" done IFS=$lt_save_ifs # Convert to MSYS style. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'` ;; cygwin*) # Convert to unix form, then to dos form, then back to unix form # but this time dos style (no spaces!) so that the unix form looks # like /cygdrive/c/PROGRA~1:/cygdr... 
sys_lib_search_path_spec=`cygpath --path --unix "$LIB"` sys_lib_search_path_spec=`cygpath --path --dos "$sys_lib_search_path_spec" 2>/dev/null` sys_lib_search_path_spec=`cygpath --path --unix "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` ;; *) sys_lib_search_path_spec="$LIB" if $ECHO "$sys_lib_search_path_spec" | [$GREP ';[c-zC-Z]:/' >/dev/null]; then # It is most probably a Windows format PATH. sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # FIXME: find the short name or the path components, as spaces are # common. (e.g. "Program Files" -> "PROGRA~1") ;; esac # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $RM \$dlpath' shlibpath_overrides_runpath=yes dynamic_linker='Win32 link.exe' ;; *) # Assume MSVC wrapper library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib' dynamic_linker='Win32 ld.exe' ;; esac # FIXME: first we should search . 
and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${major}$shared_ext ${libname}$shared_ext' soname_spec='${libname}${release}${major}$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd* | dragonfly*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. 
if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[[23]].*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2.*) shlibpath_overrides_runpath=yes ;; freebsd3.[[01]]* | freebsdelf3.[[01]]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; gnu*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; haiku*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no dynamic_linker="$host_os runtime_loader" library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LIBRARY_PATH shlibpath_overrides_runpath=yes sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib /boot/system/lib' hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other 
versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' if test "X$HPUX_IA64_MODE" = X32; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" fi sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555, ... 
postinstall_cmds='chmod 555 $lib' # or fails outright, so override atomically: install_override_mode=555 ;; interix[[3-9]]*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test "$lt_cv_prog_gnu_ld" = yes; then version_type=linux # correct to gnu/linux during the next big refactor else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; # This must be glibc/ELF. 
linux* | k*bsd*-gnu | kopensolaris*-gnu) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # Some binutils ld are patched to set DT_RUNPATH AC_CACHE_VAL([lt_cv_shlibpath_overrides_runpath], [lt_cv_shlibpath_overrides_runpath=no save_LDFLAGS=$LDFLAGS save_libdir=$libdir eval "libdir=/foo; wl=\"$_LT_TAGVAR(lt_prog_compiler_wl, $1)\"; \ LDFLAGS=\"\$LDFLAGS $_LT_TAGVAR(hardcode_libdir_flag_spec, $1)\"" AC_LINK_IFELSE([AC_LANG_PROGRAM([],[])], [AS_IF([ ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" >/dev/null], [lt_cv_shlibpath_overrides_runpath=yes])]) LDFLAGS=$save_LDFLAGS libdir=$save_libdir ]) shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. 
dynamic_linker='GNU/Linux ld.so' ;; netbsdelf*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='NetBSD ld.elf_so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; *nto* | *qnx*) version_type=qnx need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes dynamic_linker='ldqnx.so' ;; openbsd*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
case $host_os in openbsd3.3 | openbsd3.3.*) need_version=yes ;; *) need_version=no ;; esac library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then case $host_os in openbsd2.[[89]] | openbsd2.[[89]].*) shlibpath_overrides_runpath=no ;; *) shlibpath_overrides_runpath=yes ;; esac else shlibpath_overrides_runpath=yes fi ;; os2*) libname_spec='$name' shrext_cmds=".dll" need_lib_prefix=no library_names_spec='$libname${shared_ext} $libname.a' dynamic_linker='OS/2 ld.exe' shlibpath_var=LIBPATH ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test "$with_gnu_ld" = yes; then need_lib_prefix=no fi need_version=yes ;; 
sysv4 | sysv4.3*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec ;then version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' soname_spec='$libname${shared_ext}.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=freebsd-elf need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes if test "$with_gnu_ld" = yes; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; tpf*) # TPF is a cross-target only. Preferred cross-host = GNU/Linux. 
version_type=linux # correct to gnu/linux during the next big refactor need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; uts4*) version_type=linux # correct to gnu/linux during the next big refactor library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac AC_MSG_RESULT([$dynamic_linker]) test "$dynamic_linker" = no && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec" fi if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec" fi _LT_DECL([], [variables_saved_for_relink], [1], [Variables whose values should be saved in libtool wrapper scripts and restored at link time]) _LT_DECL([], [need_lib_prefix], [0], [Do we need the "lib" prefix for modules?]) _LT_DECL([], [need_version], [0], [Do we need a version for libraries?]) _LT_DECL([], [version_type], [0], [Library versioning type]) _LT_DECL([], [runpath_var], [0], [Shared library runtime path variable]) _LT_DECL([], [shlibpath_var], [0],[Shared library path variable]) _LT_DECL([], [shlibpath_overrides_runpath], [0], [Is shlibpath searched before the hard-coded library search path?]) _LT_DECL([], [libname_spec], [1], [Format of library name prefix]) _LT_DECL([], [library_names_spec], [1], [[List of archive names. First name is the real one, the rest are links. 
The last name is the one that the linker finds with -lNAME]])
_LT_DECL([], [soname_spec], [1],
    [[The coded name of the library, if different from the real name]])
_LT_DECL([], [install_override_mode], [1],
    [Permission mode override for installation of shared libraries])
_LT_DECL([], [postinstall_cmds], [2],
    [Command to use after installation of a shared archive])
_LT_DECL([], [postuninstall_cmds], [2],
    [Command to use after uninstallation of a shared archive])
_LT_DECL([], [finish_cmds], [2],
    [Commands used to finish a libtool library installation in a directory])
_LT_DECL([], [finish_eval], [1],
    [[As "finish_cmds", except a single script fragment to be evaled but
    not shown]])
_LT_DECL([], [hardcode_into_libs], [0],
    [Whether we should hardcode library paths into libraries])
_LT_DECL([], [sys_lib_search_path_spec], [2],
    [Compile-time system search path for libraries])
_LT_DECL([], [sys_lib_dlsearch_path_spec], [2],
    [Run-time system search path for libraries])
])# _LT_SYS_DYNAMIC_LINKER


# _LT_PATH_TOOL_PREFIX(TOOL, [PATH])
# ----------------------------------
# find a file program which can recognize shared library
#
# TOOL is the program name looked up in each directory.  PATH is an
# optional list of directories separated by $PATH_SEPARATOR; when it is
# empty the value of $PATH is searched instead.  The first regular file
# found is cached in lt_cv_path_MAGIC_CMD and copied to MAGIC_CMD.  When
# $file_magic_test_file is set and the check method is "file_magic", the
# candidate is run once on that file and a warning is printed to stderr
# if its output does not match the expected regular expression.
AC_DEFUN([_LT_PATH_TOOL_PREFIX],
[m4_require([_LT_DECL_EGREP])dnl
AC_MSG_CHECKING([for $1])
AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
[case $MAGIC_CMD in
[[\\/*] | ?:[\\/]*])
  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
  ;;
*)
  lt_save_MAGIC_CMD="$MAGIC_CMD"
  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
dnl $ac_dummy forces splitting on constant user-supplied paths.
dnl POSIX.2 word splitting is done only on the output of word expansions,
dnl not every word. This closes a longstanding sh security hole.
  ac_dummy="m4_if([$2], , $PATH, [$2])"
  for ac_dir in $ac_dummy; do
    IFS="$lt_save_ifs"
    test -z "$ac_dir" && ac_dir=.
    # Quoted so that a search directory containing whitespace still works.
    if test -f "$ac_dir/$1"; then
      lt_cv_path_MAGIC_CMD="$ac_dir/$1"
      if test -n "$file_magic_test_file"; then
        case $deplibs_check_method in
        "file_magic "*)
          file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
          MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
          if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
            $EGREP "$file_magic_regex" > /dev/null; then
            :
          else
            cat <<_LT_EOF 1>&2
*** Warning: the command libtool uses to detect shared libraries,
*** $file_magic_cmd, produces output that libtool cannot recognize.
*** The result is that libtool may fail to recognize shared libraries
*** as such. This will affect the creation of libtool libraries that
*** depend on shared libraries, but programs linked with such libtool
*** libraries will work regardless of this problem. Nevertheless, you
*** may want to report the problem to your system manager and/or to
*** bug-libtool@gnu.org
_LT_EOF
          fi ;;
        esac
      fi
      break
    fi
  done
  IFS="$lt_save_ifs"
  MAGIC_CMD="$lt_save_MAGIC_CMD"
  ;;
esac])
MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
if test -n "$MAGIC_CMD"; then
  AC_MSG_RESULT($MAGIC_CMD)
else
  AC_MSG_RESULT(no)
fi
_LT_DECL([], [MAGIC_CMD], [0],
    [Used to examine libraries when file_magic_cmd begins with "file"])dnl
])# _LT_PATH_TOOL_PREFIX

# Old name:
AU_ALIAS([AC_PATH_TOOL_PREFIX], [_LT_PATH_TOOL_PREFIX])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_PATH_TOOL_PREFIX], [])


# _LT_PATH_MAGIC
# --------------
# find a file program which can recognize a shared library
#
# Looks for the tool-prefixed "file" program first, in /usr/bin followed
# by $PATH.  If nothing was cached and a tool prefix is in use, the
# unprefixed "file" is tried; without a tool prefix MAGIC_CMD becomes ":".
m4_defun([_LT_PATH_MAGIC],
[_LT_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
if test -z "$lt_cv_path_MAGIC_CMD"; then
  if test -n "$ac_tool_prefix"; then
    _LT_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
  else
    MAGIC_CMD=:
  fi
fi
])# _LT_PATH_MAGIC


# LT_PATH_LD
# ----------
# find the pathname to the GNU or non-GNU linker
AC_DEFUN([LT_PATH_LD],
[AC_REQUIRE([AC_PROG_CC])dnl
AC_REQUIRE([AC_CANONICAL_HOST])dnl
AC_REQUIRE([AC_CANONICAL_BUILD])dnl
m4_require([_LT_DECL_SED])dnl
m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_PROG_ECHO_BACKSLASH])dnl AC_ARG_WITH([gnu-ld], [AS_HELP_STRING([--with-gnu-ld], [assume the C compiler uses GNU ld @<:@default=no@:>@])], [test "$withval" = no || with_gnu_ld=yes], [with_gnu_ld=no])dnl ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. AC_MSG_CHECKING([for ld used by $CC]) case $host in *-*-mingw*) # gcc leaves a trailing carriage return which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [[\\/]]* | ?:[[\\/]]*) re_direlt='/[[^/]][[^/]]*/\.\./' # Canonicalize the pathname of ld ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'` while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD="$ac_prog" ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. with_gnu_ld=unknown ;; esac elif test "$with_gnu_ld" = yes; then AC_MSG_CHECKING([for GNU ld]) else AC_MSG_CHECKING([for non-GNU ld]) fi AC_CACHE_VAL(lt_cv_path_LD, [if test -z "$LD"; then lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD="$ac_dir/$ac_prog" # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &1 /dev/null 2>&1; then lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' else # Keep this pattern in sync with the one in func_win32_libid. 
lt_cv_deplibs_check_method='file_magic file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' lt_cv_file_magic_cmd='$OBJDUMP -f' fi ;; cegcc*) # use the weaker test based on 'objdump'. See mingw*. lt_cv_deplibs_check_method='file_magic file format pe-arm-.*little(.*architecture: arm)?' lt_cv_file_magic_cmd='$OBJDUMP -f' ;; darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; freebsd* | dragonfly*) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac else lt_cv_deplibs_check_method=pass_all fi ;; gnu*) lt_cv_deplibs_check_method=pass_all ;; haiku*) lt_cv_deplibs_check_method=pass_all ;; hpux10.20* | hpux11*) lt_cv_file_magic_cmd=/usr/bin/file case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so ;; hppa*64*) [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ -][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'] lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl ;; *) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]]\.[[0-9]]) shared library' lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; esac ;; interix[[3-9]]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' ;; irix5* | irix6* | nonstopux*) case $LD in *-32|*"-32 ") libmagic=32-bit;; *-n32|*"-n32 ") libmagic=N32;; *-64|*"-64 ") libmagic=64-bit;; *) libmagic=never-match;; esac lt_cv_deplibs_check_method=pass_all ;; # This must be glibc/ELF. 
linux* | k*bsd*-gnu | kopensolaris*-gnu) lt_cv_deplibs_check_method=pass_all ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' fi ;; newos6*) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; *nto* | *qnx*) lt_cv_deplibs_check_method=pass_all ;; openbsd*) if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' fi ;; osf3* | osf4* | osf5*) lt_cv_deplibs_check_method=pass_all ;; rdos*) lt_cv_deplibs_check_method=pass_all ;; solaris*) lt_cv_deplibs_check_method=pass_all ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.3*) case $host_vendor in motorola) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) lt_cv_deplibs_check_method=pass_all ;; sequent) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; siemens) lt_cv_deplibs_check_method=pass_all ;; pc) lt_cv_deplibs_check_method=pass_all ;; esac ;; tpf*) lt_cv_deplibs_check_method=pass_all ;; esac ]) file_magic_glob= want_nocaseglob=no if test "$build" = "$host"; then case $host_os in 
mingw* | pw32*)
  if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
    want_nocaseglob=yes
  else
    file_magic_glob=`echo aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e "s/\(..\)/s\/[[\1]]\/[[\1]]\/g;/g"`
  fi
  ;;
  esac
fi

file_magic_cmd=$lt_cv_file_magic_cmd
deplibs_check_method=$lt_cv_deplibs_check_method
test -z "$deplibs_check_method" && deplibs_check_method=unknown

_LT_DECL([], [deplibs_check_method], [1],
    [Method to check whether dependent libraries are shared objects])
_LT_DECL([], [file_magic_cmd], [1],
    [Command to use when deplibs_check_method = "file_magic"])
_LT_DECL([], [file_magic_glob], [1],
    [How to find potential files when deplibs_check_method = "file_magic"])
_LT_DECL([], [want_nocaseglob], [1],
    [Find potential files using nocaseglob when deplibs_check_method = "file_magic"])
])# _LT_CHECK_MAGIC_METHOD


# LT_PATH_NM
# ----------
# find the pathname to a BSD- or MS-compatible name lister
#
# A user-supplied $NM always wins.  Otherwise each candidate name (the
# tool-prefixed nm, plus plain nm for native builds with a tool prefix)
# is searched for in $PATH and a few fixed system directories.  A
# candidate accepting -B is preferred, then one accepting -p; failing
# both, the first nm found is remembered while the search goes on.  When
# no nm is found at all, dumpbin (or "link -dump") is tried instead.
# Finally the output of the chosen lister on a small test object decides
# whether lt_cv_nm_interface is "BSD nm" or "MS dumpbin".
AC_DEFUN([LT_PATH_NM],
[AC_REQUIRE([AC_PROG_CC])dnl
AC_CACHE_CHECK([for BSD- or MS-compatible name lister (nm)], lt_cv_path_NM,
[if test -n "$NM"; then
  # Let the user override the test.
  lt_cv_path_NM="$NM"
else
  lt_nm_to_check="${ac_tool_prefix}nm"
  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
    lt_nm_to_check="$lt_nm_to_check nm"
  fi
  for lt_tmp_nm in $lt_nm_to_check; do
    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
      IFS="$lt_save_ifs"
      test -z "$ac_dir" && ac_dir=.
      tmp_nm="$ac_dir/$lt_tmp_nm"
      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
        # Check to see if the nm accepts a BSD-compat flag.
        # Adding the `sed 1q' prevents false positives on HP-UX, which says:
        # nm: unknown option "B" ignored
        # Tru64's nm complains that /dev/null is an invalid object file
        case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
        */dev/null* | *'Invalid file or object type'*)
          lt_cv_path_NM="$tmp_nm -B"
          break
          ;;
        *)
          case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
          */dev/null*)
            lt_cv_path_NM="$tmp_nm -p"
            break
            ;;
          *)
            lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
            continue # so that we can try to find one that supports BSD flags
            ;;
          esac
          ;;
        esac
      fi
    done
    IFS="$lt_save_ifs"
  done
  : ${lt_cv_path_NM=no}
fi])
if test "$lt_cv_path_NM" != "no"; then
  NM="$lt_cv_path_NM"
else
  # Didn't find any BSD compatible name lister, look for dumpbin.
  if test -n "$DUMPBIN"; then :
    # Let the user override the test.
  else
    AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :)
    case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
    *COFF*)
      DUMPBIN="$DUMPBIN -symbols"
      ;;
    *)
      DUMPBIN=:
      ;;
    esac
  fi
  AC_SUBST([DUMPBIN])
  if test "$DUMPBIN" != ":"; then
    NM="$DUMPBIN"
  fi
fi
test -z "$NM" && NM=nm
AC_SUBST([NM])
_LT_DECL([], [NM], [1], [A BSD- or MS-compatible name lister])dnl

AC_CACHE_CHECK([the name lister ($NM) interface], [lt_cv_nm_interface],
  [lt_cv_nm_interface="BSD nm"
  echo "int some_variable = 0;" > conftest.$ac_ext
  (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&AS_MESSAGE_LOG_FD)
  (eval "$ac_compile" 2>conftest.err)
  cat conftest.err >&AS_MESSAGE_LOG_FD
  (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&AS_MESSAGE_LOG_FD)
  (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
  cat conftest.err >&AS_MESSAGE_LOG_FD
  (eval echo "\"\$as_me:$LINENO: output\"" >&AS_MESSAGE_LOG_FD)
  cat conftest.out >&AS_MESSAGE_LOG_FD
  if $GREP 'External.*some_variable' conftest.out > /dev/null; then
    lt_cv_nm_interface="MS dumpbin"
  fi
  rm -f conftest*])
])# LT_PATH_NM

# Old names:
AU_ALIAS([AM_PROG_NM], [LT_PATH_NM])
AU_ALIAS([AC_PROG_NM], [LT_PATH_NM])
dnl aclocal-1.4 backwards compatibility:
dnl
dnl AC_DEFUN([AM_PROG_NM], [])
dnl AC_DEFUN([AC_PROG_NM], [])


# _LT_CHECK_SHAREDLIB_FROM_LINKLIB
# --------------------------------
# how to determine the name of the shared library
# associated with a specific link library.
# -- PORTME fill in with the dynamic library characteristics
#
# On cygwin, mingw, pw32 and cegcc hosts one of two ltmain.sh shell
# functions is selected, depending on whether $DLLTOOL advertises
# --identify-strict in its help output.  Every other host uses $ECHO,
# i.e. the link library is taken to be the shared library itself.
m4_defun([_LT_CHECK_SHAREDLIB_FROM_LINKLIB],
[m4_require([_LT_DECL_EGREP])
m4_require([_LT_DECL_OBJDUMP])
m4_require([_LT_DECL_DLLTOOL])
AC_CACHE_CHECK([how to associate runtime and link libraries],
lt_cv_sharedlib_from_linklib_cmd,
[lt_cv_sharedlib_from_linklib_cmd='unknown'

case $host_os in
cygwin* | mingw* | pw32* | cegcc*)
  # two different shell functions defined in ltmain.sh
  # decide which to use based on capabilities of $DLLTOOL
  case `$DLLTOOL --help 2>&1` in
  *--identify-strict*)
    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
    ;;
  *)
    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
    ;;
  esac
  ;;
*)
  # fallback: assume linklib IS sharedlib
  lt_cv_sharedlib_from_linklib_cmd="$ECHO"
  ;;
esac
])
sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO

_LT_DECL([], [sharedlib_from_linklib_cmd], [1],
    [Command to associate shared and link libraries])
])# _LT_CHECK_SHAREDLIB_FROM_LINKLIB


# _LT_PATH_MANIFEST_TOOL
# ----------------------
# locate the manifest tool
#
# The candidate is accepted only if running it with '-?' prints the
# words "Manifest Tool"; otherwise MANIFEST_TOOL is set to ":".
# NOTE: the cache variable really is spelled "mainfest"; the spelling is
# kept as is because it is the name stored in existing config caches.
m4_defun([_LT_PATH_MANIFEST_TOOL],
[AC_CHECK_TOOL(MANIFEST_TOOL, mt, :)
test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
AC_CACHE_CHECK([if $MANIFEST_TOOL is a manifest tool], [lt_cv_path_mainfest_tool],
  [lt_cv_path_mainfest_tool=no
  echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&AS_MESSAGE_LOG_FD
  $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
  cat conftest.err >&AS_MESSAGE_LOG_FD
  if $GREP 'Manifest Tool' conftest.out > /dev/null; then
    lt_cv_path_mainfest_tool=yes
  fi
  rm -f conftest*])
if test "x$lt_cv_path_mainfest_tool" != xyes; then
  MANIFEST_TOOL=:
fi
_LT_DECL([], [MANIFEST_TOOL], [1], [Manifest tool])dnl
])# _LT_PATH_MANIFEST_TOOL


# LT_LIB_M
# --------
# check for math library
#
# Sets and substitutes LIBM: left empty on the hosts listed in the first
# case arm, "-lmw" and/or "-lm" on NCR SysV4.3 depending on which
# libraries are found, and "-lm" elsewhere when libm provides cos.
AC_DEFUN([LT_LIB_M],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
LIBM=
case $host in
*-*-beos* | *-*-cegcc* | *-*-cygwin* | *-*-haiku* | *-*-pw32* | *-*-darwin*)
  # These systems don't have libm, or don't need it
  ;;
*-ncr-sysv4.3*)
  AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
  AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm")
  ;;
*)
  AC_CHECK_LIB(m, cos, LIBM="-lm")
  ;;
esac
AC_SUBST([LIBM])
])# LT_LIB_M

# Old name:
AU_ALIAS([AC_CHECK_LIBM], [LT_LIB_M])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_CHECK_LIBM], [])


# _LT_COMPILER_NO_RTTI([TAGNAME])
# -------------------------------
# Computes the per-tag lt_prog_compiler_no_builtin_flag.  It stays empty
# unless $GCC is yes; then it becomes ' -fno-builtin' (passed through
# -Xcompiler for nvcc), with ' -fno-rtti -fno-exceptions' appended when
# the compiler option test accepts those flags.
m4_defun([_LT_COMPILER_NO_RTTI],
[m4_require([_LT_TAG_COMPILER])dnl

_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=

if test "$GCC" = yes; then
  case $cc_basename in
  nvcc*)
    _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -Xcompiler -fno-builtin' ;;
  *)
    _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' ;;
  esac

  _LT_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions],
    lt_cv_prog_compiler_rtti_exceptions,
    [-fno-rtti -fno-exceptions], [],
    [_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"])
fi
_LT_TAGDECL([no_builtin_flag], [lt_prog_compiler_no_builtin_flag], [1],
    [Compiler flag to turn off builtin functions])
])# _LT_COMPILER_NO_RTTI


# _LT_CMD_GLOBAL_SYMBOLS
# ----------------------
m4_defun([_LT_CMD_GLOBAL_SYMBOLS],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
AC_REQUIRE([AC_PROG_CC])dnl
AC_REQUIRE([AC_PROG_AWK])dnl
AC_REQUIRE([LT_PATH_NM])dnl
AC_REQUIRE([LT_PATH_LD])dnl
m4_require([_LT_DECL_SED])dnl
m4_require([_LT_DECL_EGREP])dnl
m4_require([_LT_TAG_COMPILER])dnl # Check for command to grab the raw symbol name followed by C symbol from nm. AC_MSG_CHECKING([command to parse $NM output from $compiler object]) AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], [ # These are sane defaults that work on at least a few old systems. # [They come from Ultrix. What could be older than Ultrix?!! ;)] # Character class describing NM global symbol codes. symcode='[[BCDEGRST]]' # Regexp to match symbols that can be accessed directly from C. sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)' # Define system-specific variables. case $host_os in aix*) symcode='[[BCDT]]' ;; cygwin* | mingw* | pw32* | cegcc*) symcode='[[ABCDGISTW]]' ;; hpux*) if test "$host_cpu" = ia64; then symcode='[[ABCDEGRST]]' fi ;; irix* | nonstopux*) symcode='[[BCDEGRST]]' ;; osf*) symcode='[[BCDEGQRST]]' ;; solaris*) symcode='[[BDRT]]' ;; sco3.2v5*) symcode='[[DT]]' ;; sysv4.2uw2*) symcode='[[DT]]' ;; sysv5* | sco5v6* | unixware* | OpenUNIX*) symcode='[[ABDT]]' ;; sysv4) symcode='[[DFNSTU]]' ;; esac # If we're using GNU nm, then use its standard symbol codes. case `$NM -V 2>&1` in *GNU* | *'with BFD'*) symcode='[[ABCDGIRSTW]]' ;; esac # Transform an extracted symbol line into a proper C declaration. # Some systems (esp. on ia64) link data and code symbols differently, # so use this general approach. 
lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" # Transform an extracted symbol line into symbol name and symbol address lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p'" lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([[^ ]]*\)[[ ]]*$/ {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \(lib[[^ ]]*\)$/ {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"lib\2\", (void *) \&\2},/p'" # Handle CRLF in mingw tool chain opt_cr= case $build_os in mingw*) opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp ;; esac # Try without a prefix underscore, then with it. for ac_symprfx in "" "_"; do # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. symxfrm="\\1 $ac_symprfx\\2 \\2" # Write the raw and C identifiers. if test "$lt_cv_nm_interface" = "MS dumpbin"; then # Fake it for dumpbin and say T for any non-static function # and D for any global variable. # Also find C++ and __fastcall symbols from MSVC++, # which start with @ or ?. lt_cv_sys_global_symbol_pipe="$AWK ['"\ " {last_section=section; section=\$ 3};"\ " /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\ " /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\ " \$ 0!~/External *\|/{next};"\ " / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\ " {if(hide[section]) next};"\ " {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? 
\"T \" : \"D \"};"\ " {split(\$ 0, a, /\||\r/); split(a[2], s)};"\ " s[1]~/^[@?]/{print s[1], s[1]; next};"\ " s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\ " ' prfx=^$ac_symprfx]" else lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" fi lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'" # Check to see that the pipe works correctly. pipe_works=no rm -f conftest* cat > conftest.$ac_ext <<_LT_EOF #ifdef __cplusplus extern "C" { #endif char nm_test_var; void nm_test_func(void); void nm_test_func(void){} #ifdef __cplusplus } #endif int main(){nm_test_var='a';nm_test_func();return(0);} _LT_EOF if AC_TRY_EVAL(ac_compile); then # Now try to grab the symbols. nlist=conftest.nm if AC_TRY_EVAL(NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) && test -s "$nlist"; then # Try sorting and uniquifying the output. if sort "$nlist" | uniq > "$nlist"T; then mv -f "$nlist"T "$nlist" else rm -f "$nlist"T fi # Make sure that we snagged all the symbols we need. if $GREP ' nm_test_var$' "$nlist" >/dev/null; then if $GREP ' nm_test_func$' "$nlist" >/dev/null; then cat <<_LT_EOF > conftest.$ac_ext /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) /* DATA imports from DLLs on WIN32 con't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT@&t@_DLSYM_CONST #elif defined(__osf__) /* This system does not cope well with relocations in const data. */ # define LT@&t@_DLSYM_CONST #else # define LT@&t@_DLSYM_CONST const #endif #ifdef __cplusplus extern "C" { #endif _LT_EOF # Now generate the symbol file. eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main >> conftest.$ac_ext' cat <<_LT_EOF >> conftest.$ac_ext /* The mapping between symbol names and symbols. 
*/ LT@&t@_DLSYM_CONST struct { const char *name; void *address; } lt__PROGRAM__LTX_preloaded_symbols[[]] = { { "@PROGRAM@", (void *) 0 }, _LT_EOF $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (void *) \&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext cat <<\_LT_EOF >> conftest.$ac_ext {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt__PROGRAM__LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif _LT_EOF # Now try linking the two files. mv conftest.$ac_objext conftstm.$ac_objext lt_globsym_save_LIBS=$LIBS lt_globsym_save_CFLAGS=$CFLAGS LIBS="conftstm.$ac_objext" CFLAGS="$CFLAGS$_LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then pipe_works=yes fi LIBS=$lt_globsym_save_LIBS CFLAGS=$lt_globsym_save_CFLAGS else echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD fi else echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD fi else echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD fi else echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD cat conftest.$ac_ext >&5 fi rm -rf conftest* conftst* # Do not use the global_symbol_pipe unless it works. if test "$pipe_works" = yes; then break else lt_cv_sys_global_symbol_pipe= fi done ]) if test -z "$lt_cv_sys_global_symbol_pipe"; then lt_cv_sys_global_symbol_to_cdecl= fi if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then AC_MSG_RESULT(failed) else AC_MSG_RESULT(ok) fi # Response file support. 
if test "$lt_cv_nm_interface" = "MS dumpbin"; then nm_file_list_spec='@' elif $NM --help 2>/dev/null | grep '[[@]]FILE' >/dev/null; then nm_file_list_spec='@' fi _LT_DECL([global_symbol_pipe], [lt_cv_sys_global_symbol_pipe], [1], [Take the output of nm and produce a listing of raw symbols and C names]) _LT_DECL([global_symbol_to_cdecl], [lt_cv_sys_global_symbol_to_cdecl], [1], [Transform the output of nm in a proper C declaration]) _LT_DECL([global_symbol_to_c_name_address], [lt_cv_sys_global_symbol_to_c_name_address], [1], [Transform the output of nm in a C name address pair]) _LT_DECL([global_symbol_to_c_name_address_lib_prefix], [lt_cv_sys_global_symbol_to_c_name_address_lib_prefix], [1], [Transform the output of nm in a C name address pair when lib prefix is needed]) _LT_DECL([], [nm_file_list_spec], [1], [Specify filename containing input files for $NM]) ]) # _LT_CMD_GLOBAL_SYMBOLS # _LT_COMPILER_PIC([TAGNAME]) # --------------------------- m4_defun([_LT_COMPILER_PIC], [m4_require([_LT_TAG_COMPILER])dnl _LT_TAGVAR(lt_prog_compiler_wl, $1)= _LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)= m4_if([$1], [CXX], [ # C++ specific cases for pic, static, wl, etc. if test "$GXX" = yes; then _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' case $host_os in aix*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. 
;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' ;; *djgpp*) # DJGPP does not support shared libraries at all _LT_TAGVAR(lt_prog_compiler_pic, $1)= ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. _LT_TAGVAR(lt_prog_compiler_static, $1)= ;; interix[[3-9]]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic fi ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac else case $host_os in aix[[4-9]]*) # All AIX code is PIC. 
if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' else _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' fi ;; chorus*) case $cc_basename in cxch68*) # Green Hills C++ Compiler # _LT_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" ;; esac ;; mingw* | cygwin* | os2* | pw32* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; dgux*) case $cc_basename in ec++*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' ;; ghcx*) # Green Hills C++ Compiler _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; *) ;; esac ;; freebsd* | dragonfly*) # FreeBSD uses GNU C++ ;; hpux9* | hpux10* | hpux11*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' if test "$host_cpu" != ia64; then _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' fi ;; aCC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' ;; esac ;; *) ;; esac ;; interix*) # This is c89, which is MS Visual C++ (no shared libs) # Anyone wants to do a port? ;; irix5* | irix6* | nonstopux*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' # CC pic flag -KPIC is the default. ;; *) ;; esac ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in KCC*) # KAI C++ Compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; ecpc* ) # old Intel C++ for x86_64 which still supported -KPIC. 
_LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; icpc* ) # Intel C++, used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; pgCC* | pgcpp*) # Portland Group C++ compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; cxx*) # Compaq C++ # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. _LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; xlc* | xlC* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL 8.0, 9.0 on PPC and BlueGene _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; esac ;; esac ;; lynxos*) ;; m88k*) ;; mvs*) case $cc_basename in cxx*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' ;; *) ;; esac ;; netbsd* | netbsdelf*-gnu) ;; *qnx* | *nto*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' ;; RCC*) # Rational C++ 2.4.1 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; cxx*) # Digital/Compaq C++ _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # Make sure the PIC flag is empty. It appears that all Alpha # Linux and Compaq Tru64 Unix objects are PIC. 
_LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; *) ;; esac ;; psos*) ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; gcx*) # Green Hills C++ Compiler _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' ;; *) ;; esac ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; lcc*) # Lucid _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' ;; *) ;; esac ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) case $cc_basename in CC*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' ;; *) ;; esac ;; vxworks*) ;; *) _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; esac fi ], [ if test "$GCC" = yes; then _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' case $host_os in aix*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; m68k) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' ;; esac ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. 
;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' ;; haiku*) # PIC is the default for Haiku. # The "-static" flag exists, but is broken. _LT_TAGVAR(lt_prog_compiler_static, $1)= ;; hpux*) # PIC is the default for 64-bit PA HP-UX, but not for 32-bit # PA HP-UX. On IA64 HP-UX, PIC is the default but the pic flag # sets the default TLS model and affects inlining. case $host_cpu in hppa*64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac ;; interix[[3-9]]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; msdosdjgpp*) # Just because we use GCC doesn't mean we suddenly get shared libraries # on systems that don't support them. _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no enable_shared=no ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic fi ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' ;; esac case $cc_basename in nvcc*) # Cuda Compiler Driver 2.2 _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Xlinker ' if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then _LT_TAGVAR(lt_prog_compiler_pic, $1)="-Xcompiler $_LT_TAGVAR(lt_prog_compiler_pic, $1)" fi ;; esac else # PORTME Check for flag to pass linker flags through the system compiler. 
case $host_os in aix*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' else _LT_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' fi ;; mingw* | cygwin* | pw32* | os2* | cegcc*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). m4_if([$1], [GCJ], [], [_LT_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT']) ;; hpux9* | hpux10* | hpux11*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? _LT_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # PIC (with -KPIC) is the default. _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in # old Intel for x86_64 which still supported -KPIC. ecc*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; # icc used to be incompatible with GCC. # ICC 10 doesn't accept -KPIC any more. icc* | ifort*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; # Lahey Fortran 8.1. 
lf95*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='--shared' _LT_TAGVAR(lt_prog_compiler_static, $1)='--static' ;; nagfor*) # NAG Fortran compiler _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,-Wl,,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; ccc*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # All Alpha code is PIC. _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; xl* | bgxl* | bgf* | mpixl*) # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-qpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*) # Sun Fortran 8.3 passes all unrecognized flags to the linker _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='' ;; *Sun\ F* | *Sun*Fortran*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' ;; *Sun\ C*) # Sun C 5.9 _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' ;; *Intel*\ [[CF]]*Compiler*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-static' ;; *Portland\ Group*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; esac ;; esac ;; newsos6) 
_LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; *nto* | *qnx*) # QNX uses GNU C++, but need to define -shared option too, otherwise # it will coredump. _LT_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC -shared' ;; osf3* | osf4* | osf5*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' # All OSF/1 code is PIC. _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; rdos*) _LT_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' ;; solaris*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' case $cc_basename in f77* | f90* | f95* | sunf77* | sunf90* | sunf95*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; *) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; esac ;; sunos4*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; sysv4 | sysv4.2uw2* | sysv4.3*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; sysv4*MP*) if test -d /usr/nec ;then _LT_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' fi ;; sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; unicos*) _LT_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; uts4*) _LT_TAGVAR(lt_prog_compiler_pic, $1)='-pic' _LT_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' ;; *) _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no ;; esac fi ]) case $host_os in # For platforms which do not support PIC, -DPIC is meaningless: *djgpp*) _LT_TAGVAR(lt_prog_compiler_pic, $1)= ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])" ;; esac 
AC_CACHE_CHECK([for $compiler option to produce PIC], [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)], [_LT_TAGVAR(lt_cv_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_prog_compiler_pic, $1)]) _LT_TAGVAR(lt_prog_compiler_pic, $1)=$_LT_TAGVAR(lt_cv_prog_compiler_pic, $1) # # Check to make sure the PIC flag actually works. # if test -n "$_LT_TAGVAR(lt_prog_compiler_pic, $1)"; then _LT_COMPILER_OPTION([if $compiler PIC flag $_LT_TAGVAR(lt_prog_compiler_pic, $1) works], [_LT_TAGVAR(lt_cv_prog_compiler_pic_works, $1)], [$_LT_TAGVAR(lt_prog_compiler_pic, $1)@&t@m4_if([$1],[],[ -DPIC],[m4_if([$1],[CXX],[ -DPIC],[])])], [], [case $_LT_TAGVAR(lt_prog_compiler_pic, $1) in "" | " "*) ;; *) _LT_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_TAGVAR(lt_prog_compiler_pic, $1)" ;; esac], [_LT_TAGVAR(lt_prog_compiler_pic, $1)= _LT_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) fi _LT_TAGDECL([pic_flag], [lt_prog_compiler_pic], [1], [Additional compiler flags for building library objects]) _LT_TAGDECL([wl], [lt_prog_compiler_wl], [1], [How to pass a linker flag through the compiler]) # # Check to make sure the static flag actually works. # wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_TAGVAR(lt_prog_compiler_static, $1)\" _LT_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], _LT_TAGVAR(lt_cv_prog_compiler_static_works, $1), $lt_tmp_static_flag, [], [_LT_TAGVAR(lt_prog_compiler_static, $1)=]) _LT_TAGDECL([link_static_flag], [lt_prog_compiler_static], [1], [Compiler flag to prevent dynamic linking]) ])# _LT_COMPILER_PIC # _LT_LINKER_SHLIBS([TAGNAME]) # ---------------------------- # See if the linker supports building shared libraries. 
m4_defun([_LT_LINKER_SHLIBS], [AC_REQUIRE([LT_PATH_LD])dnl AC_REQUIRE([LT_PATH_NM])dnl m4_require([_LT_PATH_MANIFEST_TOOL])dnl m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_DECL_SED])dnl m4_require([_LT_CMD_GLOBAL_SYMBOLS])dnl m4_require([_LT_TAG_COMPILER])dnl AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) m4_if([$1], [CXX], [ _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] case $host_os in aix[[4-9]]*) # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm # Also, AIX nm treats weak defined symbols like other global defined # symbols, whereas GNU nm marks them as "W". if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' else _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' fi ;; pw32*) _LT_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" ;; cygwin* | mingw* | cegcc*) case $cc_basename in cl*) _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' ;; *) _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, 
$1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] ;; esac ;; linux* | k*bsd*-gnu | gnu*) _LT_TAGVAR(link_all_deplibs, $1)=no ;; *) _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; esac ], [ runpath_var= _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_cmds, $1)= _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(compiler_needs_object, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(old_archive_from_new_cmds, $1)= _LT_TAGVAR(old_archive_from_expsyms_cmds, $1)= _LT_TAGVAR(thread_safe_flag_spec, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list _LT_TAGVAR(include_expsyms, $1)= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ` (' and `)$', so one must not match beginning or # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', # as well as any symbol that contains `d'. 
_LT_TAGVAR(exclude_expsyms, $1)=['_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'] # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. # Exclude shared library initialization/finalization symbols. dnl Note also adjust exclude_expsyms for C++ above. extract_expsyms_cmds= case $host_os in cygwin* | mingw* | pw32* | cegcc*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. if test "$GCC" != yes; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++) with_gnu_ld=yes ;; openbsd*) with_gnu_ld=no ;; linux* | k*bsd*-gnu | gnu*) _LT_TAGVAR(link_all_deplibs, $1)=no ;; esac _LT_TAGVAR(ld_shlibs, $1)=yes # On some targets, GNU ld is compatible enough with the native linker # that we're better off using the native interface for both. lt_use_gnu_ld_interface=no if test "$with_gnu_ld" = yes; then case $host_os in aix*) # The AIX port of GNU ld has always aspired to compatibility # with the native linker. However, as the warning in the GNU ld # block says, versions before 2.19.5* couldn't really create working # shared libraries, regardless of the interface used. case `$LD -v 2>&1` in *\ \(GNU\ Binutils\)\ 2.19.5*) ;; *\ \(GNU\ Binutils\)\ 2.[[2-9]]*) ;; *\ \(GNU\ Binutils\)\ [[3-9]]*) ;; *) lt_use_gnu_ld_interface=yes ;; esac ;; *) lt_use_gnu_ld_interface=yes ;; esac fi if test "$lt_use_gnu_ld_interface" = yes; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='${wl}' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. 
runpath_var=LD_RUN_PATH _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else _LT_TAGVAR(whole_archive_flag_spec, $1)= fi supports_anon_versioning=no case `$LD -v 2>&1` in *GNU\ gold*) supports_anon_versioning=yes ;; *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix[[3-9]]*) # On AIX/PPC, the GNU linker is very broken if test "$host_cpu" != ia64; then _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: the GNU linker, at least up to release 2.19, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to install binutils *** 2.20 or above, or modify your PATH so that a non-GNU linker is found. *** You will then need to restart the configuration process. 
_LT_EOF fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='' ;; m68k) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; cygwin* | mingw* | pw32* | cegcc*) # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.* //'\'' | sort | uniq > $export_symbols' _LT_TAGVAR(exclude_expsyms, $1)=['[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'] if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; haiku*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(link_all_deplibs, $1)=yes ;; interix[[3-9]]*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. 
# Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu) tmp_diet=no if test "$host_os" = linux-dietlibc; then case $cc_basename in diet\ *) tmp_diet=yes;; # linux-dietlibc with static linking (!diet-dyn) esac fi if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \ && test "$tmp_diet" = no then tmp_addflag=' $pic_flag' tmp_sharedflag='-shared' case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95* | pgfortran*) # Portland Group f77 and f90 compilers _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler 
on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; lf95*) # Lahey Fortran 8.1 _LT_TAGVAR(whole_archive_flag_spec, $1)= tmp_sharedflag='--shared' ;; xl[[cC]]* | bgxl[[cC]]* | mpixl[[cC]]*) # IBM XL C 8.0 on PPC (deal with xlf below) tmp_sharedflag='-qmkshrobj' tmp_addflag= ;; nvcc*) # Cuda Compiler Driver 2.2 _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes ;; esac case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C 5.9 _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; esac _LT_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test "x$supports_anon_versioning" = xyes; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi case $cc_basename in xlf* | bgf* | bgxlf* | mpixlf*) # IBM XL Fortran 10.1 on PPC cannot create shared libs itself _LT_TAGVAR(whole_archive_flag_spec, $1)='--whole-archive$convenience --no-whole-archive' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname 
-o $lib' if test "x$supports_anon_versioning" = xyes; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib' fi ;; esac else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. 
_LT_EOF elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) _LT_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. 
if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; sunos4*) _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac if test "$_LT_TAGVAR(ld_shlibs, $1)" = no; then runpath_var= _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. _LT_TAGVAR(hardcode_minus_L, $1)=yes if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. 
_LT_TAGVAR(hardcode_direct, $1)=unsupported fi ;; aix[[4-9]]*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm # Also, AIX nm treats weak defined symbols like other global # defined symbols, whereas GNU nm marks them as "W". if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' else _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) for ld_flag in $LDFLAGS; do if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then aix_use_runtimelinking=yes break fi done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
_LT_TAGVAR(archive_cmds, $1)='' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' if test "$GCC" = yes; then case $host_os in aix4.[[012]]|aix4.[[012]].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 _LT_TAGVAR(hardcode_direct, $1)=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi _LT_TAGVAR(link_all_deplibs, $1)=no else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. _LT_TAGVAR(always_export_symbols, $1)=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. _LT_TAGVAR(allow_undefined_flag, $1)='-berok' # Determine the default libpath from the value encoded in an # empty executable. 
_LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' if test "$with_gnu_ld" = yes; then # We only use this code for GNU lds that support --whole-archive. _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' fi _LT_TAGVAR(archive_cmds_need_lc, $1)=yes # This is similar to how AIX traditionally builds its shared libraries. 
_LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; amigaos*) case $host_cpu in powerpc) # see comment about AmigaOS4 .so support _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='' ;; m68k) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac ;; bsdi[[45]]*) _LT_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic ;; cygwin* | mingw* | pw32* | cegcc*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. case $cc_basename in cl*) # Native MSVC _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. 
_LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; else sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. # _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_TAGVAR(exclude_expsyms, $1)='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*' _LT_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1,DATA/'\'' | $SED -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' # Don't use ranlib _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile="$lt_outputfile.exe" lt_tool_outputfile="$lt_tool_outputfile.exe" ;; esac~ if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # Assume MSVC wrapper _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. 
_LT_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `func_echo_all "$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' # FIXME: Should let the user specify the lib program. _LT_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes ;; esac ;; darwin* | rhapsody*) _LT_DARWIN_LINKER_FEATURES($1) ;; dgux*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). freebsd2.2*) _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2.*) _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
freebsd* | dragonfly*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; hpux9*) if test "$GCC" = yes; then _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_direct, $1)=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' ;; hpux10*) if test "$GCC" = yes && test "$with_gnu_ld" = no; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test "$with_gnu_ld" = no; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. 
_LT_TAGVAR(hardcode_minus_L, $1)=yes fi ;; hpux11*) if test "$GCC" = yes && test "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) m4_if($1, [], [ # Older versions of the 11.00 compiler do not understand -b yet # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does) _LT_LINKER_OPTION([if $CC understands -b], _LT_TAGVAR(lt_cv_prog_compiler__b, $1), [-b], [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'], [_LT_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'])], [_LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags']) ;; esac fi if test "$with_gnu_ld" = no; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in hppa*64*|ia64*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. 
_LT_TAGVAR(hardcode_minus_L, $1)=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test "$GCC" = yes; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' # Try to use the -exported_symbol ld option, if it does not # work, assume that -exports_file does not work either and # implicitly export all symbols. # This should be the same for all languages, so no per-tag cache variable. AC_CACHE_CHECK([whether the $host_os linker accepts -exported_symbol], [lt_cv_irix_exported_symbol], [save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null" AC_LINK_IFELSE( [AC_LANG_SOURCE( [AC_LANG_CASE([C], [[int foo (void) { return 0; }]], [C++], [[int foo (void) { return 0; }]], [Fortran 77], [[ subroutine foo end]], [Fortran], [[ subroutine foo end]])])], [lt_cv_irix_exported_symbol=yes], [lt_cv_irix_exported_symbol=no]) LDFLAGS="$save_LDFLAGS"]) if test "$lt_cv_irix_exported_symbol" = yes; then _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib' fi else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -exports_file $export_symbols -o $lib' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(inherit_rpath, $1)=yes _LT_TAGVAR(link_all_deplibs, $1)=yes ;; netbsd* | netbsdelf*-gnu) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else _LT_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; newsos6) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *nto* | *qnx*) ;; openbsd*) if test -f /usr/libexec/ld.so; then _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=yes if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' else case $host_os in openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' ;; esac fi else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; os2*) 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(archive_cmds, $1)='$ECHO "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' _LT_TAGVAR(old_archive_from_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' ;; osf3*) if test "$GCC" = yes; then _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test "$GCC" = yes; then _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $pic_flag $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' 
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' else _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> $lib.exp~ $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp $compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~$RM $lib.exp' # Both c and cxx compiler support -rpath directly _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' fi _LT_TAGVAR(archive_cmds_need_lc, $1)='no' _LT_TAGVAR(hardcode_libdir_separator, $1)=: ;; solaris*) _LT_TAGVAR(no_undefined_flag, $1)=' -z defs' if test "$GCC" = yes; then wlarc='${wl}' _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' else case `$CC -V 2>&1` in *"Compilers 5.0"*) wlarc='' _LT_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$RM $lib.exp' ;; *) wlarc='${wl}' _LT_TAGVAR(archive_cmds, $1)='$CC 
-G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp' ;; esac fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. GCC discards it without `$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) if test "$GCC" = yes; then _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' else _LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' fi ;; esac _LT_TAGVAR(link_all_deplibs, $1)=yes ;; sunos4*) if test "x$host_vendor" = xsequent; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; sysv4) case $host_vendor in sni) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. 
_LT_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' _LT_TAGVAR(hardcode_direct, $1)=no ;; motorola) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; sysv4.3*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes _LT_TAGVAR(ld_shlibs, $1)=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. 
If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) _LT_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(ld_shlibs, $1)=no ;; esac if test x$host_vendor = xsni; then case $host in sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Blargedynsym' ;; esac fi fi ]) AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no _LT_TAGVAR(with_gnu_ld, $1)=$with_gnu_ld _LT_DECL([], [libext], [0], [Old archive suffix (normally "a")])dnl _LT_DECL([], [shrext_cmds], [1], [Shared library suffix (normally ".so")])dnl _LT_DECL([], [extract_expsyms_cmds], [2], [The commands to extract the exported symbol list from a shared archive]) # # Do we need to explicitly link libc? 
# case "x$_LT_TAGVAR(archive_cmds_need_lc, $1)" in x|xyes) # Assume -lc should be added _LT_TAGVAR(archive_cmds_need_lc, $1)=yes if test "$enable_shared" = yes && test "$GCC" = yes; then case $_LT_TAGVAR(archive_cmds, $1) in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. AC_CACHE_CHECK([whether -lc should be explicitly linked in], [lt_cv_]_LT_TAGVAR(archive_cmds_need_lc, $1), [$RM conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if AC_TRY_EVAL(ac_compile) 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$_LT_TAGVAR(lt_prog_compiler_wl, $1) pic_flag=$_LT_TAGVAR(lt_prog_compiler_pic, $1) compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$_LT_TAGVAR(allow_undefined_flag, $1) _LT_TAGVAR(allow_undefined_flag, $1)= if AC_TRY_EVAL(_LT_TAGVAR(archive_cmds, $1) 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) then lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=no else lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1)=yes fi _LT_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $RM conftest* ]) _LT_TAGVAR(archive_cmds_need_lc, $1)=$lt_cv_[]_LT_TAGVAR(archive_cmds_need_lc, $1) ;; esac fi ;; esac _LT_TAGDECL([build_libtool_need_lc], [archive_cmds_need_lc], [0], [Whether or not to add -lc for building shared libraries]) _LT_TAGDECL([allow_libtool_libs_with_static_runtimes], [enable_shared_with_static_runtimes], [0], [Whether or not to disallow shared libs when runtime libs are static]) _LT_TAGDECL([], [export_dynamic_flag_spec], [1], [Compiler flag to allow reflexive dlopens]) _LT_TAGDECL([], [whole_archive_flag_spec], [1], [Compiler flag to generate shared objects directly from archives]) _LT_TAGDECL([], [compiler_needs_object], [1], 
[Whether the compiler copes with passing no objects directly]) _LT_TAGDECL([], [old_archive_from_new_cmds], [2], [Create an old-style archive from a shared archive]) _LT_TAGDECL([], [old_archive_from_expsyms_cmds], [2], [Create a temporary old-style archive to link instead of a shared archive]) _LT_TAGDECL([], [archive_cmds], [2], [Commands used to build a shared archive]) _LT_TAGDECL([], [archive_expsym_cmds], [2]) _LT_TAGDECL([], [module_cmds], [2], [Commands used to build a loadable module if different from building a shared archive.]) _LT_TAGDECL([], [module_expsym_cmds], [2]) _LT_TAGDECL([], [with_gnu_ld], [1], [Whether we are building with GNU ld or not]) _LT_TAGDECL([], [allow_undefined_flag], [1], [Flag that allows shared libraries with undefined symbols to be built]) _LT_TAGDECL([], [no_undefined_flag], [1], [Flag that enforces no undefined symbols]) _LT_TAGDECL([], [hardcode_libdir_flag_spec], [1], [Flag to hardcode $libdir into a binary during linking. This must work even if $libdir does not exist]) _LT_TAGDECL([], [hardcode_libdir_separator], [1], [Whether we need a single "-rpath" flag with a separated argument]) _LT_TAGDECL([], [hardcode_direct], [0], [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_direct_absolute], [0], [Set to "yes" if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the resulting binary and the resulting library dependency is "absolute", i.e impossible to change by setting ${shlibpath_var} if the library is relocated]) _LT_TAGDECL([], [hardcode_minus_L], [0], [Set to "yes" if using the -LDIR flag during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_shlibpath_var], [0], [Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into the resulting binary]) _LT_TAGDECL([], [hardcode_automatic], [0], [Set to "yes" if building a shared library automatically hardcodes DIR into the library and all 
subsequent libraries and executables linked against it])
_LT_TAGDECL([], [inherit_rpath], [0],
    [Set to yes if linker adds runtime paths of dependent libraries
    to runtime path list])
_LT_TAGDECL([], [link_all_deplibs], [0],
    [Whether libtool must link a program against all its dependency libraries])
_LT_TAGDECL([], [always_export_symbols], [0],
    [Set to "yes" if exported symbols are required])
_LT_TAGDECL([], [export_symbols_cmds], [2],
    [The commands to list exported symbols])
_LT_TAGDECL([], [exclude_expsyms], [1],
    [Symbols that should not be listed in the preloaded symbols])
_LT_TAGDECL([], [include_expsyms], [1],
    [Symbols that must always be exported])
_LT_TAGDECL([], [prelink_cmds], [2],
    [Commands necessary for linking programs (against libraries) with templates])
_LT_TAGDECL([], [postlink_cmds], [2],
    [Commands necessary for finishing linking programs])
_LT_TAGDECL([], [file_list_spec], [1],
    [Specify filename containing input files])
dnl FIXME: Not yet implemented
dnl _LT_TAGDECL([], [thread_safe_flag_spec], [1],
dnl [Compiler flag to generate thread safe objects])
])# _LT_LINKER_SHLIBS


# _LT_LANG_C_CONFIG([TAG])
# ------------------------
# Ensure that the configuration variables for a C compiler are suitably
# defined. These variables are subsequently used by _LT_CONFIG to write
# the compiler configuration to `libtool'.
#
# TAG is passed through unchanged to the helper macros below that take a
# tag argument.  The value of CC found on entry is saved and put back on
# exit.  When the compiler variable is non-empty the macro also reports
# whether shared and static libraries will be built, adjusting
# enable_shared and enable_static (with extra rules for AIX hosts) before
# the configuration is written out.
m4_defun([_LT_LANG_C_CONFIG],
[m4_require([_LT_DECL_EGREP])dnl
# Remember the CC in effect on entry; it is put back as the last step.
lt_save_CC="$CC"
AC_LANG_PUSH(C)

# Source file extension for C test sources.
ac_ext=c

# Object file extension for compiled C test sources.
objext=o
_LT_TAGVAR(objext, $1)=$objext

# Code to be used in simple compile tests
lt_simple_compile_test_code="int some_variable = 0;"

# Code to be used in simple link tests
lt_simple_link_test_code='int main(){return(0);}'

_LT_TAG_COMPILER
# Save the default compiler, since it gets overwritten when the other
# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
compiler_DEFAULT=$CC

# save warnings/boilerplate of simple test code
_LT_COMPILER_BOILERPLATE
_LT_LINKER_BOILERPLATE

## CAVEAT EMPTOR:
## There is no encapsulation within the following macros, do not change
## the running order or otherwise move them around unless you know exactly
## what you are doing...
# NOTE(review): the compiler variable tested here is presumably set by the
# tag-compiler helper invoked above -- confirm against its definition.
if test -n "$compiler"; then
  _LT_COMPILER_NO_RTTI($1)
  _LT_COMPILER_PIC($1)
  _LT_COMPILER_C_O($1)
  _LT_COMPILER_FILE_LOCKS($1)
  _LT_LINKER_SHLIBS($1)
  _LT_SYS_DYNAMIC_LINKER($1)
  _LT_LINKER_HARDCODE_LIBPATH($1)
  LT_SYS_DLOPEN_SELF
  _LT_CMD_STRIPLIB

  # Report which library types will actually be built
  AC_MSG_CHECKING([if libtool supports shared libraries])
  AC_MSG_RESULT([$can_build_shared])

  AC_MSG_CHECKING([whether to build shared libraries])
  # A negative capability result overrides any request for shared libraries.
  test "$can_build_shared" = "no" && enable_shared=no

  # On AIX, shared libraries and static libraries use the same namespace, and
  # are all built from PIC.
  case $host_os in
  aix3*)
    # Enabling shared switches static off.  If a ranlib program is set, it
    # is appended as one more archive command and used as the post-install
    # command.
    test "$enable_shared" = yes && enable_static=no
    if test -n "$RANLIB"; then
      archive_cmds="$archive_cmds~\$RANLIB \$lib"
      postinstall_cmds='$RANLIB $lib'
    fi
    ;;

  aix[[4-9]]*)
    # Shared excludes static only when the CPU is not ia64 and runtime
    # linking is off.
    if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
      test "$enable_shared" = yes && enable_static=no
    fi
    ;;
  esac
  AC_MSG_RESULT([$enable_shared])

  AC_MSG_CHECKING([whether to build static libraries])
  # Make sure either enable_shared or enable_static is yes.
  test "$enable_shared" = yes || enable_static=yes
  AC_MSG_RESULT([$enable_static])

  _LT_CONFIG($1)
fi
# Leave the C language context entered above and restore the saved CC.
AC_LANG_POP
CC="$lt_save_CC"
])# _LT_LANG_C_CONFIG


# _LT_LANG_CXX_CONFIG([TAG])
# --------------------------
# Ensure that the configuration variables for a C++ compiler are suitably
# defined. These variables are subsequently used by _LT_CONFIG to write
# the compiler configuration to `libtool'.
m4_defun([_LT_LANG_CXX_CONFIG], [m4_require([_LT_FILEUTILS_DEFAULTS])dnl m4_require([_LT_DECL_EGREP])dnl m4_require([_LT_PATH_MANIFEST_TOOL])dnl if test -n "$CXX" && ( test "X$CXX" != "Xno" && ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || (test "X$CXX" != "Xg++"))) ; then AC_PROG_CXXCPP else _lt_caught_CXX_error=yes fi AC_LANG_PUSH(C++) _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(compiler_needs_object, $1)=no _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds _LT_TAGVAR(no_undefined_flag, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no # Source file extension for C++ test sources. ac_ext=cpp # Object file extension for compiled C++ test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the CXX compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. 
if test "$_lt_caught_CXX_error" != yes; then # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_LD=$LD lt_save_GCC=$GCC GCC=$GXX lt_save_with_gnu_ld=$with_gnu_ld lt_save_path_LD=$lt_cv_path_LD if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx else $as_unset lt_cv_prog_gnu_ld fi if test -n "${lt_cv_path_LDCXX+set}"; then lt_cv_path_LD=$lt_cv_path_LDCXX else $as_unset lt_cv_path_LD fi test -z "${LDCXX+set}" || LD=$LDCXX CC=${CXX-"c++"} CFLAGS=$CXXFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) if test -n "$compiler"; then # We don't want -fno-exception when compiling C++ code, so set the # no_builtin_flag separately if test "$GXX" = yes; then _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' else _LT_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= fi if test "$GXX" = yes; then # Set up default GNU C++ configuration LT_PATH_LD # Check if GNU C++ uses GNU ld as the underlying linker, since the # archiving commands below assume that GNU ld is being used. 
if test "$with_gnu_ld" = yes; then _LT_TAGVAR(archive_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC $pic_flag -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # If archive_cmds runs LD, not CC, wlarc should be empty # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to # investigate it a little bit more. (MM) wlarc='${wl}' # ancient GNU ld didn't support --whole-archive et. al. if eval "`$CC -print-prog-name=ld` --help 2>&1" | $GREP 'no-whole-archive' > /dev/null; then _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else _LT_TAGVAR(whole_archive_flag_spec, $1)= fi else with_gnu_ld=no wlarc= # A generic and very simple default shared library creation # command for GNU C++ for the case where it uses the native # linker, instead of GNU ld. If possible, this setting should # overridden to take advantage of the native linker features on # the platform it is being used on. _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' fi # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else GXX=no with_gnu_ld=no wlarc= fi # PORTME: fill in a description of your system's C++ link characteristics AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) _LT_TAGVAR(ld_shlibs, $1)=yes case $host_os in aix3*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aix[[4-9]]*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[[23]]|aix4.[[23]].*|aix[[5-9]]*) for ld_flag in $LDFLAGS; do case $ld_flag in *-brtl*) aix_use_runtimelinking=yes break ;; esac done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
_LT_TAGVAR(archive_cmds, $1)='' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(file_list_spec, $1)='${wl}-f,' if test "$GXX" = yes; then case $host_os in aix4.[[012]]|aix4.[[012]].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && strings "$collect2name" | $GREP resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 _LT_TAGVAR(hardcode_direct, $1)=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking _LT_TAGVAR(hardcode_minus_L, $1)=yes _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)= fi esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-bexpall' # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to # export. _LT_TAGVAR(always_export_symbols, $1)=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. _LT_TAGVAR(allow_undefined_flag, $1)='-berok' # Determine the default libpath from the value encoded in an empty # executable. 
_LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' _LT_TAGVAR(allow_undefined_flag, $1)="-z nodefs" _LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an # empty executable. _LT_SYS_MODULE_PATH_AIX([$1]) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' if test "$with_gnu_ld" = yes; then # We only use this code for GNU lds that support --whole-archive. _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' else # Exported symbols can be pulled into shared objects from archives _LT_TAGVAR(whole_archive_flag_spec, $1)='$convenience' fi _LT_TAGVAR(archive_cmds_need_lc, $1)=yes # This is similar to how AIX traditionally builds its shared # libraries. 
_LT_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; beos*) if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then _LT_TAGVAR(allow_undefined_flag, $1)=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME _LT_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; chorus*) case $cc_basename in *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; cygwin* | mingw* | pw32* | cegcc*) case $GXX,$cc_basename in ,cl* | no,cl*) # Native MSVC # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=yes _LT_TAGVAR(file_list_spec, $1)='@' # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. _LT_TAGVAR(archive_cmds, $1)='$CC -o $output_objdir/$soname $libobjs $compiler_flags $deplibs -Wl,-dll~linknames=' _LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then $SED -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e '1\\\!p' < $export_symbols > $output_objdir/$soname.exp; else $SED -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < $export_symbols > $output_objdir/$soname.exp; fi~ $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs "@$tool_output_objdir$soname.exp" -Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~ linknames=' # The linker will not automatically build a static lib if we build a DLL. 
# _LT_TAGVAR(old_archive_from_new_cmds, $1)='true' _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes # Don't use ranlib _LT_TAGVAR(old_postinstall_cmds, $1)='chmod 644 $oldlib' _LT_TAGVAR(postlink_cmds, $1)='lt_outputfile="@OUTPUT@"~ lt_tool_outputfile="@TOOL_OUTPUT@"~ case $lt_outputfile in *.exe|*.EXE) ;; *) lt_outputfile="$lt_outputfile.exe" lt_tool_outputfile="$lt_tool_outputfile.exe" ;; esac~ func_to_tool_file "$lt_outputfile"~ if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; then $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" -outputresource:"$lt_tool_outputfile" || exit 1; $RM "$lt_outputfile.manifest"; fi' ;; *) # g++ # _LT_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-all-symbols' _LT_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=yes if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... 
_LT_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; darwin* | rhapsody*) _LT_DARWIN_LINKER_FEATURES($1) ;; dgux*) case $cc_basename in ec++*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; ghcx*) # Green Hills C++ Compiler # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; freebsd2.*) # C++ shared libraries reported to be fairly broken before # switch to ELF _LT_TAGVAR(ld_shlibs, $1)=no ;; freebsd-elf*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;; freebsd* | dragonfly*) # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF # conventions _LT_TAGVAR(ld_shlibs, $1)=yes ;; gnu*) ;; haiku*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(link_all_deplibs, $1)=yes ;; hpux9*) _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, # but as the default # location of the library. 
case $cc_basename in CC*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aCC*) _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $EGREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes; then _LT_TAGVAR(archive_cmds, $1)='$RM $output_objdir/$soname~$CC -shared -nostdlib $pic_flag ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; hpux10*|hpux11*) if test $with_gnu_ld = no; then _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in hppa*64*|ia64*) ;; *) _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' ;; esac fi case $host_cpu in hppa*64*|ia64*) _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, # but as the default # location of the library. 
;; esac case $cc_basename in CC*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; aCC*) case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | $GREP "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes; then if test $with_gnu_ld = no; then case $host_cpu in hppa*64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $pic_flag ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac fi else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; interix[[3-9]]*) 
_LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; irix5* | irix6*) case $cc_basename in CC*) # SGI C++ _LT_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' # Archives containing C++ object files must be created using # "CC -ar", where "CC" is the IRIX C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. 
_LT_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' ;; *) if test "$GXX" = yes; then if test "$with_gnu_ld" = no; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` -o $lib' fi fi _LT_TAGVAR(link_all_deplibs, $1)=yes ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: _LT_TAGVAR(inherit_rpath, $1)=yes ;; linux* | k*bsd*-gnu | kopensolaris*-gnu) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
# # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | $GREP "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; icpc* | ecpc* ) # Intel C++ with_gnu_ld=yes # version 8.0 and above of icpc choke on multiply defined symbols # if we add $predep_objects and $postdep_objects, however 7.1 and # earlier do not add the objects themselves. 
case `$CC -V 2>&1` in *"Version 7."*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 8.0 or newer tmp_idyn= case $host_cpu in ia64*) tmp_idyn=' -i_dynamic';; esac _LT_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; esac _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' ;; pgCC* | pgcpp*) # Portland Group C++ compiler case `$CC -V` in *pgCC\ [[1-5]].* | *pgcpp\ [[1-5]].*) _LT_TAGVAR(prelink_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $objs $libobjs $compile_deplibs~ compile_command="$compile_command `find $tpldir -name \*.o | sort | $NL2SP`"' _LT_TAGVAR(old_archive_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $oldobjs$old_deplibs~ $AR $AR_FLAGS $oldlib$oldobjs$old_deplibs `find $tpldir -name \*.o | sort | $NL2SP`~ $RANLIB $oldlib' _LT_TAGVAR(archive_cmds, $1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, 
$1)='tpldir=Template.dir~ rm -rf $tpldir~ $CC --prelink_objects --instantiation_dir $tpldir $predep_objects $libobjs $deplibs $convenience $postdep_objects~ $CC -shared $pic_flag $predep_objects $libobjs $deplibs `find $tpldir -name \*.o | sort | $NL2SP` $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' ;; *) # Version 6 and above use weak symbols _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' ;; cxx*) # Compaq C++ _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' runpath_var=LD_RUN_PATH _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "X$list" | $Xsed' ;; xl* | mpixl* | bgxl*) # IBM XL 8.0 on PPC, with GNU ld _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test "x$supports_anon_versioning" = xyes; then _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ echo "local: *; };" >> $output_objdir/$libname.ver~ $CC -qmkshrobj $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` ${wl}--no-whole-archive' _LT_TAGVAR(compiler_needs_object, $1)=yes # Not sure whether something based on # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 # would be better. 
output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' ;; esac ;; esac ;; lynxos*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; m88k*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; mvs*) case $cc_basename in cxx*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; netbsd*) if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then _LT_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' wlarc= _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no fi # Workaround some broken pre-1.5 toolchains output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' ;; *nto* | *qnx*) _LT_TAGVAR(ld_shlibs, $1)=yes ;; openbsd2*) # C++ shared libraries are fairly broken _LT_TAGVAR(ld_shlibs, $1)=no ;; openbsd*) if test -f /usr/libexec/ld.so; then _LT_TAGVAR(hardcode_direct, $1)=yes _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=yes _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' 
_LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' _LT_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' fi output_verbose_link_cmd=func_echo_all else _LT_TAGVAR(ld_shlibs, $1)=no fi ;; osf3* | osf4* | osf5*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo "$lib" | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Archives containing C++ object files must be created using # the KAI C++ compiler. case $host in osf3*) _LT_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; *) _LT_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; esac ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; cxx*) case $host in osf3*) _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && func_echo_all "${wl}-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' ;; *) _LT_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` 
-update_registry ${output_objdir}/so_locations -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ echo "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname ${wl}-input ${wl}$lib.exp `test -n "$verstring" && $ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o $lib~ $RM $lib.exp' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' ;; esac _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP "ld" | $GREP -v "ld:"`; templist=`func_echo_all "$templist" | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; func_echo_all "$list"' ;; *) if test "$GXX" = yes && test "$with_gnu_ld" = no; then _LT_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' case $host in osf3*) _LT_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && 
func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' ;; esac _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; psos*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; lcc*) # Lucid # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; solaris*) case $cc_basename in CC* | sunCC*) # Sun C++ 4.2, 5.x and Centerline C++ _LT_TAGVAR(archive_cmds_need_lc,$1)=yes _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs' _LT_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_TAGVAR(hardcode_shlibpath_var, $1)=no case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. # Supported since Solaris 2.6 (maybe 2.5.1?) 
_LT_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' ;; esac _LT_TAGVAR(link_all_deplibs, $1)=yes output_verbose_link_cmd='func_echo_all' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' ;; gcx*) # Green Hills C++ Compiler _LT_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' # The C++ compiler must be used to create the archive. _LT_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' ;; *) # GNU C++ compiler with Solaris linker if test "$GXX" = yes && test "$with_gnu_ld" = no; then _LT_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs' if $CC --version | $GREP -v '^2\.7' > /dev/null; then _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -shared $pic_flag -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' else # g++ 2.7 appears to require `-G' NOT `-shared' on this # platform. 
_LT_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~ $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$RM $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -G $CFLAGS -v conftest.$objext 2>&1 | $GREP -v "^Configured with:" | $GREP "\-L"' fi _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) _LT_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' ;; esac fi ;; esac ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var='LD_RUN_PATH' case $cc_basename in CC*) _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. 
If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. _LT_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R,$libdir' _LT_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_TAGVAR(link_all_deplibs, $1)=yes _LT_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' runpath_var='LD_RUN_PATH' case $cc_basename in CC*) _LT_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(old_archive_cmds, $1)='$CC -Tprelink_objects $oldobjs~ '"$_LT_TAGVAR(old_archive_cmds, $1)" _LT_TAGVAR(reload_cmds, $1)='$CC -Tprelink_objects $reload_objs~ '"$_LT_TAGVAR(reload_cmds, $1)" ;; *) _LT_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac ;; vxworks*) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_TAGVAR(ld_shlibs, $1)=no ;; esac AC_MSG_RESULT([$_LT_TAGVAR(ld_shlibs, $1)]) test "$_LT_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no _LT_TAGVAR(GCC, $1)="$GXX" _LT_TAGVAR(LD, $1)="$LD" ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... 
_LT_SYS_HIDDEN_LIBDEPS($1)
_LT_COMPILER_PIC($1)
_LT_COMPILER_C_O($1)
_LT_COMPILER_FILE_LOCKS($1)
_LT_LINKER_SHLIBS($1)
_LT_SYS_DYNAMIC_LINKER($1)
_LT_LINKER_HARDCODE_LIBPATH($1)

_LT_CONFIG($1)
fi # test -n "$compiler"

CC=$lt_save_CC
CFLAGS=$lt_save_CFLAGS
LDCXX=$LD
LD=$lt_save_LD
GCC=$lt_save_GCC
with_gnu_ld=$lt_save_with_gnu_ld
lt_cv_path_LDCXX=$lt_cv_path_LD
lt_cv_path_LD=$lt_save_path_LD
lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld
fi # test "$_lt_caught_CXX_error" != yes

AC_LANG_POP
])# _LT_LANG_CXX_CONFIG


# _LT_FUNC_STRIPNAME_CNF
# ----------------------
# func_stripname_cnf prefix suffix name
# strip PREFIX and SUFFIX off of NAME.
# PREFIX and SUFFIX must not contain globbing or regex special
# characters, hashes, percent signs, but SUFFIX may contain a leading
# dot (in which case that matches only a dot).
#
# The stripped name is not printed; it is left in the shell variable
# func_stripname_result for the caller to read.
#
# This function is identical to the (non-XSI) version of func_stripname,
# except this one can be used by m4 code that may be executed by configure,
# rather than the libtool script.
m4_defun([_LT_FUNC_STRIPNAME_CNF],[dnl
AC_REQUIRE([_LT_DECL_SED])
AC_REQUIRE([_LT_PROG_ECHO_BACKSLASH])
func_stripname_cnf ()
{
  case ${2} in
  # SUFFIX starts with a dot: put a backslash in front of it so that sed
  # matches a literal dot instead of any character.
  .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
  # Any other SUFFIX is handed to sed unchanged.
  *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
  esac
} # func_stripname_cnf
])# _LT_FUNC_STRIPNAME_CNF

# _LT_SYS_HIDDEN_LIBDEPS([TAGNAME])
# ---------------------------------
# Figure out "hidden" library dependencies from verbose
# compiler output when linking a shared library.
# Parse the compiler output and extract the necessary
# objects, libraries and library flags.
#
# A small test source for the given tag is compiled, then the words printed
# by output_verbose_link_cmd are scanned.  Object files and -L/-R/-l words
# seen before conftest.$objext are recorded as predep_objects and
# compiler_lib_search_path; those seen after it are recorded as
# postdep_objects and postdeps.  predeps is reset but never filled in here.
# compiler_lib_search_dirs is derived from compiler_lib_search_path, and
# archive_cmds_need_lc is forced to `no' when postdeps already contain -lc.
m4_defun([_LT_SYS_HIDDEN_LIBDEPS],
[m4_require([_LT_FILEUTILS_DEFAULTS])dnl
AC_REQUIRE([_LT_FUNC_STRIPNAME_CNF])dnl
# Dependencies to place before and after the object being linked:
_LT_TAGVAR(predep_objects, $1)=
_LT_TAGVAR(postdep_objects, $1)=
_LT_TAGVAR(predeps, $1)=
_LT_TAGVAR(postdeps, $1)=
_LT_TAGVAR(compiler_lib_search_path, $1)=

dnl we can't use the lt_simple_compile_test_code here,
dnl because it contains code intended for an executable,
dnl not a library.  It's possible we should let each
dnl tag define a new lt_????_link_test_code variable,
dnl but it's only used here...
m4_if([$1], [], [cat > conftest.$ac_ext <<_LT_EOF
int a;
void foo (void) { a = 0; }
_LT_EOF
], [$1], [CXX], [cat > conftest.$ac_ext <<_LT_EOF
class Foo
{
public:
  Foo (void) { a = 0; }
private:
  int a;
};
_LT_EOF
], [$1], [F77], [cat > conftest.$ac_ext <<_LT_EOF
      subroutine foo
      implicit none
      integer*4 a
      a=0
      return
      end
_LT_EOF
], [$1], [FC], [cat > conftest.$ac_ext <<_LT_EOF
      subroutine foo
      implicit none
      integer a
      a=0
      return
      end
_LT_EOF
], [$1], [GCJ], [cat > conftest.$ac_ext <<_LT_EOF
public class foo {
  private int a;
  public void bar (void) {
    a = 0;
  }
};
_LT_EOF
], [$1], [GO], [cat > conftest.$ac_ext <<_LT_EOF
package foo
func foo() {
}
_LT_EOF
])

# Link-time optimisation options are switched off for this probe only;
# the saved CFLAGS are put back once the scan is finished.
_lt_libdeps_save_CFLAGS=$CFLAGS
case "$CC $CFLAGS " in #(
*\ -flto*\ *) CFLAGS="$CFLAGS -fno-lto" ;;
*\ -fwhopr*\ *) CFLAGS="$CFLAGS -fno-whopr" ;;
*\ -fuse-linker-plugin*\ *) CFLAGS="$CFLAGS -fno-use-linker-plugin" ;;
esac

dnl Parse the compiler output and extract the necessary
dnl objects, libraries and library flags.
if AC_TRY_EVAL(ac_compile); then
  # Parse the compiler output and extract the necessary
  # objects, libraries and library flags.

  # Sentinel used to keep track of whether or not we are before
  # the conftest object file.
  pre_test_object_deps_done=no

  # Start from a clean state: a stale value of prev (left over from an
  # earlier tag or inherited from the environment) would otherwise be
  # glued onto the first word of the compiler output.
  prev=

  for p in `eval "$output_verbose_link_cmd"`; do
    case ${prev}${p} in

    -L* | -R* | -l*)
      # Some compilers place space between "-{L,R}" and the path.
      # Remove the space.
      if test $p = "-L" ||
         test $p = "-R"; then
        prev=$p
        continue
      fi

      # Expand the sysroot to ease extracting the directories later.
      if test -z "$prev"; then
        case $p in
        -L*) func_stripname_cnf '-L' '' "$p"; prev=-L; p=$func_stripname_result ;;
        -R*) func_stripname_cnf '-R' '' "$p"; prev=-R; p=$func_stripname_result ;;
        -l*) func_stripname_cnf '-l' '' "$p"; prev=-l; p=$func_stripname_result ;;
        esac
      fi
      case $p in
      =*) func_stripname_cnf '=' '' "$p"; p=$lt_sysroot$func_stripname_result ;;
      esac
      if test "$pre_test_object_deps_done" = no; then
        case ${prev} in
        -L | -R)
          # Internal compiler library paths should come after those
          # provided by the user.  The postdeps already come after the
          # user supplied libs so there is no need to process them.
          if test -z "$_LT_TAGVAR(compiler_lib_search_path, $1)"; then
            _LT_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}"
          else
            _LT_TAGVAR(compiler_lib_search_path, $1)="${_LT_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}"
          fi
          ;;
        # The "-l" case would never come before the object being
        # linked, so don't bother handling this case.
        esac
      else
        if test -z "$_LT_TAGVAR(postdeps, $1)"; then
          _LT_TAGVAR(postdeps, $1)="${prev}${p}"
        else
          _LT_TAGVAR(postdeps, $1)="${_LT_TAGVAR(postdeps, $1)} ${prev}${p}"
        fi
      fi
      prev=
      ;;

    *.lto.$objext) ;; # Ignore GCC LTO objects
    *.$objext)
      # This assumes that the test object file only shows up
      # once in the compiler output.
      if test "$p" = "conftest.$objext"; then
        pre_test_object_deps_done=yes
        continue
      fi

      if test "$pre_test_object_deps_done" = no; then
        if test -z "$_LT_TAGVAR(predep_objects, $1)"; then
          _LT_TAGVAR(predep_objects, $1)="$p"
        else
          _LT_TAGVAR(predep_objects, $1)="$_LT_TAGVAR(predep_objects, $1) $p"
        fi
      else
        if test -z "$_LT_TAGVAR(postdep_objects, $1)"; then
          _LT_TAGVAR(postdep_objects, $1)="$p"
        else
          _LT_TAGVAR(postdep_objects, $1)="$_LT_TAGVAR(postdep_objects, $1) $p"
        fi
      fi
      ;;

    *) ;; # Ignore the rest.

    esac
  done

  # Clean up.
  rm -f a.out a.exe
else
  echo "libtool.m4: error: problem compiling $1 test program"
fi

# Remove the compiled test object and give the caller its CFLAGS back.
$RM -f conftest.$objext
CFLAGS=$_lt_libdeps_save_CFLAGS

# PORTME: override above test on systems where it is broken
m4_if([$1], [CXX],
[case $host_os in
interix[[3-9]]*)
  # Interix 3.5 installs completely hosed .la files for C++, so rather than
  # hack all around it, let's just trust "g++" to DTRT.
  _LT_TAGVAR(predep_objects,$1)=
  _LT_TAGVAR(postdep_objects,$1)=
  _LT_TAGVAR(postdeps,$1)=
  ;;

linux*)
  case `$CC -V 2>&1 | sed 5q` in
  *Sun\ C*)
    # Sun C++ 5.9

    # The more standards-conforming stlport4 library is
    # incompatible with the Cstd library. Avoid specifying
    # it if it's in CXXFLAGS. Ignore libCrun as
    # -library=stlport4 depends on it.
    case " $CXX $CXXFLAGS " in
    *" -library=stlport4 "*)
      solaris_use_stlport4=yes
      ;;
    esac

    if test "$solaris_use_stlport4" != yes; then
      _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
    fi
    ;;
  esac
  ;;

solaris*)
  case $cc_basename in
  CC* | sunCC*)
    # The more standards-conforming stlport4 library is
    # incompatible with the Cstd library. Avoid specifying
    # it if it's in CXXFLAGS. Ignore libCrun as
    # -library=stlport4 depends on it.
    case " $CXX $CXXFLAGS " in
    *" -library=stlport4 "*)
      solaris_use_stlport4=yes
      ;;
    esac

    # Adding this requires a known-good setup of shared libraries for
    # Sun compiler versions before 5.6, else PIC objects from an old
    # archive will be linked into the output, leading to subtle bugs.
    if test "$solaris_use_stlport4" != yes; then
      _LT_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
    fi
    ;;
  esac
  ;;
esac
])

case " $_LT_TAGVAR(postdeps, $1) " in
*" -lc "*) _LT_TAGVAR(archive_cmds_need_lc, $1)=no ;;
esac
_LT_TAGVAR(compiler_lib_search_dirs, $1)=
if test -n "${_LT_TAGVAR(compiler_lib_search_path, $1)}"; then
  # Turn the list of -L words into a plain space-separated directory list.
  _LT_TAGVAR(compiler_lib_search_dirs, $1)=`echo " ${_LT_TAGVAR(compiler_lib_search_path, $1)}" | ${SED} -e 's! -L! !g' -e 's!^ !!'`
fi
_LT_TAGDECL([], [compiler_lib_search_dirs], [1],
    [The directories searched by this compiler when creating a shared library])
_LT_TAGDECL([], [predep_objects], [1],
    [Dependencies to place before and after the objects being linked to
    create a shared library])
_LT_TAGDECL([], [postdep_objects], [1])
_LT_TAGDECL([], [predeps], [1])
_LT_TAGDECL([], [postdeps], [1])
_LT_TAGDECL([], [compiler_lib_search_path], [1],
    [The library search path used internally by the compiler when linking
    a shared library])
])# _LT_SYS_HIDDEN_LIBDEPS


# _LT_LANG_F77_CONFIG([TAG])
# --------------------------
# Ensure that the configuration variables for a Fortran 77 compiler are
# suitably defined.  These variables are subsequently used by _LT_CONFIG
# to write the compiler configuration to `libtool'.
m4_defun([_LT_LANG_F77_CONFIG],
[AC_LANG_PUSH(Fortran 77)
if test -z "$F77" || test "X$F77" = "Xno"; then
  _lt_disable_F77=yes
fi

_LT_TAGVAR(archive_cmds_need_lc, $1)=no
_LT_TAGVAR(allow_undefined_flag, $1)=
_LT_TAGVAR(always_export_symbols, $1)=no
_LT_TAGVAR(archive_expsym_cmds, $1)=
_LT_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_TAGVAR(hardcode_direct, $1)=no
_LT_TAGVAR(hardcode_direct_absolute, $1)=no
_LT_TAGVAR(hardcode_libdir_flag_spec, $1)=
_LT_TAGVAR(hardcode_libdir_separator, $1)=
_LT_TAGVAR(hardcode_minus_L, $1)=no
_LT_TAGVAR(hardcode_automatic, $1)=no
_LT_TAGVAR(inherit_rpath, $1)=no
_LT_TAGVAR(module_cmds, $1)=
_LT_TAGVAR(module_expsym_cmds, $1)=
_LT_TAGVAR(link_all_deplibs, $1)=unknown
_LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
_LT_TAGVAR(reload_flag, $1)=$reload_flag
_LT_TAGVAR(reload_cmds, $1)=$reload_cmds
_LT_TAGVAR(no_undefined_flag, $1)=
_LT_TAGVAR(whole_archive_flag_spec, $1)=
_LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no

# Source file extension for f77 test sources.
ac_ext=f

# Object file extension for compiled f77 test sources.
objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the F77 compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test "$_lt_disable_F77" != yes; then # Code to be used in simple compile tests lt_simple_compile_test_code="\ subroutine t return end " # Code to be used in simple link tests lt_simple_link_test_code="\ program t end " # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC="$CC" lt_save_GCC=$GCC lt_save_CFLAGS=$CFLAGS CC=${F77-"f77"} CFLAGS=$FFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) GCC=$G77 if test -n "$compiler"; then AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) AC_MSG_CHECKING([whether to build shared libraries]) test "$can_build_shared" = "no" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test "$enable_shared" = yes && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[[4-9]]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi ;; esac AC_MSG_RESULT([$enable_shared]) AC_MSG_CHECKING([whether to build static libraries]) # Make sure either enable_shared or enable_static is yes. 
test "$enable_shared" = yes || enable_static=yes AC_MSG_RESULT([$enable_static]) _LT_TAGVAR(GCC, $1)="$G77" _LT_TAGVAR(LD, $1)="$LD" ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi # test -n "$compiler" GCC=$lt_save_GCC CC="$lt_save_CC" CFLAGS="$lt_save_CFLAGS" fi # test "$_lt_disable_F77" != yes AC_LANG_POP ])# _LT_LANG_F77_CONFIG # _LT_LANG_FC_CONFIG([TAG]) # ------------------------- # Ensure that the configuration variables for a Fortran compiler are # suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. m4_defun([_LT_LANG_FC_CONFIG], [AC_LANG_PUSH(Fortran) if test -z "$FC" || test "X$FC" = "Xno"; then _lt_disable_FC=yes fi _LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(allow_undefined_flag, $1)= _LT_TAGVAR(always_export_symbols, $1)=no _LT_TAGVAR(archive_expsym_cmds, $1)= _LT_TAGVAR(export_dynamic_flag_spec, $1)= _LT_TAGVAR(hardcode_direct, $1)=no _LT_TAGVAR(hardcode_direct_absolute, $1)=no _LT_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_TAGVAR(hardcode_libdir_separator, $1)= _LT_TAGVAR(hardcode_minus_L, $1)=no _LT_TAGVAR(hardcode_automatic, $1)=no _LT_TAGVAR(inherit_rpath, $1)=no _LT_TAGVAR(module_cmds, $1)= _LT_TAGVAR(module_expsym_cmds, $1)= _LT_TAGVAR(link_all_deplibs, $1)=unknown _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds _LT_TAGVAR(no_undefined_flag, $1)= _LT_TAGVAR(whole_archive_flag_spec, $1)= _LT_TAGVAR(enable_shared_with_static_runtimes, $1)=no # Source file extension for fc test sources. ac_ext=${ac_fc_srcext-f} # Object file extension for compiled fc test sources. 
objext=o _LT_TAGVAR(objext, $1)=$objext # No sense in running all these tests if we already determined that # the FC compiler isn't working. Some variables (like enable_shared) # are currently assumed to apply to all compilers on this platform, # and will be corrupted by setting them based on a non-working compiler. if test "$_lt_disable_FC" != yes; then # Code to be used in simple compile tests lt_simple_compile_test_code="\ subroutine t return end " # Code to be used in simple link tests lt_simple_link_test_code="\ program t end " # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC="$CC" lt_save_GCC=$GCC lt_save_CFLAGS=$CFLAGS CC=${FC-"f95"} CFLAGS=$FCFLAGS compiler=$CC GCC=$ac_cv_fc_compiler_gnu _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) if test -n "$compiler"; then AC_MSG_CHECKING([if libtool supports shared libraries]) AC_MSG_RESULT([$can_build_shared]) AC_MSG_CHECKING([whether to build shared libraries]) test "$can_build_shared" = "no" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. case $host_os in aix3*) test "$enable_shared" = yes && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix[[4-9]]*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi ;; esac AC_MSG_RESULT([$enable_shared]) AC_MSG_CHECKING([whether to build static libraries]) # Make sure either enable_shared or enable_static is yes. 
test "$enable_shared" = yes || enable_static=yes AC_MSG_RESULT([$enable_static]) _LT_TAGVAR(GCC, $1)="$ac_cv_fc_compiler_gnu" _LT_TAGVAR(LD, $1)="$LD" ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... _LT_SYS_HIDDEN_LIBDEPS($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_SYS_DYNAMIC_LINKER($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi # test -n "$compiler" GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS fi # test "$_lt_disable_FC" != yes AC_LANG_POP ])# _LT_LANG_FC_CONFIG # _LT_LANG_GCJ_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for the GNU Java Compiler compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. m4_defun([_LT_LANG_GCJ_CONFIG], [AC_REQUIRE([LT_PROG_GCJ])dnl AC_LANG_SAVE # Source file extension for Java test sources. ac_ext=java # Object file extension for compiled Java test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="class foo {}" # Code to be used in simple link tests lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC=yes CC=${GCJ-"gcj"} CFLAGS=$GCJFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_TAGVAR(LD, $1)="$LD" _LT_CC_BASENAME([$compiler]) # GCJ did not exist at the time GCC didn't implicitly link libc in. 
_LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then _LT_COMPILER_NO_RTTI($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi AC_LANG_RESTORE GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_GCJ_CONFIG # _LT_LANG_GO_CONFIG([TAG]) # -------------------------- # Ensure that the configuration variables for the GNU Go compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. m4_defun([_LT_LANG_GO_CONFIG], [AC_REQUIRE([LT_PROG_GO])dnl AC_LANG_SAVE # Source file extension for Go test sources. ac_ext=go # Object file extension for compiled Go test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="package main; func main() { }" # Code to be used in simple link tests lt_simple_link_test_code='package main; func main() { }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC=yes CC=${GOC-"gccgo"} CFLAGS=$GOFLAGS compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_TAGVAR(LD, $1)="$LD" _LT_CC_BASENAME([$compiler]) # Go did not exist at the time GCC didn't implicitly link libc in. 
_LT_TAGVAR(archive_cmds_need_lc, $1)=no _LT_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds _LT_TAGVAR(reload_flag, $1)=$reload_flag _LT_TAGVAR(reload_cmds, $1)=$reload_cmds ## CAVEAT EMPTOR: ## There is no encapsulation within the following macros, do not change ## the running order or otherwise move them around unless you know exactly ## what you are doing... if test -n "$compiler"; then _LT_COMPILER_NO_RTTI($1) _LT_COMPILER_PIC($1) _LT_COMPILER_C_O($1) _LT_COMPILER_FILE_LOCKS($1) _LT_LINKER_SHLIBS($1) _LT_LINKER_HARDCODE_LIBPATH($1) _LT_CONFIG($1) fi AC_LANG_RESTORE GCC=$lt_save_GCC CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_GO_CONFIG # _LT_LANG_RC_CONFIG([TAG]) # ------------------------- # Ensure that the configuration variables for the Windows resource compiler # are suitably defined. These variables are subsequently used by _LT_CONFIG # to write the compiler configuration to `libtool'. m4_defun([_LT_LANG_RC_CONFIG], [AC_REQUIRE([LT_PROG_RC])dnl AC_LANG_SAVE # Source file extension for RC test sources. ac_ext=rc # Object file extension for compiled RC test sources. objext=o _LT_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' # Code to be used in simple link tests lt_simple_link_test_code="$lt_simple_compile_test_code" # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_TAG_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. 
lt_save_CC="$CC" lt_save_CFLAGS=$CFLAGS lt_save_GCC=$GCC GCC= CC=${RC-"windres"} CFLAGS= compiler=$CC _LT_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) _LT_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes if test -n "$compiler"; then : _LT_CONFIG($1) fi GCC=$lt_save_GCC AC_LANG_RESTORE CC=$lt_save_CC CFLAGS=$lt_save_CFLAGS ])# _LT_LANG_RC_CONFIG # LT_PROG_GCJ # ----------- AC_DEFUN([LT_PROG_GCJ], [m4_ifdef([AC_PROG_GCJ], [AC_PROG_GCJ], [m4_ifdef([A][M_PROG_GCJ], [A][M_PROG_GCJ], [AC_CHECK_TOOL(GCJ, gcj,) test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2" AC_SUBST(GCJFLAGS)])])[]dnl ]) # Old name: AU_ALIAS([LT_AC_PROG_GCJ], [LT_PROG_GCJ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_GCJ], []) # LT_PROG_GO # ---------- AC_DEFUN([LT_PROG_GO], [AC_CHECK_TOOL(GOC, gccgo,) ]) # LT_PROG_RC # ---------- AC_DEFUN([LT_PROG_RC], [AC_CHECK_TOOL(RC, windres,) ]) # Old name: AU_ALIAS([LT_AC_PROG_RC], [LT_PROG_RC]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_RC], []) # _LT_DECL_EGREP # -------------- # If we don't have a new enough Autoconf to choose the best grep # available, choose the one first in the user's PATH. m4_defun([_LT_DECL_EGREP], [AC_REQUIRE([AC_PROG_EGREP])dnl AC_REQUIRE([AC_PROG_FGREP])dnl test -z "$GREP" && GREP=grep _LT_DECL([], [GREP], [1], [A grep program that handles long lines]) _LT_DECL([], [EGREP], [1], [An ERE matcher]) _LT_DECL([], [FGREP], [1], [A literal string matcher]) dnl Non-bleeding-edge autoconf doesn't subst GREP, so do it here too AC_SUBST([GREP]) ]) # _LT_DECL_OBJDUMP # -------------- # If we don't have a new enough Autoconf to choose the best objdump # available, choose the one first in the user's PATH. m4_defun([_LT_DECL_OBJDUMP], [AC_CHECK_TOOL(OBJDUMP, objdump, false) test -z "$OBJDUMP" && OBJDUMP=objdump _LT_DECL([], [OBJDUMP], [1], [An object symbol dumper]) AC_SUBST([OBJDUMP]) ]) # _LT_DECL_DLLTOOL # ---------------- # Ensure DLLTOOL variable is set. 
m4_defun([_LT_DECL_DLLTOOL], [AC_CHECK_TOOL(DLLTOOL, dlltool, false) test -z "$DLLTOOL" && DLLTOOL=dlltool _LT_DECL([], [DLLTOOL], [1], [DLL creation program]) AC_SUBST([DLLTOOL]) ]) # _LT_DECL_SED # ------------ # Check for a fully-functional sed program, that truncates # as few characters as possible. Prefer GNU sed if found. m4_defun([_LT_DECL_SED], [AC_PROG_SED test -z "$SED" && SED=sed Xsed="$SED -e 1s/^X//" _LT_DECL([], [SED], [1], [A sed program that does not truncate output]) _LT_DECL([], [Xsed], ["\$SED -e 1s/^X//"], [Sed that helps us avoid accidentally triggering echo(1) options like -n]) ])# _LT_DECL_SED m4_ifndef([AC_PROG_SED], [ ############################################################ # NOTE: This macro has been submitted for inclusion into # # GNU Autoconf as AC_PROG_SED. When it is available in # # a released version of Autoconf we should remove this # # macro and use it instead. # ############################################################ m4_defun([AC_PROG_SED], [AC_MSG_CHECKING([for a sed that does not truncate output]) AC_CACHE_VAL(lt_cv_path_SED, [# Loop through the user's path and test for sed and gsed. # Then use that list of sed's as ones to test for truncation. as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for lt_ac_prog in sed gsed; do for ac_exec_ext in '' $ac_executable_extensions; do if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" fi done done done IFS=$as_save_IFS lt_ac_max=0 lt_ac_count=0 # Add /usr/xpg4/bin/sed as it is typically found on Solaris # along with /bin/sed that truncates output. for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do test ! -f $lt_ac_sed && continue cat /dev/null > conftest.in lt_ac_count=0 echo $ECHO_N "0123456789$ECHO_C" >conftest.in # Check for GNU sed and select it if it is found. 
if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then lt_cv_path_SED=$lt_ac_sed break fi while true; do cat conftest.in conftest.in >conftest.tmp mv conftest.tmp conftest.in cp conftest.in conftest.nl echo >>conftest.nl $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break cmp -s conftest.out conftest.nl || break # 10000 chars as input seems more than enough test $lt_ac_count -gt 10 && break lt_ac_count=`expr $lt_ac_count + 1` if test $lt_ac_count -gt $lt_ac_max; then lt_ac_max=$lt_ac_count lt_cv_path_SED=$lt_ac_sed fi done done ]) SED=$lt_cv_path_SED AC_SUBST([SED]) AC_MSG_RESULT([$SED]) ])#AC_PROG_SED ])#m4_ifndef # Old name: AU_ALIAS([LT_AC_PROG_SED], [AC_PROG_SED]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([LT_AC_PROG_SED], []) # _LT_CHECK_SHELL_FEATURES # ------------------------ # Find out whether the shell is Bourne or XSI compatible, # or has some other useful features. m4_defun([_LT_CHECK_SHELL_FEATURES], [AC_MSG_CHECKING([whether the shell understands some XSI constructs]) # Try some XSI features xsi_shell=no ( _lt_dummy="a/b/c" test "${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \ = c,a/b,b/c, \ && eval 'test $(( 1 + 1 )) -eq 2 \ && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \ && xsi_shell=yes AC_MSG_RESULT([$xsi_shell]) _LT_CONFIG_LIBTOOL_INIT([xsi_shell='$xsi_shell']) AC_MSG_CHECKING([whether the shell understands "+="]) lt_shell_append=no ( foo=bar; set foo baz; eval "$[1]+=\$[2]" && test "$foo" = barbaz ) \ >/dev/null 2>&1 \ && lt_shell_append=yes AC_MSG_RESULT([$lt_shell_append]) _LT_CONFIG_LIBTOOL_INIT([lt_shell_append='$lt_shell_append']) if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then lt_unset=unset else lt_unset=false fi _LT_DECL([], [lt_unset], [0], [whether the shell understands "unset"])dnl # test EBCDIC or ASCII case `echo X|tr X '\101'` in A) # ASCII based system # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr lt_SP2NL='tr \040 \012' lt_NL2SP='tr 
\015\012 \040\040' ;; *) # EBCDIC based system lt_SP2NL='tr \100 \n' lt_NL2SP='tr \r\n \100\100' ;; esac _LT_DECL([SP2NL], [lt_SP2NL], [1], [turn spaces into newlines])dnl _LT_DECL([NL2SP], [lt_NL2SP], [1], [turn newlines into spaces])dnl ])# _LT_CHECK_SHELL_FEATURES # _LT_PROG_FUNCTION_REPLACE (FUNCNAME, REPLACEMENT-BODY) # ------------------------------------------------------ # In `$cfgfile', look for function FUNCNAME delimited by `^FUNCNAME ()$' and # '^} FUNCNAME ', and replace its body with REPLACEMENT-BODY. m4_defun([_LT_PROG_FUNCTION_REPLACE], [dnl { sed -e '/^$1 ()$/,/^} # $1 /c\ $1 ()\ {\ m4_bpatsubsts([$2], [$], [\\], [^\([ ]\)], [\\\1]) } # Extended-shell $1 implementation' "$cfgfile" > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: ]) # _LT_PROG_REPLACE_SHELLFNS # ------------------------- # Replace existing portable implementations of several shell functions with # equivalent extended shell implementations where those features are available.. m4_defun([_LT_PROG_REPLACE_SHELLFNS], [if test x"$xsi_shell" = xyes; then _LT_PROG_FUNCTION_REPLACE([func_dirname], [dnl case ${1} in */*) func_dirname_result="${1%/*}${2}" ;; * ) func_dirname_result="${3}" ;; esac]) _LT_PROG_FUNCTION_REPLACE([func_basename], [dnl func_basename_result="${1##*/}"]) _LT_PROG_FUNCTION_REPLACE([func_dirname_and_basename], [dnl case ${1} in */*) func_dirname_result="${1%/*}${2}" ;; * ) func_dirname_result="${3}" ;; esac func_basename_result="${1##*/}"]) _LT_PROG_FUNCTION_REPLACE([func_stripname], [dnl # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are # positional parameters, so assign one to ordinary parameter first. 
func_stripname_result=${3} func_stripname_result=${func_stripname_result#"${1}"} func_stripname_result=${func_stripname_result%"${2}"}]) _LT_PROG_FUNCTION_REPLACE([func_split_long_opt], [dnl func_split_long_opt_name=${1%%=*} func_split_long_opt_arg=${1#*=}]) _LT_PROG_FUNCTION_REPLACE([func_split_short_opt], [dnl func_split_short_opt_arg=${1#??} func_split_short_opt_name=${1%"$func_split_short_opt_arg"}]) _LT_PROG_FUNCTION_REPLACE([func_lo2o], [dnl case ${1} in *.lo) func_lo2o_result=${1%.lo}.${objext} ;; *) func_lo2o_result=${1} ;; esac]) _LT_PROG_FUNCTION_REPLACE([func_xform], [ func_xform_result=${1%.*}.lo]) _LT_PROG_FUNCTION_REPLACE([func_arith], [ func_arith_result=$(( $[*] ))]) _LT_PROG_FUNCTION_REPLACE([func_len], [ func_len_result=${#1}]) fi if test x"$lt_shell_append" = xyes; then _LT_PROG_FUNCTION_REPLACE([func_append], [ eval "${1}+=\\${2}"]) _LT_PROG_FUNCTION_REPLACE([func_append_quoted], [dnl func_quote_for_eval "${2}" dnl m4 expansion turns \\\\ into \\, and then the shell eval turns that into \ eval "${1}+=\\\\ \\$func_quote_for_eval_result"]) # Save a `func_append' function call where possible by direct use of '+=' sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? || _lt_function_replace_fail=: else # Save a `func_append' function call even when '+=' is not available sed -e 's%func_append \([[a-zA-Z_]]\{1,\}\) "%\1="$\1%g' $cfgfile > $cfgfile.tmp \ && mv -f "$cfgfile.tmp" "$cfgfile" \ || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f "$cfgfile.tmp") test 0 -eq $? 
|| _lt_function_replace_fail=: fi if test x"$_lt_function_replace_fail" = x":"; then AC_MSG_WARN([Unable to substitute extended shell functions in $ofile]) fi ]) # _LT_PATH_CONVERSION_FUNCTIONS # ----------------------------- # Determine which file name conversion functions should be used by # func_to_host_file (and, implicitly, by func_to_host_path). These are needed # for certain cross-compile configurations and native mingw. m4_defun([_LT_PATH_CONVERSION_FUNCTIONS], [AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl AC_MSG_CHECKING([how to convert $build file names to $host format]) AC_CACHE_VAL(lt_cv_to_host_file_cmd, [case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32 ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32 ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32 ;; esac ;; *-*-cygwin* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin ;; *-*-cygwin* ) lt_cv_to_host_file_cmd=func_convert_file_noop ;; * ) # otherwise, assume *nix lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin ;; esac ;; * ) # unhandled hosts (and "normal" native builds) lt_cv_to_host_file_cmd=func_convert_file_noop ;; esac ]) to_host_file_cmd=$lt_cv_to_host_file_cmd AC_MSG_RESULT([$lt_cv_to_host_file_cmd]) _LT_DECL([to_host_file_cmd], [lt_cv_to_host_file_cmd], [0], [convert $build file names to $host format])dnl AC_MSG_CHECKING([how to convert $build file names to toolchain format]) AC_CACHE_VAL(lt_cv_to_tool_file_cmd, [#assume ordinary cross tools, or native build. 
lt_cv_to_tool_file_cmd=func_convert_file_noop case $host in *-*-mingw* ) case $build in *-*-mingw* ) # actually msys lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32 ;; esac ;; esac ]) to_tool_file_cmd=$lt_cv_to_tool_file_cmd AC_MSG_RESULT([$lt_cv_to_tool_file_cmd]) _LT_DECL([to_tool_file_cmd], [lt_cv_to_tool_file_cmd], [0], [convert $build files to toolchain format])dnl ])# _LT_PATH_CONVERSION_FUNCTIONS ecm-6.4.4/m4/lt~obsolete.m40000644023561000001540000001375612106744307012367 00000000000000# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- # # Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc. # Written by Scott James Remnant, 2004. # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 5 lt~obsolete.m4 # These exist entirely to fool aclocal when bootstrapping libtool. # # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN) # which have later been changed to m4_define as they aren't part of the # exported API, or moved to Autoconf or Automake where they belong. # # The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us # using a macro with the same name in our local m4/libtool.m4 it'll # pull the old libtool.m4 in (it doesn't see our shiny new m4_define # and doesn't know about Autoconf macros at all.) # # So we provide this file, which has a silly filename so it's always # included after everything else. This provides aclocal with the # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything # because those macros already exist, or will be overwritten later. # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. # # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. 
# Yes, that means every name once taken will need to remain here until # we give up compatibility with versions before 1.7, at which point # we need to keep only those names which we still refer to. # This is to help aclocal find these macros, as it can't see m4_define. AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], 
[AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], 
[AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) ecm-6.4.4/m4/ltoptions.m40000644023561000001540000003007312106744307012037 00000000000000# Helper functions for option handling. -*- Autoconf -*- # # Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation, # Inc. # Written by Gary V. Vaughan, 2004 # # This file is free software; the Free Software Foundation gives # unlimited permission to copy and/or distribute it, with or without # modifications, as long as this notice is preserved. # serial 7 ltoptions.m4 # This is to help aclocal find these macros, as it can't see m4_define. AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) # _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) # ------------------------------------------ m4_define([_LT_MANGLE_OPTION], [[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) # _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) # --------------------------------------- # Set option OPTION-NAME for macro MACRO-NAME, and if there is a # matching handler defined, dispatch to it. Other OPTION-NAMEs are # saved as a flag. 
m4_define([_LT_SET_OPTION], [m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), _LT_MANGLE_DEFUN([$1], [$2]), [m4_warning([Unknown $1 option `$2'])])[]dnl ]) # _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) # ------------------------------------------------------------ # Execute IF-SET if OPTION-NAME is set for MACRO-NAME, IF-NOT-SET otherwise. m4_define([_LT_IF_OPTION], [m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) # _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) # ------------------------------------------------------- # Execute IF-NOT-SET unless at least one of the options in OPTION-LIST # is set for MACRO-NAME, i.e. only when none of them has been given. m4_define([_LT_UNLESS_OPTIONS], [m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), [m4_define([$0_found])])])[]dnl m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 ])[]dnl ]) # _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) # ---------------------------------------- # OPTION-LIST is a space-separated list of Libtool options associated # with MACRO-NAME. If any OPTION has a matching handler declared with # LT_OPTION_DEFINE, dispatch to that macro; otherwise _LT_SET_OPTION # issues an m4_warning about the unknown option. # When MACRO-NAME is LT_INIT, defaults are then applied for every boolean # option, or pair of opposing options, that was not mentioned. m4_defun([_LT_SET_OPTIONS], [# Set options m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), [_LT_SET_OPTION([$1], _LT_Option)]) m4_if([$1],[LT_INIT],[ dnl dnl Simply set some default values (i.e off) if boolean options were not dnl specified: _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no ]) _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no ]) dnl dnl If no reference was made to various pairs of opposing options, then dnl we run the default mode handler for the pair.
For example, if neither dnl `shared' nor `disable-shared' was passed, we enable building of shared dnl archives by default: _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], [_LT_ENABLE_FAST_INSTALL]) ]) ])# _LT_SET_OPTIONS ## --------------------------------- ## ## Macros to handle LT_INIT options. ## ## --------------------------------- ## # _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) # ----------------------------------------- # Expand to the name of the handler macro for OPTION-NAME of MACRO-NAME: # the prefix _LT_OPTION_DEFUN_ followed by MACRO-NAME__OPTION-NAME in upper # case, with every character other than A-Z, 0-9 and _ turned into _. m4_define([_LT_MANGLE_DEFUN], [[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) # LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) # ----------------------------------------------- # Define CODE as the handler macro (named by _LT_MANGLE_DEFUN) that # _LT_SET_OPTION expands when OPTION-NAME is set for MACRO-NAME. m4_define([LT_OPTION_DEFINE], [m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl ])# LT_OPTION_DEFINE # dlopen # ------ # Handler for the LT_INIT dlopen option: sets enable_dlopen=yes. LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes ]) AU_DEFUN([AC_LIBTOOL_DLOPEN], [_LT_SET_OPTION([LT_INIT], [dlopen]) AC_DIAGNOSE([obsolete], [$0: Remove this warning and the call to _LT_SET_OPTION when you put the `dlopen' option into LT_INIT's first parameter.]) ]) dnl aclocal-1.4 backwards compatibility: dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) # win32-dll # --------- # Declare package support for building win32 dll's.
LT_OPTION_DEFINE([LT_INIT], [win32-dll],
[enable_win32_dll=yes

case $host in
*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
  AC_CHECK_TOOL(AS, as, false)
  AC_CHECK_TOOL(DLLTOOL, dlltool, false)
  AC_CHECK_TOOL(OBJDUMP, objdump, false)
  ;;
esac

test -z "$AS" && AS=as
_LT_DECL([], [AS], [1], [Assembler program])dnl

test -z "$DLLTOOL" && DLLTOOL=dlltool
_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl

test -z "$OBJDUMP" && OBJDUMP=objdump
_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
])# win32-dll

AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
_LT_SET_OPTION([LT_INIT], [win32-dll])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the `win32-dll' option into LT_INIT's first parameter.])
])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])


# _LT_ENABLE_SHARED([DEFAULT])
# ----------------------------
# Implement the --enable-shared flag, and support the `shared' and
# `disable-shared' LT_INIT options.
# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
# When --enable-shared is given a package list instead of yes/no, shared
# libraries are enabled only if $PACKAGE appears in that list.
m4_define([_LT_ENABLE_SHARED],
[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([shared],
    [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
        [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
    [p=${PACKAGE-default}
    case $enableval in
    yes) enable_shared=yes ;;
    no) enable_shared=no ;;
    *)
      enable_shared=no
      # Look at the argument we got. We use all the common list separators.
      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
      for pkg in $enableval; do
        IFS="$lt_save_ifs"
        if test "X$pkg" = "X$p"; then
          enable_shared=yes
        fi
      done
      IFS="$lt_save_ifs"
      ;;
    esac],
    [enable_shared=]_LT_ENABLE_SHARED_DEFAULT)

    _LT_DECL([build_libtool_libs], [enable_shared], [0],
        [Whether or not to build shared libraries])
])# _LT_ENABLE_SHARED

LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])

# Old names:
AC_DEFUN([AC_ENABLE_SHARED],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
])

AC_DEFUN([AC_DISABLE_SHARED],
[_LT_SET_OPTION([LT_INIT], [disable-shared])
])

AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AM_ENABLE_SHARED], [])
dnl AC_DEFUN([AM_DISABLE_SHARED], [])



# _LT_ENABLE_STATIC([DEFAULT])
# ----------------------------
# Implement the --enable-static flag, and support the `static' and
# `disable-static' LT_INIT options.
# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
m4_define([_LT_ENABLE_STATIC],
[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([static],
    [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
        [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
    [p=${PACKAGE-default}
    case $enableval in
    yes) enable_static=yes ;;
    no) enable_static=no ;;
    *)
      enable_static=no
      # Look at the argument we got. We use all the common list separators.
      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
      for pkg in $enableval; do
        IFS="$lt_save_ifs"
        if test "X$pkg" = "X$p"; then
          enable_static=yes
        fi
      done
      IFS="$lt_save_ifs"
      ;;
    esac],
    [enable_static=]_LT_ENABLE_STATIC_DEFAULT)

    _LT_DECL([build_old_libs], [enable_static], [0],
        [Whether or not to build static libraries])
])# _LT_ENABLE_STATIC

LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])

# Old names:
AC_DEFUN([AC_ENABLE_STATIC],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
])

AC_DEFUN([AC_DISABLE_STATIC],
[_LT_SET_OPTION([LT_INIT], [disable-static])
])

AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AM_ENABLE_STATIC], [])
dnl AC_DEFUN([AM_DISABLE_STATIC], [])



# _LT_ENABLE_FAST_INSTALL([DEFAULT])
# ----------------------------------
# Implement the --enable-fast-install flag, and support the `fast-install'
# and `disable-fast-install' LT_INIT options.
# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
m4_define([_LT_ENABLE_FAST_INSTALL],
[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([fast-install],
    [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
    [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
    [p=${PACKAGE-default}
    case $enableval in
    yes) enable_fast_install=yes ;;
    no) enable_fast_install=no ;;
    *)
      enable_fast_install=no
      # Look at the argument we got. We use all the common list separators.
      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
      for pkg in $enableval; do
        IFS="$lt_save_ifs"
        if test "X$pkg" = "X$p"; then
          enable_fast_install=yes
        fi
      done
      IFS="$lt_save_ifs"
      ;;
    esac],
    [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)

_LT_DECL([fast_install], [enable_fast_install], [0],
         [Whether or not to optimize for fast installation])dnl
])# _LT_ENABLE_FAST_INSTALL

LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])

# Old names:
AU_DEFUN([AC_ENABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the `fast-install' option into LT_INIT's first parameter.])
])

AU_DEFUN([AC_DISABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the `disable-fast-install' option into LT_INIT's first parameter.])
])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])


# _LT_WITH_PIC([MODE])
# --------------------
# Implement the --with-pic flag, and support the `pic-only' and `no-pic'
# LT_INIT options.
# MODE is either `yes' or `no'.  If omitted, pic_mode falls back to
# `default' (the help text describes this as "use both").
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
    [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
        [try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
    [lt_p=${PACKAGE-default}
    case $withval in
    yes|no) pic_mode=$withval ;;
    *)
      pic_mode=default
      # Look at the argument we got. We use all the common list separators.
      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
      for lt_pkg in $withval; do
        IFS="$lt_save_ifs"
        if test "X$lt_pkg" = "X$lt_p"; then
          pic_mode=yes
        fi
      done
      IFS="$lt_save_ifs"
      ;;
    esac],
    [pic_mode=default])

test -z "$pic_mode" && pic_mode=m4_default([$1], [default])

_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
])# _LT_WITH_PIC

LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])

# Old name:
AU_DEFUN([AC_LIBTOOL_PICMODE],
[_LT_SET_OPTION([LT_INIT], [pic-only])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the `pic-only' option into LT_INIT's first parameter.])
])

dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])

## ----------------- ##
## LTDL_INIT Options ##
## ----------------- ##

# Each LTDL_INIT option below records its own name in _LTDL_MODE or
# _LTDL_TYPE; both start out empty.
m4_define([_LTDL_MODE], [])
LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
                 [m4_define([_LTDL_MODE], [nonrecursive])])
LT_OPTION_DEFINE([LTDL_INIT], [recursive],
                 [m4_define([_LTDL_MODE], [recursive])])
LT_OPTION_DEFINE([LTDL_INIT], [subproject],
                 [m4_define([_LTDL_MODE], [subproject])])

m4_define([_LTDL_TYPE], [])
LT_OPTION_DEFINE([LTDL_INIT], [installable],
                 [m4_define([_LTDL_TYPE], [installable])])
LT_OPTION_DEFINE([LTDL_INIT], [convenience],
                 [m4_define([_LTDL_TYPE], [convenience])])
ecm-6.4.4/m4/ltsugar.m40000644023561000001540000001042412106744307011463 00000000000000# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.

# serial 6 ltsugar.m4

# This is to help aclocal find these macros, as it can't see m4_define.
AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])


# lt_join(SEP, ARG1, [ARG2...])
# -----------------------------
# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
# associated separator.
# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
# versions in m4sugar had bugs.
#
# Implementation note: in the general case lt_join emits ARG1 followed by a
# literal `_', which joins with the expansion of `$0' to form a call to the
# `_lt_join' helper; that helper prefixes each remaining non-empty argument
# with SEP.
m4_define([lt_join],
[m4_if([$#], [1], [],
       [$#], [2], [[$2]],
       [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
m4_define([_lt_join],
[m4_if([$#$2], [2], [],
       [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])


# lt_car(LIST)
# lt_cdr(LIST)
# ------------
# Manipulate m4 lists.
# These macros are necessary as long as we still need to support
# Autoconf-2.59, which quotes differently.
m4_define([lt_car], [[$1]])
m4_define([lt_cdr],
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
       [$#], 1, [],
       [m4_dquote(m4_shift($@))])])
m4_define([lt_unquote], $1)


# lt_append(MACRO-NAME, STRING, [SEPARATOR])
# ------------------------------------------
# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
# Note that neither SEPARATOR nor STRING is expanded; they are appended
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
# than defined and empty).
#
# This macro is needed until we can rely on Autoconf 2.62, since earlier
# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
m4_define([lt_append],
[m4_define([$1],
           m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])



# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
# ----------------------------------------------------------
# Produce a SEP delimited list of all paired combinations of elements of
# PREFIX-LIST with SUFFIX1 through SUFFIXn.  Each element of the list
# has the form PREFIXmINFIXSUFFIXn.
# Needed until we can rely on m4_combine added in Autoconf 2.62.
m4_define([lt_combine],
[m4_if(m4_eval([$# > 3]), [1],
       [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
[[m4_foreach([_Lt_prefix], [$2],
             [m4_foreach([_Lt_suffix],
                ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
        [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])


# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
# -----------------------------------------------------------------------
# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
# Membership is tested by searching for SEPARATOR VARNAME SEPARATOR inside
# the current value wrapped in SEPARATORs.
m4_define([lt_if_append_uniq],
[m4_ifdef([$1],
          [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
                 [lt_append([$1], [$2], [$3])$4],
                 [$5])],
          [lt_append([$1], [$2], [$3])$4])])


# lt_dict_add(DICT, KEY, VALUE)
# -----------------------------
# Store VALUE by defining the macro named `DICT(KEY)'.
m4_define([lt_dict_add],
[m4_define([$1($2)], [$3])])


# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
# --------------------------------------------
# Store VALUE by defining the macro named `DICT(KEY:SUBKEY)'.
m4_define([lt_dict_add_subkey],
[m4_define([$1($2:$3)], [$4])])


# lt_dict_fetch(DICT, KEY, [SUBKEY])
# ----------------------------------
# Expand to the definition stored under `DICT(KEY:SUBKEY)' when SUBKEY is
# non-empty, otherwise to the one stored under `DICT(KEY)'; expand to
# nothing if the corresponding macro is not defined.
m4_define([lt_dict_fetch],
[m4_ifval([$3],
        m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
    m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])


# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
# -----------------------------------------------------------------
# Expand IF-TRUE if the fetched entry equals VALUE, IF-FALSE otherwise.
m4_define([lt_if_dict_fetch],
[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
        [$5],
    [$6])])


# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
# --------------------------------------------------------------
# Join, with SEPARATOR (default `, '), those KEYs whose entry equals VALUE.
m4_define([lt_dict_filter],
[m4_if([$5], [], [],
  [lt_join(m4_quote(m4_default([$4], [[, ]])),
           lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
                      [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
])
ecm-6.4.4/configure.in0000644023561000001540000006037212113201454011525 00000000000000m4_define([ECM_VERSION], [6.4.4])

AC_PREREQ([2.57])
AC_INIT([ecm], ECM_VERSION, [ecm-discuss@lists.gforge.inria.fr])
AC_CONFIG_HEADER([config.h])
AC_CONFIG_MACRO_DIR([m4])

GMP_INIT([config.m4])

AM_INIT_AUTOMAKE([1.10])
dnl NOTE(review): this second, two-argument call repeats the package name and
dnl version already passed to AC_INIT and looks redundant; confirm against the
dnl Automake versions that must be supported before removing it.
AM_INIT_AUTOMAKE([ecm], ECM_VERSION)
AC_CANONICAL_HOST

dnl Copied from MPFR 2.4.2:
unset GMP_CFLAGS GMP_CC user_redefine_cc
dnl Remember whether the user supplied their own CC or CFLAGS; if so, the
dnl values recorded in gmp.h are not imported later on.
if test -n "$CFLAGS" || test -n "$CC" ; then
  user_redefine_cc=yes
fi

dnl the following is required to compile auxi.c according to autoconf 2.61
AC_PROG_EGREP
AC_PROG_SED

dnl --with-gmp=DIR is shorthand for DIR/include and DIR/lib; the two more
dnl specific options below override it when given later on the command line.
AC_ARG_WITH([gmp],
  [AS_HELP_STRING([--with-gmp=DIR], [GMP install directory])],
  [with_gmp_include=$withval/include
   with_gmp_lib=$withval/lib])
AC_ARG_WITH([gmp_include],
  [AS_HELP_STRING([--with-gmp-include=DIR], [GMP include directory])],
  [with_gmp_include=$withval])
AC_ARG_WITH([gmp_lib],
  [AS_HELP_STRING([--with-gmp-lib=DIR], [GMP lib directory])],
  [with_gmp_lib=$withval])
AC_ARG_WITH([gwnum],
  [AS_HELP_STRING([--with-gwnum=DIR], [GWNUM source directory])],
  [with_gwnum=$withval])

if test x"$with_gmp_include" != "x"
then
  dnl "if ! cmd" is not understood by every Bourne shell, so test positively
  dnl and put the error in the else branch.
  if test -d "$with_gmp_include"; then :
  else
    AC_MSG_ERROR([Specified GMP include directory "$with_gmp_include" does not exist])
  fi
  dnl Prepend instead of overwriting, so that a CPPFLAGS value supplied by
  dnl the user is preserved.
  CPPFLAGS="-I$with_gmp_include $CPPFLAGS"
fi

if test x"$with_gmp_lib" != "x"
then
  if test -d "$with_gmp_lib"; then :
  else
    AC_MSG_ERROR([Specified GMP library directory "$with_gmp_lib" does not exist])
  fi
fi

############################
# Parse --enable-* options #
############################

dnl Assertions are enabled by default for beta/rc releases. The last parameter
dnl of AC_ARG_ENABLE() sets the default value (change also default=...).
AC_ARG_ENABLE([assert],
[AS_HELP_STRING([--enable-assert],
     [enable ASSERT checking [[default=no]]])],[],[enable_assert=no])
if test "x$enable_assert" = xyes; then
  AC_DEFINE([WANT_ASSERT],1,[Define to 1 if you want assertions enabled])
  GMP_DEFINE([WANT_ASSERT], 1)
fi

AC_ARG_ENABLE([shellcmd],
[AS_HELP_STRING([--enable-shellcmd],
     [enable shell command execution [[default=no]]])])
if test "x$enable_shellcmd" = xyes; then
  AC_DEFINE([WANT_SHELLCMD],1,[Define to 1 if you want shell command execution])
fi

AC_ARG_ENABLE([gmp-cflags],
[AS_HELP_STRING([--enable-gmp-cflags],
     [enable importing CFLAGS from gmp.h [[default=yes]]])],[],[enable_gmp_cflags=yes])

AC_ARG_ENABLE([openmp],
[AS_HELP_STRING([--enable-openmp],
     [enable OpenMP multi-threading [[default=no]]])])

AC_ARG_ENABLE([sse2],
[AS_HELP_STRING([--enable-sse2],
     [use SSE2 instructions in NTT code (default=yes for 32-bit x86 systems, if supported)])])

AC_ARG_ENABLE([asm-redc],
[AS_HELP_STRING([--enable-asm-redc],
     [use an asm redc (default=yes on x86_64 and powerpc64, no on others)])])

AC_ARG_ENABLE([memory-debug],
[AS_HELP_STRING([--enable-memory-debug],
     [enable memory debugging [[default=no]]])])
if test "x$enable_memory_debug" = xyes; then
  AC_DEFINE([MEMORY_DEBUG], 1, [Define to 1 if you want memory debugging])
fi
AM_CONDITIONAL([MEMORY_DEBUG], [test "x$enable_memory_debug" = xyes])

AC_ARG_ENABLE([mulredc-svoboda],
[AS_HELP_STRING([--enable-mulredc-svoboda],
     [enable Svoboda mulredc [[default=no]]])])
if test "x$enable_mulredc_svoboda" = xyes; then
  AC_DEFINE([MULREDC_SVOBODA],1,[Define to 1 if you want Svoboda mulredc])
  GMP_DEFINE([MULREDC_SVOBODA], 1)
fi

dnl Use C language for test programs
AC_LANG([C])

dnl Copied from MPFR 2.4.1 and modified
dnl We need to guess the C preprocessor instead of using AC_PROG_CPP,
dnl since AC_PROG_CPP implies AC_PROG_CC, which chooses a compiler
dnl (before we have the chance to get it from gmp.h) and does some
dnl checking related to this compiler (such as dependency tracking
dnl options); if the compiler changes due to __GMP_CC in gmp.h, one
dnl would have incorrect settings.
dnl FIXME: Move this in aclocal ?
dnl The whole detection is skipped when the user set CC or CFLAGS, when
dnl --disable-gmp-cflags was given, or when cross-compiling.
if test "x$user_redefine_cc" = x && test "x$enable_gmp_cflags" = xyes && test "x$cross_compiling" != xyes; then
if test "x$GMP_CC$GMP_CFLAGS" = x; then
   AC_MSG_CHECKING([for CC and CFLAGS in gmp.h])
   dnl Sentinel values: if they are still in place after the loop, no
   dnl preprocessor managed to expand the macros from gmp.h.
   GMP_CC=__GMP_CC
   GMP_CFLAGS=__GMP_CFLAGS
   for cpp in /lib/cpp gcc cc c99
   do
     test $cpp = /lib/cpp || cpp="$cpp -E"
     echo "Trying to run $cpp" >&AS_MESSAGE_LOG_FD
     AC_LANG_CONFTEST([AC_LANG_SOURCE([foo])])
     if $cpp $CPPFLAGS conftest.$ac_ext > /dev/null 2> /dev/null ; then
       # Get CC and CFLAGS
       AC_LANG_CONFTEST([AC_LANG_SOURCE([#include "gmp.h"
MPFR_OPTION_CC __GMP_CC
MPFR_OPTION_CFLAGS __GMP_CFLAGS])])
       echo "Trying to parse gmp.h with: $cpp $CPPFLAGS conftest.$ac_ext" >&AS_MESSAGE_LOG_FD
       if $cpp $CPPFLAGS conftest.$ac_ext 2> /dev/null > conftest.out; then
         GMP_CC="`$EGREP MPFR_OPTION_CC conftest.out | $SED -e 's/MPFR_OPTION_CC //g' | $SED -e 's/"//g'`"
         GMP_CFLAGS="`$EGREP MPFR_OPTION_CFLAGS conftest.out | $SED -e 's/MPFR_OPTION_CFLAGS //g'| $SED -e 's/"//g'`"
         echo "Success, GMP_CC=$GMP_CC, GMP_CFLAGS=$GMP_CFLAGS" >&AS_MESSAGE_LOG_FD
         break
       else
         echo "Could not parse gmp.h with $cpp" >&AS_MESSAGE_LOG_FD
       fi
     else
       echo "Could not run $cpp" >&AS_MESSAGE_LOG_FD
     fi
   done
   rm -f conftest*
   if test "x$GMP_CC" = "x__GMP_CC" || test "x$GMP_CFLAGS" = "x__GMP_CFLAGS" ; then
      AC_MSG_RESULT([no])
      GMP_CFLAGS=
      GMP_CC=
   else
      AC_MSG_RESULT([yes CC=$GMP_CC CFLAGS=$GMP_CFLAGS])
   fi
fi

dnl But these variables may be invalid, so we must check them first.
dnl Note: we do not use AC_RUN_IFELSE, as it implies AC_PROG_CC.
if test "x$GMP_CC$GMP_CFLAGS" != x; then
   AC_MSG_CHECKING([whether CC=$GMP_CC and CFLAGS=$GMP_CFLAGS works])
   AC_LANG_CONFTEST([AC_LANG_SOURCE([int main (void) { return 0; }])])
   if $GMP_CC $GMP_CFLAGS -o conftest conftest.$ac_ext 2> /dev/null ; then
     AC_MSG_RESULT([yes])
     CFLAGS=$GMP_CFLAGS
     CC=$GMP_CC
   else
     AC_MSG_RESULT([no, reverting to default])
   fi
   rm -f conftest*
fi

fi

dnl Checks for programs.
AC_PROG_CC
AM_PROG_AS
AM_PROG_CC_C_O

# Now that we have decided on CC and CFLAGS, init libtool
# Don't make a shared library by default. Enable building a shared library
# by specifying "--enable-shared" on the ./configure command line
LT_PREREQ([2.2.6])
LT_INIT([disable-shared])

dnl Checks for typedefs, structures, and compiler characteristics.
AC_TYPE_INT64_T
AC_TYPE_UINT64_T
AC_TYPE_LONG_LONG_INT

dnl Check if compiler supports "const," if not define it to empty string
AC_C_CONST

dnl Check if compiler supports "inline," if not define it to
dnl __inline__ or __inline or the empty string
AC_C_INLINE

dnl Check if both time.h and sys/time.h can be included
AC_HEADER_TIME

dnl Define size_t to something useable if standard headers don't
AC_TYPE_SIZE_T

dnl If OpenMP is enabled, check which command line parameter (if any)
dnl is required to make the compiler enable OpenMP
if test "x$enable_openmp" = xyes; then
  AC_OPENMP
fi

########################
# Enable asm redc code #
########################

# If --(en|dis)able-asm-redc not specified, choose default value
# Test if asm redc code is available for this cpu.
# Point ASMPATH to the correct subdirectory.

# asm_redc enabled by default for x86_64 and 64 bit PowerPC
if test "x$enable_asm_redc" = x; then
  case $host in
    x86_64*-*-* | powerpc-apple-darwin* | powerpc64-*-linux*) enable_asm_redc=yes;;
    *) enable_asm_redc=no;;
  esac
fi

if test "x$enable_asm_redc" = xyes; then
  case $host in
    pentium4-*-* | pentium3-*-* | viac7-*-* | i786-*-*) ASMPATH=pentium4;;
    x86_64*-*-*)
# In case GMP has been compiled with a 32-bit ABI...
# Use AC_COMPILE_IFELSE instead of AC_PREPROC_IFELSE, otherwise
# GMP's CFLAGS doesn't seem to be taken into account.
        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
#if defined(__i386__)
#error
#endif])], [], [AC_MSG_NOTICE([32-bit ABI (i386), disabling asm-redc])
                enable_asm_redc=no])
          ASMPATH=x86_64;;
# warning: with powerpc-apple-darwin* we can have ABI=32
# see bug #10646 on the bug tracker, where config.guess says
# powerpc-apple-darwin8.11.0 (this is a 64-bit machine, but most applications
# are compiled in 32 bits). It works with --disable-asm-redc.
    powerpc-apple-darwin*)
        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
#if defined(__ppc__)
#error
#endif])], [], [AC_MSG_NOTICE([32-bit PowerPC, disabling asm-redc])
                enable_asm_redc=no])
          ASMPATH=powerpc64;;
    powerpc64-*-linux*)
          ECM_INCLUDE([<"$srcdir"/powerpc64/powerpc-defs.m4>])
          ASMPATH=powerpc64;;
    i[[56]]86-*-* | k[[78]]*-*-* | athlon*-*-* | pentiumpro-*-* | \
    pentium2-*-* | viac3*-*-* | i686-apple-darwin*) ASMPATH=athlon;;
    *) AC_MSG_ERROR([[asm redc not available on this machine $host]]);;
  esac
fi

if test "x$enable_asm_redc" = xyes; then
# do the necessary definitions and includes
  AC_DEFINE([USE_ASM_REDC],1,[Define to 1 to use asm redc])
  test "x$CCAS" != x || CCAS="$CC -c"
  AC_SUBST([CCAS])
  GMP_PROG_M4
  GMP_ASM_UNDERSCORE
  GMP_ASM_TEXT
  GMP_ASM_GLOBL
  GMP_ASM_TYPE

  case $host in
    *-*-mingw32) GMP_DEFINE([WINDOWS64_ABI], 1)
                 AC_DEFINE([WINDOWS64_ABI], 1,[Define to 1 if x86_64 mulredc*() functions should be called with Windows ABI]);;
    *) ;;
  esac

  case $host in
    pentium3-*-*)
      echo "WARNING: Your processor is recognized as Pentium3."
      echo " The asm code uses SSE2, and therefore it might"
      echo " fail if your proc is indeed a P3, and not a"
      echo " Pentium M. If you have compilation problems,"
      echo " consider using --disable-asm-redc."
      ;;
    *)
  esac
fi
AM_CONDITIONAL([ENABLE_ASM_REDC], [test "x$enable_asm_redc" = xyes])


############################
# Enable SSE2 instructions #
############################

# Test if we should use SSE2 instructions and if the cpu supports them
if test "x$enable_sse2" = "x"; then
dnl Default: if we build for Pentium 4, enable SSE2 code for the NTT
dnl Some SSE2 enabled cpus are identified as i686, we enable SSE2
dnl for them by default and let the tests below check if it works
  case $host in
    pentium4-*-* | viac7-*-* | i686-*-* | i786-*-*)
      enable_sse2=yes
    ;;
  esac
fi

# If the necessary predefines (__GNUC__ or __ICL, and __i386__)
# are not set, SSE2 will never be compiled in, and we switch off
# the SSE2 flag
if test "x$enable_sse2" = xyes; then
  AC_PREPROC_IFELSE([AC_LANG_PROGRAM([
#if !defined(__GNUC__) && !defined(__ICL) || !defined(__i386__)
#error
#IRIXdoesnotexitaterrordirective
#endif])], [], dnl Necessary predefines are present. Nothing to do
[ dnl Necessary predefines are not present. Switch off SSE2
  AC_MSG_NOTICE([Not using GCC or ICC, or not a 32-bit x86. SSE2 disabled])
  enable_sse2=no])
fi

if test "x$enable_sse2" = xyes; then
  # See if we need -msse2 to enable SSE2 instructions
  AC_MSG_CHECKING([for SSE2 support])
  m4_define([SSE2_TEST_PROG], [AC_LANG_PROGRAM([], dnl
[#if (defined(__GNUC__) || defined(__ICL)) && defined(__i386__)
/* On some machines, a program without constraints may pass without -msse2 but those with constraints in spv.c fail, thus we test with constraints here. */
asm volatile ("pmuludq %%xmm2, %%xmm0" : : :"%xmm0");
#else
#error
#IRIXdoesnotexitaterrordirective
#endif])])
  AC_COMPILE_IFELSE([SSE2_TEST_PROG], dnl
   [ dnl SSE2 works, nothing to be done
    AC_MSG_RESULT([yes])], dnl
   [ dnl SSE2 does not work, try again with -msse2
    OLDCFLAGS="$CFLAGS"
    CFLAGS="$CFLAGS -msse2"
    AC_COMPILE_IFELSE([SSE2_TEST_PROG], dnl
      [ dnl works now, keep CFLAGS like this
       AC_MSG_RESULT([yes, with -msse2])], dnl
      [ dnl still didn't work, switch off SSE2
       CFLAGS="$OLDCFLAGS"
       enable_sse2=no
       AC_MSG_RESULT([not supported, SSE2 disabled])
      ])])
fi

if test "x$enable_sse2" = xyes; then
  AC_DEFINE([HAVE_SSE2],1,[Define to 1 to enable SSE2 instructions in NTT code])
fi


########################
# Add GWNUM if desired #
########################

dnl If user wants GWNUM, check if the file exists (either as .a or .lib)
if test "x$with_gwnum" != "x"; then
  if test "x$enable_openmp" = xyes; then
    AC_MSG_ERROR([Woltman's GWNUM currently cannot be used together with OpenMP])
  fi
  AC_CHECK_FILE([$with_gwnum/gwnum.a], [
    AC_DEFINE([HAVE_GWNUM], 1, [Define to 1 if gwnum.a or gwnum.lib exist])
    GWLIB="$with_gwnum/gwnum.a -lpthread"
    CPPFLAGS="$CPPFLAGS -I$with_gwnum"
  ],[
    AC_CHECK_FILE([$with_gwnum/gwnum.lib], [
      AC_DEFINE([HAVE_GWNUM], 1, [Define to 1 if gwnum.a or gwnum.lib exist])
      GWLIB="$with_gwnum/gwnum.lib -lpthread"
      CPPFLAGS="$CPPFLAGS -I$with_gwnum"
    ],[
      with_gwnum=
      AC_MSG_ERROR([Woltman's GWNUM library not found])
    ])
  ])
fi
AM_CONDITIONAL([WITH_GWNUM], [test "x$with_gwnum" != "x"])


dnl Checks for header files.
AC_FUNC_ALLOCA
AC_HEADER_STDC
AC_CHECK_HEADERS([math.h limits.h malloc.h strings.h sys/time.h unistd.h io.h signal.h fcntl.h])
AC_CHECK_HEADERS([windows.h])
AC_CHECK_HEADERS([ctype.h sys/types.h sys/resource.h])

dnl Checks for library functions that are not in GMP
AC_FUNC_STRTOD

dnl Check functions in the math library.  pow, floor, sqrt and fmod are
dnl mandatory (configure aborts without them); the cos check has no
dnl failure action, so a missing cos is not fatal here.
AC_CHECK_LIB(m,pow,,AC_MSG_ERROR(required function missing))
AC_CHECK_LIB(m,floor,,AC_MSG_ERROR(required function missing))
AC_CHECK_LIB(m,sqrt,,AC_MSG_ERROR(required function missing))
AC_CHECK_LIB(m,fmod,,AC_MSG_ERROR(required function missing))
AC_CHECK_LIB(m,cos)

dnl Check for GSL but don't add it to LIBS, since only rho uses it and
dnl we don't want all other binaries to depend on it.
dnl If found, pass link flags to Makefile via GSL_LD_FLAGS
GSL_LD_FLAGS=
dnl Check if "-lgslcblas" works. If yes, check if "-lgsl -lgslcblas" works.
dnl If both work, put "-lgsl -lgslcblas" in GSL_LD_FLAGS
AC_CHECK_LIB([gslcblas],[cblas_dgemm], dnl
  [AC_CHECK_LIB([gsl],[gsl_blas_dgemm], dnl
     [AC_DEFINE([HAVE_LIBGSL],1,[Define to 1 if you have the `gsl' library (-lgsl).])
      AC_DEFINE([HAVE_LIBGSLCBLAS],1,[Define to 1 if you have the `gslcblas' library (-lgslcblas).])
      GSL_LD_FLAGS="-lgsl -lgslcblas"], [], dnl
     dnl Here comes the "OTHER-LIBRARIES" field for AC_CHECK_LIB [gsl]:
     [-lgslcblas])])

dnl The first four groups are mandatory: configure aborts if any of these
dnl functions is missing.  The last group is optional and only produces
dnl HAVE_* defines.
AC_CHECK_FUNCS([isascii memset strchr strlen strncasecmp strstr], [], [AC_MSG_ERROR([required function missing])])
AC_CHECK_FUNCS([access unlink], [], [AC_MSG_ERROR([required function missing])])
AC_CHECK_FUNCS([isspace isdigit isxdigit], [], [AC_MSG_ERROR([required function missing])])
AC_CHECK_FUNCS([time ctime], [], [AC_MSG_ERROR([required function missing])])
AC_CHECK_FUNCS([setpriority nice gethostname gettimeofday getrusage memmove signal fcntl fileno])
dnl FIXME: which win32 library contains these functions?
dnl AC_CHECK_FUNCS([GetCurrentProcess GetProcessTimes])
AC_CHECK_FUNCS([malloc_usable_size])

dnl If we use GCC and the user has not specified their own CFLAGS,
dnl add some warning flags, avoiding duplication.
dnl Each flag is prepended in turn, so a CFLAGS without any of them ends up
dnl as "-W -Wall -Wundef -pedantic $CFLAGS".  Padding CFLAGS with a blank on
dnl both sides lets one pattern match the flag at the start, in the middle,
dnl at the end, or as the only word.
if test "x$GCC" = xyes && test "x$user_redefine_cc" != xyes; then
  for ecm_warn_flag in -pedantic -Wundef -Wall -W; do
    case " $CFLAGS " in
      *" $ecm_warn_flag "*) ;;
      *) CFLAGS="$ecm_warn_flag $CFLAGS" ;;
    esac
  done
fi


dnl Find GMP and do some sanity checks
dnl Tests concerning the include directories.
if test -d "$with_gmp_include"; then
  dnl AC_CHECK_HEADERS and AC_PREPROC_IFELSE use CPPFLAGS but not CFLAGS
  CPPFLAGS="-I$with_gmp_include $CPPFLAGS"
else
  with_gmp_include=
fi
AC_CHECK_HEADERS([gmp.h], [], AC_MSG_ERROR([required header file missing]))

dnl This needs to find only the header file so we can do it here, before
dnl we start looking for libgmp.a
AC_MSG_CHECKING([for recent GMP])
AC_PREPROC_IFELSE([AC_LANG_SOURCE([[
#include <gmp.h>
#if (__GNU_MP_VERSION <= 4)
#error
#IRIXdoesnotexitaterrordirective
#endif
]])],
  [AC_MSG_RESULT(yes)],
  [AC_MSG_RESULT(no)
   AC_MSG_ERROR([GMP 5.0.0 or newer is required])]
)

dnl Figure out if GMP is MPIR
AC_MSG_CHECKING([if GMP is MPIR])
AC_PREPROC_IFELSE([AC_LANG_SOURCE([[
#include <gmp.h>
#ifndef __MPIR_VERSION
#error
#IRIXdoesnotexitaterrordirective
#endif
]])],
  [AC_MSG_RESULT(yes)
   AC_DEFINE([HAVE_MPIR],1,[Define to 1 if GMP is MPIR])],
  [AC_MSG_RESULT(no)])


dnl Now choose how to link the GMP library. If we can, we'd prefer to
dnl link it statically since that makes for faster function calls. To
dnl link it statically, we mustn't build a dynamic GMP-ECM library and
dnl we need to find the libgmp.a file. At the moment, we only look for
dnl it at the path specified by the user (i.e. --with-gmp) but make no
dnl attempt to find it in the default system lib directories.

dnl If GMP is linked statically, we pass its path/filename to the Makefile
dnl via GMPLIB, otherwise -lgmp is passed via GMPLIB.
dnl The search path to the dynamic GMP library is added to LDFLAGS,
dnl if GMP is not specified by full pathname.
GMPLDFLAGS=""
if test -d "$with_gmp_lib"; then
  GMPLDFLAGS="-L$with_gmp_lib"
fi
GMPLIB="-lgmp"
if test "x$enable_shared" != xyes; then
  dnl Only look for libgmp.a when a library directory was given; with an
  dnl empty with_gmp_lib the path would otherwise degenerate to /libgmp.a.
  if test "x$with_gmp_lib" != x && test -r "$with_gmp_lib/libgmp.a"; then
    GMPLIB="$with_gmp_lib/libgmp.a"
    dnl Don't need -L flag since we give full path to libgmp.a
    GMPLDFLAGS=""
  fi
fi
AC_SUBST([GMPLIB])
LDFLAGS="$LDFLAGS $GMPLDFLAGS"


dnl Test linking against GMP. This tests, for example, that the compiler
dnl and GMP agree on the ABI (32 or 64 bit). AC_CHECK_LIB() does not work,
dnl as it requires linking the library with -lgmp, whereas we may want
dnl to specify it by full pathname as we do in the Makefile
dnl LIBS is extended only temporarily; the previous value is kept in
dnl LIBS_BACKUP.

AC_MSG_CHECKING([whether we can link against GMP])
LIBS_BACKUP="$LIBS"
LIBS="$LIBS $GMPLIB"
AC_LINK_IFELSE([AC_LANG_PROGRAM([[
#ifdef HAVE_GMP_H
#include <gmp.h>
#endif]],[[
  mpz_t t;
  mpz_init(t);
  mpz_clear(t);
  return 0;
]])], [AC_MSG_RESULT(yes)], [AC_MSG_RESULT(no)
   AC_MSG_ERROR([Could not link against GMP library.])]
)

dnl Check for corresponding 'gmp.h' and libgmp.a
dnl This requires running a program linked against GMP,
dnl so is done after the link-only test.
AC_MSG_CHECKING([if gmp.h version and libgmp version are the same])
dnl The test program rebuilds the version string from the gmp.h macros and
dnl compares it with the gmp_version string of the library actually linked;
dnl it exits with 0 only if both agree.
AC_RUN_IFELSE([AC_LANG_PROGRAM([[
#include <stdio.h>
#include <string.h>
#include <gmp.h>
]],[[
char buffer[100];
if (__GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR <= 2 &&
    __GNU_MP_VERSION_PATCHLEVEL == 0)
  sprintf (buffer, "%d.%d", __GNU_MP_VERSION, __GNU_MP_VERSION_MINOR);
else
  sprintf (buffer, "%d.%d.%d", __GNU_MP_VERSION, __GNU_MP_VERSION_MINOR,
           __GNU_MP_VERSION_PATCHLEVEL);
printf ("(%s/%s) ", buffer, gmp_version);
return strcmp (buffer, gmp_version);
]])],
   [AC_MSG_RESULT([yes])],
   [AC_MSG_RESULT([no])
    AC_MSG_ERROR(['gmp.h' and 'libgmp' have different versions, you have to reinstall GMP properly.])],
   [AC_MSG_RESULT([cross-compiling: cannot test])]
)

AC_CHECK_FUNCS([__gmpn_add_nc __gmpn_mod_34lsub1 __gmpn_redc_1 __gmpn_redc_2])
AC_CHECK_FUNCS([__gmpn_mullo_n __gmpn_redc_n __gmpn_preinv_mod_1])

LIBS="$LIBS_BACKUP"

dnl Check if the compiler understands some __attribute__ directives
AC_MSG_CHECKING([whether compiler knows __attribute__((hot))])
dnl The AC_LANG_WERROR directive causes configure to consider a test
dnl compilation unsuccessful if it produced any output on stderr.
dnl We use it since unknown __attribute__ only cause a warning, not an
dnl error. Unfortunately there seems to be no way to switch it off again,
dnl so this test is at the end of the configure script
AC_LANG_WERROR
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([void foo() __attribute__ ((hot));
void foo() {return;}], [foo()])],
   [AC_DEFINE([ATTRIBUTE_HOT],[__attribute__ ((hot))], [How to specify hot-spot attribute, if available])
    AC_MSG_RESULT([yes])
   ],
   [AC_DEFINE([ATTRIBUTE_HOT],[ ], [How to specify hot-spot attribute, if available])
    AC_MSG_RESULT([no])
   ])

dnl Check for xsltproc
AC_CHECK_PROG([XSLTPROC],[xsltproc],[xsltproc])
if test "x$XSLTPROC" != x; then
  AC_MSG_CHECKING([for docbook.xsl])
  dnl A value of XSLDIR supplied by the user takes precedence over the
  dnl directories probed below.
  if test "x$XSLDIR" = x; then
    if test -d "/usr/local/share/sgml/docbook/xsl-stylesheets"; then
      XSLDIR="/usr/local/share/sgml/docbook/xsl-stylesheets"
    elif test -d "/usr/share/sgml/docbook/xsl-stylesheets"; then
      XSLDIR="/usr/share/sgml/docbook/xsl-stylesheets"
    elif test -d "/usr/local/share/docbook/"; then
      XSLDIR="/usr/local/share/docbook/"
    elif test -d "/usr/share/docbook/"; then
      XSLDIR="/usr/share/docbook/"
    fi
  fi
  if test -r "$XSLDIR/manpages/docbook.xsl"; then
    AC_MSG_RESULT([yes])
    make_manpage="yes"
  else
    AC_MSG_RESULT([no])
  fi
fi
AM_CONDITIONAL([MAKE_MANPAGE], [test "x$make_manpage" = xyes])

dnl Check for valgrind. GMP-ECM uses exit code 1 to signal error,
dnl so we make valgrind use that code to signal error, too
AC_CHECK_PROG([VALGRIND], [valgrind], [valgrind -q --error-exitcode=1])

dnl Map the host triplet to the suffix of the ecm-params.h.* and
dnl mul_fft-params.h.* files to use.
case $host in
  athlon*-*-*)
    config_arch="athlon" ;;
  x86_64*-*-*)
    config_arch="athlon64" ;;
  pentium3-*-*)
    config_arch="pentium3" ;;
  pentium4-*-*)
    config_arch="pentium4" ;;
  pentium-m-*-*)
    config_arch="pentium-m" ;;
  alphaev6*-*-*)
    config_arch="alpha-ev6" ;;
  alphaev56*-*-*)
    config_arch="alpha-ev56" ;;
  alphaev5*-*-*)
    config_arch="alpha-ev5" ;;
  powerpc7450-*-*)
    config_arch="powerpc7450" ;;
  powerpc-apple-darwin* | powerpc64-*-*)
    config_arch="powerpc970" ;;
  mips64el-*-*)
    config_arch="mips64el" ;;
  armv5tel-*-*)
    config_arch="armv5tel" ;;
  sparc64-*-*)
    config_arch="sparc64" ;;
  ia64-*-*)
    config_arch="ia64" ;;
  hppa2.0-*-*)
    config_arch="hppa2.0" ;;
  *)
    config_arch="default" ;;
esac

# See if this is a Core 2, if we have /proc/cpuinfo
# (grep -q is not portable, so the output is discarded by redirection instead)
core2warn=no
if test x"$config_arch" = xathlon64; then
  if test -f /proc/cpuinfo; then
    if $EGREP "Core\(TM\)2" /proc/cpuinfo >/dev/null 2>&1; then
      config_arch=core2
    elif $EGREP "Core\(TM\) i5" /proc/cpuinfo >/dev/null 2>&1; then
      config_arch=corei5
    fi
  else
    # No /proc/cpuinfo, tell user about ecm-params.h.core2
    core2warn=yes
  fi
fi

# See if this is a Pentium 4, if we have /proc/cpuinfo
pentium4warn=no
if test x"$config_arch" = xdefault; then
  if test -f /proc/cpuinfo; then
    if $EGREP "Pentium\(R\) 4" /proc/cpuinfo >/dev/null 2>&1; then
      config_arch=pentium4
    fi
  else
    # No /proc/cpuinfo, tell user about ecm-params.h.pentium4
    pentium4warn=yes
  fi
fi

LIBS="$LIBS $GWLIB"

GMP_FINISH
AC_CONFIG_FILES([Makefile athlon/Makefile pentium4/Makefile x86_64/Makefile powerpc64/Makefile build.vc10/Makefile build.vc10/assembler/Makefile build.vc10/ecm/Makefile build.vc10/libecm/Makefile build.vc10/tune/Makefile build.vc10/bench_mulredc/Makefile])
AC_CONFIG_LINKS([ecm-params.h:ecm-params.h.$config_arch])
MUL_FFT_PARAMS="mul_fft-params.h.$config_arch"
dnl Fall back to the default parameter file when no architecture-specific one
dnl is shipped.  The file is looked up under $srcdir so that the test also
dnl works when configure is run from a separate build directory.
if test -f "$srcdir/$MUL_FFT_PARAMS"; then :
else
  MUL_FFT_PARAMS="mul_fft-params.h.default"
fi
AC_CONFIG_LINKS([mul_fft-params.h:"$MUL_FFT_PARAMS"])
AC_SUBST([XSLDIR])
AC_SUBST([ASMPATH])
AC_SUBST([GSL_LD_FLAGS])
AC_OUTPUT

dnl Summary of the chosen configuration.
AC_MSG_NOTICE([Configuration:])
AC_MSG_NOTICE([Build for host type $host])
AC_MSG_NOTICE([CC=$CC, CFLAGS=$CFLAGS])
AC_MSG_NOTICE([Linking GMP with $GMPLIB])

if test "x$enable_asm_redc" = xyes; then
  AC_MSG_NOTICE([Using asm redc code from directory $ASMPATH])
else
  AC_MSG_NOTICE([Not using asm redc code])
fi

if test "x$enable_sse2" = xyes; then
  AC_MSG_NOTICE([Using SSE2 instructions in NTT code])
else
  AC_MSG_NOTICE([Not using SSE2 instructions in NTT code])
fi

if test "x$with_gwnum" != "x"; then
  AC_MSG_NOTICE([Linking with George Woltman's GWNUM])
fi

if test "x$enable_assert" = xyes; then
  AC_MSG_NOTICE([Assertions enabled])
else
  AC_MSG_NOTICE([Assertions disabled])
fi

if test "x$enable_shellcmd" = xyes; then
  AC_MSG_NOTICE([Shell command execution enabled])
else
  AC_MSG_NOTICE([Shell command execution disabled])
fi

if test "x$enable_openmp" = xyes; then
  AC_MSG_NOTICE([OpenMP enabled])
else
  AC_MSG_NOTICE([OpenMP disabled])
fi

if test "x$enable_memory_debug" = xyes; then
  AC_MSG_NOTICE([Memory debugging enabled])
else
  AC_MSG_NOTICE([Memory debugging disabled])
fi

if test x"$core2warn" = xyes; then
  AC_MSG_NOTICE([Your cpu was detected as x86_64; if it is a Core 2, please either use the ecm-params.h.core2 file by executing the commands:])
  AC_MSG_NOTICE([rm ecm-params.h])
  AC_MSG_NOTICE([ln -s ecm-params.h.core2 ecm-params.h])
  AC_MSG_NOTICE([or generate a custom ecm-params.h file for your system as described in INSTALL.])
fi

if test x"$pentium4warn" = xyes; then
  AC_MSG_NOTICE([Your cpu was detected as default; if it is a Pentium 4, please either use the ecm-params.h.pentium4 file by executing the commands:])
  AC_MSG_NOTICE([rm ecm-params.h])
  AC_MSG_NOTICE([ln -s ecm-params.h.pentium4 ecm-params.h])
  AC_MSG_NOTICE([or generate a custom ecm-params.h file for your system as
described in INSTALL.]) fi ecm-6.4.4/ecm.xml0000644023561000001540000005751012113417004010502 00000000000000 ECM 1 April 22, 2003 ecm integer factorization using ECM, P-1 or P+1 ecm B1 B2min-B2maxB2 DESCRIPTION ecm is an integer factoring program using the Elliptic Curve Method (ECM), the P-1 method, or the P+1 method. The following sections describe parameters relevant to these algorithms. STEP 1 AND STEP 2 BOUND PARAMETERS B1 B1 is the step 1 bound. It is a mandatory parameter. It can be given either in integer format (for example 3000000) or in floating-point format (3000000.0 or 3e6). The largest possible B1 value is 9007199254740996 for P-1, and ULONG_MAX or 9007199254740996 (whichever is smaller) for ECM and P+1. All primes 2 <= p <= B1 are processed in step 1. B2 B2 is the step 2 bound. It is optional: if omitted, a default value is computed from B1, which should be close to optimal. Like B1, it can be given either in integer or in floating-point format. The largest possible value of B2 is approximately 9e23, but depends on the number of blocks k if you specify the option. All primes B1 <= p <= B2 are processed in step 2. If B2 < B1, no step 2 is performed. B2min-B2max alternatively one may use the B2min-B2max form, which means that all primes B2min <= p <= B2max should be processed. Thus specifying B2 only corresponds to B1-B2. The values of B2min and B2max may be arbitrarily large, but their difference must not exceed approximately 9e23, subject to the number of blocks k. FACTORING METHOD Perform P-1 instead of the default method (ECM). Perform P+1 instead of the default method (ECM). GROUP AND INITIAL POINT PARAMETERS [ECM, P-1, P+1] Use x (arbitrary-precision integer or rational) as initial point. For example, is valid. If not given, x is generated from the sigma value for ECM, or at random for P-1 and P+1. [ECM] Use s (arbitrary-precision integer) as curve generator. If omitted, s is generated at random. 
[ECM] Use a (arbitrary-precision integer) as curve parameter. If omitted, it is generated from the sigma value. [ECM, P-1, P+1] Multiply the initial point by val, which can be any valid expression, possibly containing the special character N as place holder for the current input number. Example: ecm -pp1 -go "N^2-1" 1e6 < composite2000 STEP 2 PARAMETERS [ECM, P-1, P+1] Perform k blocks in step 2. For a given B2 value, increasing k decreases the memory usage of step 2, at the expense of more cpu time. Stores some tables of data in disk files to reduce the amount of memory occupied in step 2, at the expense of disk I/O. Data will be written to files file.1, file.2 etc. Does not work with fast stage 2 for P+1 and P-1. [ECM, P-1] Use x^n for Brent-Suyama's extension ( disables Brent-Suyama's extension). The default polynomial is chosen depending on the method and B2. For P-1 and P+1, disables the fast stage 2. For P-1, n must be even. [ECM, P-1] Use degree-n Dickson's polynomial for Brent-Suyama's extension. For P-1 and P+1, disables the fast stage 2. Like for , n must be even for P-1. Use at most n megabytes of memory in stage 2. Enable or disable the Number-Theoretic Transform code for polynomial arithmetic in stage 2. With NTT, dF is chosen to be a power of 2, and is limited by the number of suitable primes that fit in a machine word (which is a limitation only on 32 bit systems). The -no-ntt variant uses more memory, but is faster than NTT with large input numbers. By default, NTT is used for P-1, P+1 and for ECM on numbers of size at most 30 machine words. OUTPUT Quiet mode. Found factorizations are printed on standard output, with factors separated by white spaces, one line per input number (if no factor was found, the input number is simply copied). Verbose mode. More information is printed, more options increase verbosity.
With one , the kind of modular multiplication used, initial x0 value, step 2 parameters and progress, and expected curves and time to find factors of different sizes for ECM are printed. With , the A value for ECM and residues at the end of step 1 and step 2 are printed. More print internal data for debugging. Print a time stamp whenever a new ECM curve or P+1 or P-1 run is processed. MODULAR ARITHMETIC OPTIONS Several algorithms are available for modular multiplication. The program tries to find the best one for each input; one can force a given method with the following options. Use GMP's mpz_mod function (sub-quadratic for large inputs, but induces some overhead for small ones). Use Montgomery's multiplication (quadratic version). Usually best method for small input. Use Montgomery's multiplication (sub-quadratic version). Theoretically optimal for large input. Disable special base-2 code (which is used when the input number is a large factor of 2^n+1 or 2^n-1, see ). n Force use of special base-2 code, input number must divide 2^n+1 if n > 0, or 2^|n|-1 if n < 0. FILE I/O The following options enable one to perform step 1 and step 2 separately, either on different machines, at different times, or using different software (in particular, George Woltman's Prime95/mprime program can produce step 1 output suitable for resuming with GMP-ECM). It can also be useful to split step 2 into several runs, using the B2min-B2max option. Take input from file file instead of from standard input. Save result of step 1 in file. If file exists, an error is raised. Example: to perform only step 1 with B1=1000000 on the composite number in the file "c155" and save its result in file "foo", use ecm -save foo 1e6 1 < c155 Like , but appends to existing files. Resume residues from file, reads from standard input if file is "-". Example: to perform step 2 following the above step 1 computation, use ecm -resume foo 1e6 Periodically write the current residue in stage 1 to file. 
In case of a power failure, etc., the computation can be continued with the option. ecm -chkpnt foo -pm1 1e10 < largenumber.txt LOOP MODE The loop mode (option ) enables one to run several curves on each input number. The following options control its behavior. Perform n runs on each input number (default is one). This option is mainly useful for P+1 (for example with n=3) or for ECM, where n could be set to the expected number of curves to find a d-digit factor with a given step 1 bound. This option is incompatible with . Giving produces an infinite loop until a factor is found. In loop mode, stop when a factor is found; the default is to continue until the cofactor is prime or the specified number of runs are done. Breadth-first processing: in loop mode, run one curve for each input number, then a second curve for each one, and so on. This is the default mode with . Depth-first processing: in loop mode, run n curves for the first number, then n curves for the second one and so on. This is the default mode with standard input. In loop mode, in the second and following runs, output only expressions that have at most n characters. Default is . In loop mode, increment B1 by n after each curve. In loop mode, multiply B1 by a factor depending on n after each curve. Default is one which should be optimal on one machine, while could be used when trying to factor the same number simultaneously on 10 identical machines. SHELL COMMAND EXECUTION These options allow for executing shell commands to supplement functionality to GMP-ECM. Execute command cmd to test primality of factors and cofactors instead of GMP-ECM's own functions. The number to test is passed via stdin. An exit code of 0 is interpreted as probably prime, a non-zero exit code as composite. Executes command cmd whenever a factor is found by P-1, P+1 or ECM. The input number, factor and cofactor are passed via stdin, each on a line. This could be used e.g.
to mail new factors automatically: ecm -faccmd 'mail -s $HOSTNAME found a factor me@myaddress.com' 11e6 < cunningham.in Executes command cmd before each ECM curve, P-1 or P+1 attempt on a number is started. If the exit status of cmd is non-zero, GMP-ECM terminates immediately, otherwise it continues normally. GMP-ECM is stopped while cmd runs, offering a way for letting GMP-ECM sleep for example while the system is otherwise busy. MISCELLANEOUS Run the program in nice mode (below normal priority). Run the program in very nice mode (idle priority). Multiply the default step 2 bound B2 by the floating-point value f. Example: divides the default B2 by 2. Add n seconds to stage 1 time. This is useful to get correct expected time with -v if part of stage 1 was done in another run. Force cofactor output in decimal (even if expressions are used). , Display a short description of ecm usage, parameters and command line options. Prints configuration parameters used for the compilation and exits. INPUT SYNTAX The input numbers can have several forms: Raw decimal numbers like 123456789. Comments can be placed in the file: everything after // is ignored, up to the end of line. Line continuation. If a line ends with a backslash character \, it is considered to continue on the next line. Common arithmetic expressions can be used. Example: 3*5+2^10. Factorial: example 53!. Multi-factorial: example 15!3 means 15*12*9*6*3. Primorial: example 11# means 2*3*5*7*11. Reduced primorial: example 17#5 means 5*7*11*13*17. Functions: currently, the only available function is Phi(x,n). EXIT STATUS The exit status reflects the result of the last ECM curve or P-1/P+1 attempt the program performed. 
Individual bits signify particular events, specifically: Bit 0 0 if normal program termination, 1 if error occurred Bit 1 0 if no proper factor was found, 1 otherwise Bit 2 0 if factor is composite, 1 if factor is a probable prime Bit 3 0 if cofactor is composite, 1 if cofactor is a probable prime Thus, the following exit status values may occur: 0 Normal program termination, no factor found 1 Error 2 Composite factor found, cofactor is composite 6 Probable prime factor found, cofactor is composite 8 Input number found 10 Composite factor found, cofactor is a probable prime 14 Probable prime factor found, cofactor is a probable prime BUGS Report bugs to <ecm-discuss@lists.gforge.inria.fr>, after checking <http://www.loria.fr/~zimmerma/records/ecmnet.html> for bug fixes or new versions. AUTHORS Pierrick Gaudry <gaudry at lix dot polytechnique dot fr> contributed efficient assembly code for combined mul/redc; Jim Fougeron <jfoug at cox dot net> contributed the expression parser and several command-line options; Laurent Fousse <laurent at komite dot net> contributed the middle product code, the autoconf/automake tools, and is the maintainer of the Debian package; Alexander Kruppa <(lastname)al@loria.fr> contributed estimates for probability of success for ECM, the new P+1 and P-1 stage 2 (with P. L. Montgomery), new AMD64 asm mulredc code, and some other things; Dave Newman <david.(lastname)@jesus.ox.ac.uk> contributed the Kronecker-Schoenhage and NTT multiplication code; Jason S. Papadopoulos contributed a speedup of the NTT code; Paul Zimmermann <zimmerma at loria dot fr> is the author of the first version of the program and chief maintainer of GMP-ECM. Note: email addresses have been obscured, the required substitutions should be obvious.
ecm-6.4.4/spm.c0000644023561000001540000001566612106741273010177 00000000000000/* spm.c - "small prime modulus" functions to precompute an inverse and a primitive root for a small prime Copyright 2005, 2006, 2008, 2009, 2010, 2012 Dave Newman, Jason Papadopoulos, Paul Zimmermann, Alexander Kruppa. The SP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The SP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the SP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include "sp.h" /* Returns the exponent of $q$ in the factorisation of $n$ */ static int exponent (const sp_t q, sp_t n) { int i; for (i = 0; n % q == (sp_t) 0; i++, n /= q); return i; } /* Returns i so that ord(a) = q^i. This assumes that ord(a) is indeed a low power of q. */ static int ordpow (const sp_t q, sp_t a, const sp_t sp, const sp_t mul_c) { int i = 0; for (i = 0; a != (sp_t) 1; i++, a = sp_pow (a, q, sp, mul_c)); return i; } /* initialize roots of unity and twiddle factors for one NTT. If successful, returns 1. 
If unsuccessful, returns 0 (and frees allocated memory) */ static int nttdata_init (const sp_t sp, const sp_t mul_c, const sp_t prim_root, const spv_size_t log2_len, sp_nttdata_t data, spv_size_t breakover) { spv_t r, t; spv_size_t i, j, k; r = data->ntt_roots = (spv_t) sp_aligned_malloc ((log2_len + 1) * sizeof(sp_t)); if (r == NULL) return 0; i = log2_len; r[i] = prim_root; for (i--; (int)i >= 0; i--) r[i] = sp_sqr (r[i+1], sp, mul_c); k = MIN(log2_len, breakover); t = data->twiddle = (spv_t) sp_aligned_malloc (sizeof(sp_t) << k); if (t == NULL) { sp_aligned_free (r); return 0; } data->twiddle_size = 1 << k; for (i = k; i; i--) { sp_t w = r[i]; for (j = t[0] = 1; j < ((spv_size_t) 1 << (i-1)); j++) t[j] = sp_mul (t[j-1], w, sp, mul_c); t += j; } return 1; } static void nttdata_clear(sp_nttdata_t data) { sp_aligned_free(data->ntt_roots); sp_aligned_free(data->twiddle); } /* Compute some constants, including a primitive n'th root of unity. Returns NULL in case of error. k is the number of limbs of the number to factor */ spm_t spm_init (spv_size_t n, sp_t sp, mp_size_t k) { sp_t a, b, bd, sc; spv_size_t q, nc, ntt_power; spm_t spm = (spm_t) malloc (sizeof (__spm_struct)); if (spm == NULL) return NULL; ASSERT (sp % (sp_t) n == (sp_t) 1); spm->sp = sp; sp_reciprocal (spm->mul_c, sp); /* compute spm->invm = -1/p mod B where B = 2^GMP_NUMB_BITS */ a = sp_pow (2, GMP_NUMB_BITS, sp, spm->mul_c); /* a = B mod p */ a = sp_inv (a, sp, spm->mul_c); /* a = 1/B mod p */ /* a = 1/B mod p thus B*a - 1 = invm*p */ a --; b = GMP_NUMB_MASK; #if SP_NUMB_BITS == W_TYPE_SIZE - 2 a = (a << 2) + (b >> (GMP_NUMB_BITS - 2)); b = (b << 2) & GMP_NUMB_MASK; udiv_qrnnd (bd, sc, a, b, sp << 2); #else a = (a << 1) + (b >> (GMP_NUMB_BITS - 1)); b = (b << 1) & GMP_NUMB_MASK; udiv_qrnnd (bd, sc, a, b, sp << 1); #endif spm->invm = bd; /* compute spm->Bpow = B^(k+1) mod p */ spm->Bpow = sp_pow (2, GMP_NUMB_BITS * (k + 1), sp, spm->mul_c); /* find an $n$-th primitive root $a$ of unity $(mod sp)$. 
*/ /* Construct a $b$ whose order $(mod sp)$ is equal to $n$. We try different $a$ values and test if the exponent of $q$ in $ord(a)$ is at least as large as in $n$. If it isn't, we move to another $a$. If it is, we optionally exponentiate to make the exponents equal and test for the remaining $q$'s. We assume that the largest prime dividing $n$ is very small, so no optimizations in factoring n are made. */ a = 2; b = a; nc = n; /* nc is remaining cofactor of n */ q = 2; sc = sp - 1; #ifdef PARI printf ("/* spm_init */ n = %lu; sp = %lu; /* PARI */\n", n, sp); printf ("exponent(a,b) = {local(i); while(b%%a == 0,i++;b/=a); " "return(i)} /* PARI */\n"); #endif for ( ; nc != (spv_size_t) 1; q++) { if (nc % q == (spv_size_t) 0) { const int k = exponent (q, n); /* q^k || n */ sp_t d; int l; #ifdef PARI printf ("exponent(%lu, n) == %d /* PARI */\n", q, k); #endif /* Remove all factors of $q$ from $sp-1$ */ for (d = sp - 1; d % q == (spv_size_t) 0; d /= q); bd = sp_pow (b, d, sp, spm->mul_c); /* Now ord(bd) = q^l, q^l || ord(a) */ l = ordpow (q, bd, sp, spm->mul_c); #ifdef PARI printf ("exponent(%lu, znorder(Mod(%lu, sp))) == %d /* PARI */\n", q, b, l); #endif if (l < k) { /* No good, q appears in ord(a) in a lower power than in n. 
Try next $a$ */ a++; b = a; nc = n; q = 1; /* Loop increment following "continue" will make q=2 */ sc = sp - 1; continue; } else { /* Reduce the exponent of $q$ in $ord(b)$ until is it equal to that in $n$ */ for ( ; l > k; l--) { #ifdef PARI printf ("Exponentiating %lu by %lu\n", b, q); #endif b = sp_pow (b, q, sp, spm->mul_c); } #ifdef PARI printf ("New b = %lu\n", b); #endif } do {nc /= q;} while (nc % q == 0); /* Divide out all q from nc */ while (sc % q == (sp_t) 0) /* Divide out all q from sc */ sc /= q; } } b = sp_pow (b, sc, sp, spm->mul_c); #ifdef PARI printf ("znorder(Mod(%lu, sp)) == n /* PARI */\n", b, sp, n); #endif /* turn this into a primitive n'th root of unity mod p */ spm->prim_root = b; spm->inv_prim_root = sp_inv (b, sp, spm->mul_c); /* initialize auxiliary data for all supported power-of-2 NTT sizes */ ntt_power = 0; while (1) { if (n & (1 << ntt_power)) break; ntt_power++; } if (!nttdata_init (sp, spm->mul_c, sp_pow (spm->prim_root, n >> ntt_power, sp, spm->mul_c), ntt_power, spm->nttdata, NTT_GFP_TWIDDLE_DIF_BREAKOVER)) goto free_spm; if (!nttdata_init (sp, spm->mul_c, sp_pow (spm->inv_prim_root, n >> ntt_power, sp, spm->mul_c), ntt_power, spm->inttdata, NTT_GFP_TWIDDLE_DIT_BREAKOVER)) goto free_nttdata; spm->scratch = (spv_t) sp_aligned_malloc ( MAX_NTT_BLOCK_SIZE * sizeof(sp_t)); if (spm->scratch == NULL) goto free_inttdata; return spm; free_inttdata: nttdata_clear (spm->inttdata); free_nttdata: nttdata_clear (spm->nttdata); free_spm: free (spm); return NULL; } void spm_clear (spm_t spm) { nttdata_clear (spm->nttdata); nttdata_clear (spm->inttdata); sp_aligned_free (spm->scratch); free (spm); } ecm-6.4.4/tune.c0000644023561000001540000003454112106741273010344 00000000000000/* Tune program for GMP-ECM. Copyright 2003, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Paul Zimmermann, Alexander Kruppa, Dave Newman and Jason Papadopoulos. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include "ecm-gmp.h" #include "ecm-impl.h" /* 250ms, we (probably) don't need any more precision */ #define GRANULARITY 250 #define MAX_LOG2_LEN 18 /* 2 * 131072 */ /* MAX_LEN and MAX_LOG2_MPZSPV_NORMALISE_STRIDE expand to expressions in the run-time variable max_log2_len, not in the constant MAX_LOG2_LEN */ #define MAX_LEN (1U << max_log2_len) #define MAX_LOG2_MPZSPV_NORMALISE_STRIDE (MIN (12, max_log2_len)) /* M_str: decimal digits of a fixed modulus */ #define M_str "95209938255048826235189575712705128366296557149606415206280987204268594538412191641776798249266895999715600261737863698825644292938050707507901970225804581" /* ELAPSED reads the local variable __st, so it can only be used where __st is in scope; it is compared against GRANULARITY, so it is presumably in milliseconds (elltime is defined elsewhere) */ #define ELAPSED elltime (__st, cputime () ) /* TUNE_FUNC_START(x) opens the definition of "double x (size_t n)" and declares the locals __i, __k and __st used by TUNE_FUNC_LOOP; the opening brace is closed by the final brace of TUNE_FUNC_END */ #define TUNE_FUNC_START(x) \ double x (size_t n) \ { unsigned int __i, __k = 1; long __st; /* Keep doubling the number of iterations until the timing is at least GRANULARITY */ #define TUNE_FUNC_LOOP(x) \ do { \ do { \ __st = cputime (); \ for (__i = 0; __i < __k; __i++) { x; } \ __k *= 2; \ } while (ELAPSED < GRANULARITY); \ __k /= 2; \ __st = ELAPSED; \ } while (0) /* TUNE_FUNC_END(x) prints __k / __st to stderr when tune_verbose is set, returns that ratio and closes the function opened by TUNE_FUNC_START; x is only used, stringified, as the label of the verbose message */ #define TUNE_FUNC_END(x) \ if (tune_verbose) \ fprintf (stderr, #x "(%2ld) = %f\n", (long)n, (double) __k / (double) __st); \ return (double) __k / (double) __st; } /* Throughout, each function pointer points to a function * * double f0 (size_t n); * * that runs for at least GRANULARITY ms and then returns the number of * iterations performed per ms.
* * X_Y_THRESHOLD denotes the threshold at which to start using Y for X. */ mpz_t M; /* yes, global variables */ gmp_randstate_t gmp_randstate; size_t mp_size; mpzspm_t mpzspm; mpzv_t x, y, z, t; spm_t spm; spv_t spv; mpzspv_t mpzspv; int tune_verbose; int max_log2_len = MAX_LOG2_LEN; int min_log2_len = 3; size_t MPZMOD_THRESHOLD; size_t REDC_THRESHOLD; size_t NTT_GFP_TWIDDLE_DIF_BREAKOVER = MAX_LOG2_LEN; size_t NTT_GFP_TWIDDLE_DIT_BREAKOVER = MAX_LOG2_LEN; size_t MUL_NTT_THRESHOLD; size_t PREREVERTDIVISION_NTT_THRESHOLD; size_t POLYINVERT_NTT_THRESHOLD; size_t POLYEVALT_NTT_THRESHOLD; size_t MPZSPV_NORMALISE_STRIDE = 256; void mpz_quick_random (mpz_t x, mpz_t M, unsigned long b) { mpz_urandomb (x, gmp_randstate, b); if (mpz_cmp (x, M) >= 0) mpz_sub (x, x, M); } double tune_mpres_mul (mp_size_t limbs, int repr) { mpmod_t modulus; mpres_t x, y, z; mpz_t N, p, q; unsigned int __k = 1, __i; long __st; mpz_init (N); mpz_init (p); mpz_init (q); /* No need to generate a probable prime, just ensure N is not divisible by 2 or 3 */ do { mpz_urandomb (N, gmp_randstate, limbs * GMP_NUMB_BITS); while (mpz_gcd_ui (NULL, N, 6) != 1) mpz_add_ui (N, N, 1); } while ((mp_size_t) mpz_size (N) != limbs); if (repr == ECM_MOD_MPZ) mpmod_init_MPZ (modulus, N); else if (repr == ECM_MOD_MODMULN) mpmod_init_MODMULN (modulus, N); else if (repr == ECM_MOD_REDC) mpmod_init_REDC (modulus, N); mpz_urandomm (p, gmp_randstate, N); mpz_urandomm (q, gmp_randstate, N); mpres_init (x, modulus); mpres_init (y, modulus); mpres_init (z, modulus); mpres_set_z (x, p, modulus); mpres_set_z (y, q, modulus); TUNE_FUNC_LOOP (mpres_mul (z, x, y, modulus)); mpres_clear (x, modulus); mpres_clear (y, modulus); mpres_clear (z, modulus); mpmod_clear (modulus); mpz_clear (N); mpz_clear (p); mpz_clear (q); return (double) __k / (double) __st; } double tune_mpres_sqr (mp_size_t limbs, int repr) { mpmod_t modulus; mpres_t x, z; mpz_t N, p; unsigned int __k = 1, __i; long __st; mpz_init (N); mpz_init (p); /* No need to 
generate a probable prime, just ensure N is not divisible by 2 or 3 */ do { mpz_urandomb (N, gmp_randstate, limbs * GMP_NUMB_BITS); while (mpz_gcd_ui (NULL, N, 6) != 1) mpz_add_ui (N, N, 1); } while ((mp_size_t) mpz_size (N) != limbs); if (repr == ECM_MOD_MPZ) mpmod_init_MPZ (modulus, N); else if (repr == ECM_MOD_MODMULN) mpmod_init_MODMULN (modulus, N); else if (repr == ECM_MOD_REDC) mpmod_init_REDC (modulus, N); mpz_urandomm (p, gmp_randstate, N); mpres_init (x, modulus); mpres_init (z, modulus); mpres_set_z (x, p, modulus); TUNE_FUNC_LOOP (mpres_sqr (z, x, modulus)); mpres_clear (x, modulus); mpres_clear (z, modulus); mpmod_clear (modulus); mpz_clear (N); mpz_clear (p); return (double) __k / (double) __st; } double tune_mpres_mul_mpz (size_t n) { return tune_mpres_mul (n, ECM_MOD_MPZ); } double tune_mpres_mul_modmuln (size_t n) { return tune_mpres_mul (n, ECM_MOD_MODMULN); } double tune_mpres_mul_redc (size_t n) { return tune_mpres_mul (n, ECM_MOD_REDC); } TUNE_FUNC_START (tune_spv_ntt_gfp_dif) NTT_GFP_TWIDDLE_DIF_BREAKOVER = n; TUNE_FUNC_LOOP (spv_ntt_gfp_dif (spv, max_log2_len, spm)); TUNE_FUNC_END (tune_spv_ntt_gfp_dif) TUNE_FUNC_START (tune_spv_ntt_gfp_dit) NTT_GFP_TWIDDLE_DIT_BREAKOVER = n; TUNE_FUNC_LOOP (spv_ntt_gfp_dit (spv, max_log2_len, spm)); TUNE_FUNC_END (tune_spv_ntt_gfp_dit_recursive) TUNE_FUNC_START (tune_ntt_mul) MUL_NTT_THRESHOLD = 0; TUNE_FUNC_LOOP (ntt_mul (z, x, y, 1 << n, NULL, 1, mpzspm)); TUNE_FUNC_END (tune_ntt_mul) TUNE_FUNC_START (tune_list_mul) TUNE_FUNC_LOOP (list_mul (z, x, 1 << n, 1, y, 1 << n, 1, t)); TUNE_FUNC_END (tune_list_mul) TUNE_FUNC_START (tune_ntt_PrerevertDivision) PREREVERTDIVISION_NTT_THRESHOLD = 0; TUNE_FUNC_LOOP (ntt_PrerevertDivision (z, x, y, mpzspv, mpzspv, 1 << n, t, mpzspm)); TUNE_FUNC_END (tune_ntt_PrerevertDivision) TUNE_FUNC_START (tune_PrerevertDivision) TUNE_FUNC_LOOP (PrerevertDivision (z, x, y, 1 << n, t, mpzspm->modulus)); TUNE_FUNC_END (tune_PrerevertDivision) TUNE_FUNC_START (tune_ntt_PolyInvert) 
POLYINVERT_NTT_THRESHOLD = 1 << n; TUNE_FUNC_LOOP (ntt_PolyInvert (z, x, 1 << n, t, mpzspm)); TUNE_FUNC_END (tune_ntt_PolyInvert) TUNE_FUNC_START (tune_PolyInvert) TUNE_FUNC_LOOP (PolyInvert (z, x, 1 << n, t, mpzspm->modulus)); TUNE_FUNC_END (tune_PolyInvert) TUNE_FUNC_START (tune_ntt_polyevalT) unsigned int i; mpzv_t *Tree = (mpzv_t *) malloc ((n + 1) * sizeof (mpzv_t)); if (Tree == NULL) { fprintf (stderr, "Cannot allocate memory in tune_ntt_polyevalT\n"); exit (1); } for (i = 0; i <= n; i++) Tree[i] = x; POLYEVALT_NTT_THRESHOLD = 1 << n; TUNE_FUNC_LOOP (ntt_polyevalT (z, 1 << n, Tree, t, mpzspv, mpzspm, NULL)); free (Tree); TUNE_FUNC_END (tune_ntt_polyevalT) TUNE_FUNC_START (tune_polyevalT) unsigned int i; mpzv_t *Tree = (mpzv_t *) malloc ((n + 1) * sizeof (mpzv_t)); if (Tree == NULL) { fprintf (stderr, "Cannot allocate memory in tune_polyevalT\n"); exit (1); } for (i = 0; i <= n; i++) Tree[i] = x; TUNE_FUNC_LOOP (polyeval_tellegen (z, 1 << n, Tree, t, 3 * (1 << n), x, mpzspm->modulus, NULL)); free (Tree); TUNE_FUNC_END (tune_polyevalT) TUNE_FUNC_START (tune_mpzspv_normalise) MPZSPV_NORMALISE_STRIDE = 1 << n; TUNE_FUNC_LOOP (mpzspv_normalise (mpzspv, 0, 1 << MAX_LOG2_MPZSPV_NORMALISE_STRIDE, mpzspm)); TUNE_FUNC_END (tune_mpzspv_normalise) TUNE_FUNC_START (tune_ecm_mul_lo_n) mp_limb_t rp[2 * MPN_MUL_LO_THRESHOLD]; mp_limb_t xp[MPN_MUL_LO_THRESHOLD]; mp_limb_t yp[MPN_MUL_LO_THRESHOLD]; if (n > 1 && n < (mp_size + 1) / 2) return 0.0; mpn_random (xp, mp_size); mpn_random (yp, mp_size); mpn_mul_lo_threshold[mp_size] = n; TUNE_FUNC_LOOP (ecm_mul_lo_n (rp, xp, yp, mp_size)); TUNE_FUNC_END (tune_ecm_mul_lo_n) /* Return the lowest n with min_n <= n < max_n such that * f1(t) >= f0(t) for all t in [n, n + k), or return max_n if no such * n exists. This function will typically return high values if there * is no 'clean' threshold between f0(n) and f1(n). 
*/ size_t crossover2 (double (*f0)(size_t), double (*f1)(size_t), size_t min_n, size_t max_n, size_t k) { size_t n = min_n; size_t t; while (n < max_n) { for (t = MIN (max_n, n + k); t > n; t--) { if ((f0)(t - 1) > (f1)(t - 1)) break; } if (t == n) return n; n = t; }; return max_n; } /* Assume f0 and f1 are monotone decreasing. Return the first n in the range * [min_n, max_n) for which f1(n) >= f0(n), or return max_n if no such n * exists. We use a bisection algorithm so the function is fast but * may give slightly varied results. */ size_t crossover (double (*f0)(size_t), double (*f1)(size_t), size_t min_n, size_t max_n) { size_t mid_n; #ifdef TUNE_SLOW return crossover2 (f0, f1, min_n, max_n, 1); #endif if (min_n == max_n) return min_n; mid_n = (max_n + min_n) / 2; return ((f0)(mid_n) > (f1)(mid_n)) ? crossover (f0, f1, mid_n + 1, max_n) : crossover (f0, f1, min_n, mid_n); } /* Return the n in the range [min_n, max_n) that maximises f(n). * We make no assumptions about the shape of f(n) and so evaluate * f at every point. */ size_t maximise (double (*f)(size_t), size_t min_n, size_t max_n) { size_t n, best_n = 0; double f_n, f_best_n = -1.0; for (n = min_n; n < max_n; n++) { f_n = f (n); if (f_n > f_best_n) { f_best_n = f_n; best_n = n; } } return best_n; } /* Debugging. Print the value of f0(n) and f1(n) and which is fastest. */ void print_timings (double (*f0)(size_t), double (*f1)(size_t), size_t min_n, size_t max_n) { size_t n; double f0_n, f1_n; for (n = min_n; n < max_n; n++) { f0_n = (f0)(n); f1_n = (f1)(n); printf ("n=%2ld: %8.2f %8.2f (f%d)\n", (long) n, f0_n, f1_n, (f0_n <= f1_n) ? 
1 : 0); } } int main (int argc, char **argv) { spv_size_t i; unsigned long b; while (argc > 1) { if (strcmp (argv[1], "-v") == 0) { tune_verbose = 1; argc --; argv ++; } else if (argc > 2 && strcmp (argv[1], "-max_log2_len") == 0) { max_log2_len = atoi (argv[2]); if (max_log2_len < min_log2_len) max_log2_len = min_log2_len; argc -= 2; argv += 2; } else { fprintf (stderr, "Usage: tune [-v] [-max_log2_len nnn]\n"); exit (1); } } gmp_randinit_default (gmp_randstate); mpz_init_set_str (M, M_str, 10); b = (unsigned long) mpz_sizeinbase (M, 2); x = init_list (MAX_LEN); y = init_list (MAX_LEN); z = init_list (MAX_LEN); t = init_list (list_mul_mem (MAX_LEN / 2) + 3 * MAX_LEN / 2); mpzspm = mpzspm_init (MAX_LEN, M); if (mpzspm == NULL) { fprintf (stderr, "Error, cannot allocate memory in mpzspm_init\n"); exit (1); } mpzspv = mpzspv_init (MAX_LEN, mpzspm); if (mpzspv == NULL) { fprintf (stderr, "Error, cannot allocate memory in mpzspv_init\n"); exit (1); } mpzspv_random (mpzspv, 0, MAX_LEN, mpzspm); for (i = 0; i < MAX_LEN; i++) mpz_quick_random (x[i], M, b); for (i = 0; i < MAX_LEN; i++) mpz_quick_random (y[i], M, b); for (i = 0; i < MAX_LEN; i++) mpz_quick_random (z[i], M, b); spm = mpzspm->spm[0]; spv = mpzspv[0]; MPZMOD_THRESHOLD = crossover2 (tune_mpres_mul_modmuln, tune_mpres_mul_mpz, 1, 512, 10); printf ("#define MPZMOD_THRESHOLD %lu\n", (unsigned long) MPZMOD_THRESHOLD); REDC_THRESHOLD = crossover2 (tune_mpres_mul_mpz, tune_mpres_mul_redc, MPZMOD_THRESHOLD, 512, 10); printf ("#define REDC_THRESHOLD %lu\n", (unsigned long) REDC_THRESHOLD); mpn_mul_lo_threshold[0] = 0; mpn_mul_lo_threshold[1] = 0; printf ("#define MPN_MUL_LO_THRESHOLD_TABLE {0, 0"); for (mp_size = 2; mp_size < MPN_MUL_LO_THRESHOLD; mp_size++) { mpn_mul_lo_threshold[mp_size] = maximise (tune_ecm_mul_lo_n, 0, mp_size); printf (", %lu", (unsigned long) mpn_mul_lo_threshold[mp_size]); fflush (stdout); } printf ("}\n"); NTT_GFP_TWIDDLE_DIF_BREAKOVER = maximise (tune_spv_ntt_gfp_dif, min_log2_len, 
max_log2_len); printf ("#define NTT_GFP_TWIDDLE_DIF_BREAKOVER %lu\n", (unsigned long) NTT_GFP_TWIDDLE_DIF_BREAKOVER); NTT_GFP_TWIDDLE_DIT_BREAKOVER = maximise (tune_spv_ntt_gfp_dit, min_log2_len, max_log2_len); printf ("#define NTT_GFP_TWIDDLE_DIT_BREAKOVER %lu\n", (unsigned long) NTT_GFP_TWIDDLE_DIT_BREAKOVER); MUL_NTT_THRESHOLD = 1 << crossover2 (tune_list_mul, tune_ntt_mul, 1, max_log2_len, 2); printf ("#define MUL_NTT_THRESHOLD %lu\n", (unsigned long) MUL_NTT_THRESHOLD); PREREVERTDIVISION_NTT_THRESHOLD = 1 << crossover2 (tune_PrerevertDivision, tune_ntt_PrerevertDivision, 1, max_log2_len, 2); printf ("#define PREREVERTDIVISION_NTT_THRESHOLD %lu\n", (unsigned long) PREREVERTDIVISION_NTT_THRESHOLD); POLYINVERT_NTT_THRESHOLD = 1 << crossover (tune_PolyInvert, tune_ntt_PolyInvert, 5, max_log2_len); printf ("#define POLYINVERT_NTT_THRESHOLD %lu\n", (unsigned long) POLYINVERT_NTT_THRESHOLD); POLYEVALT_NTT_THRESHOLD = 1 << crossover (tune_polyevalT, tune_ntt_polyevalT, 5, max_log2_len); printf ("#define POLYEVALT_NTT_THRESHOLD %lu\n", (unsigned long) POLYEVALT_NTT_THRESHOLD); MPZSPV_NORMALISE_STRIDE = 1 << maximise (tune_mpzspv_normalise, 1, MAX_LOG2_MPZSPV_NORMALISE_STRIDE); printf ("#define MPZSPV_NORMALISE_STRIDE %lu\n", (unsigned long) MPZSPV_NORMALISE_STRIDE); mpzspv_clear (mpzspv, mpzspm); mpzspm_clear (mpzspm); clear_list (x, MAX_LEN); clear_list (y, MAX_LEN); clear_list (z, MAX_LEN); clear_list (t, list_mul_mem (MAX_LEN / 2) + 3 * MAX_LEN / 2); mpz_clear (M); gmp_randclear (gmp_randstate); return 0; } ecm-6.4.4/INSTALL-ecm0000644023561000001540000002113312111113424010774 00000000000000Instructions to install GMP-ECM: 0) you first need to install the GNU MP (GMP) library. GNU MP is available from . Remark: GNU MP is already installed in most Linux distributions. However it is often an old version, moreover without processor-specific optimizations. 
If you care about efficiency, be sure to install the latest version of GNU MP, and to compile it for your particular processor. Warning: make sure you have only one version of GMP installed on your system at a given time. Frequently, after compiling GMP from source and installing it without removing the distribution's GMP package, later attempts to build software that uses GMP find the GMP header file from the distribution's GMP package and the library from the newly compiled GMP (or vice versa). GMP-ECM tries to detect this by comparing the version number from header and library; if this test fails, you should remove the obsolete GMP installation. 1) check your configuration with: $ ./configure The configure script accepts several options (see ./configure --help). In particular you can specify the GMP installation directory with: $ ./configure --with-gmp=<gmpdir> where <gmpdir>/include contains the header file gmp.h, and <gmpdir>/lib contains the static or dynamic libraries (libgmp.a, libgmp.so, libgmp.lib). To compile the GMP-ECM library as a shared library, use the --enable-shared parameter for ./configure. Building a shared library is disabled by default. Note: the configure script will first search for a static GMP library, which makes GMP-ECM more efficient. When only a dynamic library is available, make sure to correctly set your dynamic libraries search path (LD_LIBRARY_PATH on Unix systems), otherwise the configure script may fail. Warning: it is recommended to use the same compiler and options as those used to compile GMP, otherwise the compilation may fail, or you may get poor performance. In the GMP build directory, simply type: $ egrep -w '(CC|CFLAGS)' config.log to see which compiler and options were used to build GMP.
For example on a Sparc v9 you may have to type: $ ./configure CC=cc CFLAGS="-fast -fns=no -fsimple=1 -xarch=v9" Note 2: On x86, x86-64, and 64 bit PowerPC systems, using GMP-ECM's own modular multiplication code usually gives better performance than the GMP-based functions. On these systems, configure enables it by default. If the system is not identified correctly, you can enable it by adding the command line parameter "--enable-asm-redc" to configure. To disable it, add "--disable-asm-redc". On 32-bit x86 systems that have SSE2 (e.g., Pentium 4, some Celeron, some Sempron, Via C7), use of SSE2 instructions in stage 2 of P-1, P+1, and ECM is enabled by default. You can enable it manually by adding the command line parameter "--enable-sse2" and disable it by adding "--disable-sse2" to ./configure. The SSE2 code is not used in 64-bit builds, regardless of these parameters. Note 3: If you want to use George Woltman's GWNUM library for speeding up factoring base 2 numbers, obtain the source file from (on December 2011 the latest source is source272.zip), build the gwnum library for your operating system, then use $ ./configure --with-gwnum= The directory must include the gwnum.a or gwnum.lib file as well as gwnum.h and related header files. The source file of the gwnum library is available at . 2) compile the program with: $ make This will create the 'libecm.a' library, the 'libecm.so' shared library if --enable-shared was used, the 'ecm' binary file, the 'ecmfactor' binary file (sample use of libecm.a), and 'tune', a tuning program. 3) to check that the program works correctly, type: $ make check This will run several tests for P+1, P-1, ECM. These tests take a few minutes. It should normally end with "All ECM tests are ok." 4) (optional) to tune GMP-ECM, simply type: $ make ecm-params; make See also README ("How to get the best of GMP-ECM?"). 
Note: if your machine has not enough memory for the tune program, you can run it manually with ./tune -max_log2_len 16 for example (the default is 18). 5) (optional) you can then install the ecm binary and its man page: $ make install By default, installation will be done in /usr/local. You can change with the --prefix option of configure at step 1: $ ./configure --prefix= The ecm binary will go in /bin, its man page in /share/man/man1, the ecm library in /lib, and the corresponding header file in /include. You can also do "make uninstall" to remove those files. 6) If you like GMP-ECM, please help us factoring Cunningham numbers. First download "cunningham.in" on , then perform one ecm test with B1=110e6 on each number of this file: $ ./ecm 110e6 < cunningham.in > cunningham.out & If you find any factor (grep found cunningham.out), please submit it using the report form on . ============================================================================ Known problems: * [reported by Sam Rawlins] with MinGW under Windows XP (32-bit), the compilation fails in spv.c. A fix seems to add -msse2 to CFLAGS. See http://lists.gforge.inria.fr/pipermail/ecm-discuss/2010-June/004077.html * GCC 4.4 might miscompile GMP-ECM on Sparc, see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45559 for more details. The problem is due in fact to a bug in the Linux kernel. A fix is to use -mcpu=v8 with GCC 4.4 if the bug occurs. ============================================================================ For Windows users: Windows users have two options for building GMP-ECM: (a) the use of a number of Unix on Windows environments, or (b) the use of Microsoft Visual Studio C/C++ 2008. The former is described here while the latter is described in the readme.txt file within the build.vc10 subdirectory. (a) For Windows users with a Unix-like environment: Before you can compile GMP-ECM, you will need a compiler. 
Several suitable compilers are freely available, for example as part of MinGW, CygWin and Microsoft's Services for Unix (SFU). We recommend MinGW as it is a smaller download than the others and generates binaries that run on any Windows system, even if they don't have MinGW installed themselves. Step-by-step instructions, courtesy of Jes Hansen: 1) Download the current MinGW from http://prdownloads.sf.net/mingw/MinGW-3.1.0-1.exe?download and MSYS from http://prdownloads.sf.net/mingw/MSYS-1.0.10.exe?download 2) Create a folder, for example C:\GNU, and install MinGW (execute the MinGW-3.1.0-1.exe file) into C:\GNU\MinGW 3) Install MSYS (execute the MSYS-1.0.10.exe file) into C:\GNU\msys Now you get an icon on the desktop where you can start the MinSys. Do this, because it creates your home folder. Then exit it again. 4) Download the latest version of GMP (in February 2013 the latest version is 5.1.1, this will be assumed for the rest of this document) in .tar.bz2 format from http://gmplib.org/ and place it in your newly created home folder. The home folder is in C:\GNU\msys\home and has the same name as your Windows login name. 5) Download GMP-ECM (if you do not have it already) from http://ecm.gforge.inria.fr/ and place it in your home folder as well. 6) Start the MinSys up again from the desktop and type tar -xvjf gmp-5.1.1.tar.bz2 cd gmp-5.1.1 ./configure make install cd ~ 7) You are back in your home directory. Now type tar -xvzf ecm-6.4.4.tar.gz cd ecm-6.4.4 ./configure --with-gmp=/usr/local make 8) Four executables should have appeared. The main application is ecm.exe, which can be run from the Windows command line.
============================================================================ In case of a problem, report it to us, with: - the output of the config.log file - the versions of GMP-ECM and GMP used (first output line), for example: GMP-ECM 6.4.4 [configured with GMP 5.1.1, --enable-asm-redc] [P+1] - the detailed input enabling us to reproduce the problem, for example: $ echo 328006342451 | ./ecm -pp1 -x0 5 120 7043 - the output you get. Then send your bug report to <ecm-discuss@lists.gforge.inria.fr>. This is a public list, with archives available at <http://lists.gforge.inria.fr/pipermail/ecm-discuss/>. ecm-6.4.4/sets_long.c0000644023561000001540000004405712106741273011371 00000000000000/* Functions for sets of long ints, to factor (Z/NZ)* into a set of sums as described in section 5 of "Improved Stage 2 to $P\pm{}1$ Factoring Algorithms" by Peter L. Montgomery and Alexander Kruppa, ANTS 2008 (8th Algorithmic Number Theory Symposium). Copyright 2007, 2008, 2009, 2012 Alexander Kruppa, Paul Zimmermann. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
/* Functions for sorting an array of longs */

/* Exchange the two values pointed to by a and b. */
static inline void
swap_long (long *a, long *b)
{
  long t;
  t = *a;
  *a = *b;
  *b = t;
}

/* Put the smaller of *a, *b in *a and the larger in *b. */
static inline void
swapsort_long (long *a, long *b)
{
  if (*a > *b)
    swap_long (a, b);
}

/* Sort a[0 ... l-1] in place into ascending order.

   Lengths up to 3 are handled directly by compare-and-swap steps. For longer
   arrays the median of a[0], a[(l-1)/2], a[l-1] is used as pivot, the array
   is split into a part <= pivot and a part > pivot, and both parts are
   sorted recursively.

   With WANT_ASSERT defined, the partition and the final order are checked. */
void
quicksort_long (long *a, unsigned long l)
{
  unsigned long i, j;
  long pivot;

  if (l < 2)
    return;

  j = l - 1;
  swapsort_long (a, a+j);
  if (l == 2)
    return;

  i = j / 2;
  swapsort_long (a, a+i);
  swapsort_long (a+i, a+j);
  if (l == 3)
    return;

  /* Now a[0] <= a[i] <= a[l-1] */
  pivot = a[i]; /* Median of three */

  /* Stuff <= pivot goes in first list */

  /* Invariant: a[0 ... i-1] <= pivot, a[j+1 ... l-1] > pivot */
  for (i = 1; i < j;)
    if (a[i] > pivot)
      {
        /* a[0] <= pivot, so this scan always stops inside the array */
        for (; a[j] > pivot; j--);
        if (i < j)
          swap_long (a+(i++), a+j);
      }
    else
      i++;

#ifdef WANT_ASSERT
  /* NOTE(review): the strict '>' below holds for distinct values; if
     a[l-1] == pivot (duplicates) the last element ends up alone in the
     upper part and this check would fire although the result is still
     sorted correctly. */
  for (j = 0; j < i; j++)
    ASSERT (a[j] <= pivot);
  for (j = i; j < l; j++)
    ASSERT(a[j] > pivot);
#endif

  quicksort_long (a, i);
  quicksort_long (a + i, l - i);

#ifdef WANT_ASSERT
  /* Check every adjacent pair. The loop must advance j, the index that is
     actually used in the comparison. */
  for (j = 0; j < l - 1; j++)
    ASSERT (a[j] <= a[j + 1]);
#endif
}
/* Returns the minimal (if minmax == -1) or maximal (minmax == 1) value in
   the set of sums over the sets in "*sets".

   The extremum of a sumset is the sum of the extrema of the individual
   sets, so each set is scanned once and its smallest (resp. largest)
   element is accumulated in "sum". All sets must be non-empty. */

void
sets_sumset_minmax (mpz_t sum, const sets_long_t *sets, const int minmax)
{
  unsigned long i, nr;
  const set_long_t *set = sets->sets;
  long extremum;

  ASSERT (minmax == 1 || minmax == -1);
  mpz_set_ui (sum, 0UL);

  for (nr = 0; nr < sets->nr; nr++)
    {
      ASSERT (set->card > 0UL);
      extremum = set->elem[0];

      for (i = 1UL; i < set->card; i++)
        if ((minmax == -1 && set->elem[i] < extremum) ||
            (minmax == 1 && set->elem[i] > extremum))
          extremum = set->elem[i];

      if (extremum >= 0)
        mpz_add_ui (sum, sum, (unsigned long) extremum);
      else
        /* Negate in unsigned arithmetic: "-extremum" as a long would
           overflow (undefined behaviour) for LONG_MIN */
        mpz_sub_ui (sum, sum, -(unsigned long) extremum);
      set = sets_nextset (set);
    }

  return;
}
_ADDS_ the size in bytes of the set to "*sets_size" */ static unsigned long sets_factored_Rn2 (set_long_t **L, size_t *sets_size, const long n, const long k) { unsigned long nr = 0UL; long i, m, q, r; size_t size = 0; /* n must be odd, or n and k both even */ ASSERT_ALWAYS(n % 2L == 1L || k % 2L == 0L); ASSERT(L != NULL); m = k; /* The multiplier accumulated so far, init to k */ r = n; /* The remaining cofactor of n */ for (q = 2L; r > 1L; q = (q + 1L) | 1L) /* Find prime factors of n */ { ASSERT (q <= r); while (r % q == 0L) { if (*L != NULL) { /* Add m*R_q/2 to list */ (*L)->card = q; for (i = 0L; i < q; i++) { const long t = m * (2L * i - q + 1L); ASSERT(t % 2L == 0L); (*L)->elem[i] = t / 2L; } *L = sets_nextset (*L); nr++; } size += set_sizeof ((unsigned long) q); /* Multiply this t to multiplier and treat remaining factors of the set */ m *= q; r /= q; } } if (sets_size != NULL) *sets_size += size; return nr; } /* Return a set L of sets M_i so that M_1 + ... + M_k is congruent to (Z/nZ)*, which is the set of residue classes coprime to n. The M_i all have prime cardinality. The size of the set of sets "*L" in bytes is computed and stored in "*sets_size" unless "*sets_size" is NULL. Return the number of sets in L. If L is the NULL pointer, nothing will be stored in L. The correct return value (number of set in L) and "*sets_size" value will still be computed, for example so that the correct amount of space can be allocated and factor_coprimeset() be called again. */ static unsigned long sets_factor_coprime (sets_long_t *sets, size_t *sets_size, const unsigned long n) { unsigned long r, k, nr = 0UL; long p, np; size_t size = sizeof (unsigned long); set_long_t *set = NULL; ASSERT (n > 0UL); if (sets != NULL) set = sets->sets; r = n; while (r > 1UL) { for (p = 2L; r % p > 0L; p++); /* Find smallest prime p that divides r */ for (k = 0UL; r % p == 0UL; k++, r /= p); /* Find p^k || r */ np = n/p; if (p == 2L && k == 1UL) /* Case 2^1. 
Deal with it before the */ { /* while loop below decreases k. */ if (set != NULL) { set->card = 1UL; set->elem[0] = np; set = sets_nextset (set); } size += set_sizeof (1UL); nr++; } /* If k > 1, do the \sum_{i=1}^{k-1} p^i (Z/pZ) part here. (Z/pZ) is represented by an arithmetic progression of common difference 1 and length p. */ while (k-- > 1UL) { nr += sets_factored_Rn2 (&set, &size, p, np); np /= p; } if (p % 4L == 3L) { /* We can use \hat{S}_p. Factor as {-(p+1)/4, (p+1)/4} + C_{(p-1)/2} */ /* Add the {-(p+1)/4, (p+1)/4} set to L */ nr += sets_factored_Rn2 (&set, &size, 2L, (p + 1L) / 2L * np); /* Add the np / 2 * R_{(p-1)/2} set to L */ nr += sets_factored_Rn2 (&set, &size, (p - 1L) / 2L, np); } else if (p % 4L == 1L) { /* Factor into arithmetic progressions of prime length. R_{p} = {-p+1, -p+3, ..., p-3, p+1}, i.e. R_2 = {-1, 1}, R_3 = {-2, 0, 2}, R_4 = {-3, -1, 1, 3} We have R_{sq} = R_q + q*R_s */ nr += sets_factored_Rn2 (&set, &size, p - 1L, 2L * np); } } if (sets_size != NULL) *sets_size = size; if (sets != NULL) sets->nr = nr; return nr; } /* Sort the sets in F into order of ascending cardinality. Uses a simple Bubble sort. */ static void sets_sort (sets_long_t *sets) { unsigned long i, nr_unsorted, highest_swap; set_long_t *set; /* The last sets->nr - nr_unsorted sets in "*sets" are known to be sorted and each one larger than any of the first nr_unsorted sets in "*sets". */ nr_unsorted = sets->nr; while (nr_unsorted > 1UL) { outputf (OUTPUT_TRACE, "nr_unsorted = %lu. 
", nr_unsorted); sets_print (OUTPUT_TRACE, sets); set = sets->sets; highest_swap = 1UL; for (i = 1UL; i < nr_unsorted; i++) { if (set->card > sets_nextset(set)->card) { outputf (OUTPUT_TRACE, "sets_sort: swapping %lu and %lu\n", i - 1, i); set_swap (set); highest_swap = i; } set = sets_nextset (set); } nr_unsorted = highest_swap; } #ifdef WANT_ASSERT set = sets->sets; for (i = 0UL; i + 1UL < sets->nr; i++) { ASSERT(set->card <= sets_nextset (set)->card); set = sets_nextset (set); } #endif } /* Print all the sets in "*sets", formatted as a sum of sets */ void sets_print (const int verbosity, sets_long_t *sets) { unsigned long i, j; set_long_t *set = sets->sets; for (i = 0UL; i < sets->nr; i++) { if (i == 0UL) outputf (verbosity, "{"); else outputf (verbosity, " + {"); ASSERT(set->card > 0UL); outputf (verbosity, "%ld", set->elem[0]); for (j = 1UL; j < set->card; j++) outputf (verbosity, ", %ld", set->elem[j]); outputf (verbosity, "}"); set = sets_nextset (set); } outputf (verbosity, "\n"); } /* Extract sets whose set of sums has cardinality "d". We expect that "d" divides the cardinality of the set of sums of "sets" and that the cardinalities of the sets in "sets" are all prime. The amount of memory in bytes needed to store the extracted sets in "*extracted" is stored at "*extr_size". The number of sets extracted is returned. (If d = p_1 * ... * p_k, the return value is k and "*extr_size" is set_sizeof(p_1) + ... + set_sizeof(p_k).) If "*extracted" is NULL, nothing is written and no sets are removed from "*sets", but "*extr_size" is computed as if they were. */ void sets_extract (sets_long_t *extracted, size_t *extr_size, sets_long_t *sets, const unsigned long d) { unsigned long i, c, remaining_d = d; set_long_t *readfrom, *readnext, *moveto, *extractto = NULL; size_t extracted_size = sizeof (unsigned long); ASSERT_ALWAYS (d > 0UL); if (d == 1UL) { /* d == 1 means we need to extract a set of cardinality 1, which we most likely don't have in "*sets". 
(FIXME: check for set of cardinality 1?) We return the set containing only zero, which can be added to any set of sets without changing the set of sums */ if (extracted != NULL) { extracted->nr = 1; extractto = extracted->sets; extractto->card = 1UL; extractto->elem[0] = 0L; } if (extr_size != NULL) *extr_size = sizeof (unsigned long) + set_sizeof (1UL); return; } if (extracted != NULL) { extracted->nr = 0UL; extractto = extracted->sets; } /* All sets from *sets are read via *readfrom, and (assuming we actually extract them) are either copied to *extractto to *moveto */ readfrom = moveto = sets->sets; for (i = 0UL; i < sets->nr; i++) { c = readfrom->card; /* readfrom->card may get garbled */ readnext = sets_nextset (readfrom); if (remaining_d % c == 0UL) { if (extracted != NULL) { /* Copy this set to extractto */ set_copy (extractto, readfrom); extractto = sets_nextset (extractto); extracted->nr++; } remaining_d /= c; extracted_size += set_sizeof (c); } else { if (extracted != NULL) { /* Move this set within "*sets", filling the gaps left by extracted sets */ set_copy (moveto, readfrom); moveto = sets_nextset (moveto); } } readfrom = readnext; } ASSERT_ALWAYS (remaining_d == 1UL); if (extr_size != NULL) *extr_size = extracted_size; if (extracted != NULL) sets->nr -= extracted->nr; } sets_long_t * sets_get_factored_sorted (const unsigned long beta) { sets_long_t *sets; size_t size; sets_factor_coprime (NULL, &size, beta); sets = malloc (size); if (sets == NULL) return NULL; sets_factor_coprime (sets, NULL, beta); if (test_verbose (OUTPUT_TRACE)) { outputf (OUTPUT_TRACE, "sets_get_factored_sorted: Factored sets before sorting are "); sets_print (OUTPUT_TRACE, sets); } sets_sort (sets); if (test_verbose (OUTPUT_TRACE)) { outputf (OUTPUT_TRACE, "Factored sets after sorting are "); sets_print (OUTPUT_TRACE, sets); } return sets; } #ifdef TESTDRIVE static void selftest (const unsigned long beta) { sets_long_t *sets; set_long_t *sumset; unsigned long i, j, phibeta; mpz_t 
max; ASSERT_ALWAYS (beta > 0); sets = sets_get_factored_sorted (beta); /* Test that the sumset % beta is equal to (Z/betaZ)* % beta */ phibeta = eulerphi (beta); sumset = malloc (set_sizeof (phibeta)); if (sumset == NULL) { fprintf (stderr, "Cannot allocate memory in selftest\n"); exit (1); } sets_sumset (sumset, sets); ASSERT_ALWAYS (sumset->card = phibeta); /* Also test that max (sumset) == sets_max (beta) */ mpz_init (max); sets_max (max, beta); if (phibeta > 0) { long maxelem; maxelem = sumset->elem[0]; for (i = 1; i < phibeta; i++) if (maxelem < sumset->elem[i]) maxelem = sumset->elem[i]; ASSERT_ALWAYS (mpz_cmp_si (max, maxelem) == 0); } else { ASSERT_ALWAYS (mpz_cmp_ui (max, 0UL) == 0); } mpz_clear (max); /* printf ("sumset, before reduction: "); for (i = 0; i < phibeta; i++) printf ("%ld%s", sumset->elem[i], i < phibeta-1 ? ", " : "\n"); */ for (i = 0; i < phibeta; i++) { sumset->elem[i] = (sumset->elem[i] < 0L) ? beta - (long) ((unsigned long) (-sumset->elem[i]) % beta) : (unsigned long) sumset->elem[i] % beta; ASSERT_ALWAYS (sumset->elem[i] >= 0L); ASSERT_ALWAYS (sumset->elem[i] < (long) beta); } /* printf ("sumset, after reduction: "); for (i = 0; i < phibeta; i++) printf ("%ld%s", sumset->elem[i], i < phibeta-1 ? ", " : "\n"); */ quicksort_long (sumset->elem, sumset->card); /* printf ("sumset, after sorting: "); for (i = 0; i < phibeta; i++) printf ("%ld%s", sumset->elem[i], i < phibeta-1 ? 
", " : "\n"); */ j = 0; for (i = 1; i < beta; i++) { if (gcd (i, beta) == 1) { if (sumset->elem[j] != (long) i) { printf ("sumset->elem[%ld] = %ld != %ld\n", j, sumset->elem[j], i); abort(); } j++; } } free (sumset); free (sets); } int main (int argc, char **argv) { unsigned long beta; const unsigned long selftest_max = 1000; int loop = 1; ECM_STDOUT = stdout; ECM_STDERR = stderr; if (argc > 1) { beta = atol (argv[1]); loop = 0; } if (!loop) set_verbose (OUTPUT_TRACE); if (!loop) selftest (beta); else { printf ("Testing beta = 1, ..., %lu\n", selftest_max); for (beta = 1; beta < selftest_max; beta++) selftest (beta); } return 0; } #endif ecm-6.4.4/champions.h0000644023561000001540000000131112111113424011327 00000000000000/* champions.h: defines the keepers of Top-10 lists for P-1, P+1, and ECM factors, and the size that is currently needed to enter the Top-10 */ /* people keeping track of champions and corresponding url's: ECM, P-1, P+1 */ static char *champion_keeper[3] = { "Richard Brent ", "Paul Zimmermann ", "Paul Zimmermann "}; static char *champion_url[3] = {"http://wwwmaths.anu.edu.au/~brent/ftp/champs.txt", "http://www.loria.fr/~zimmerma/records/Pminus1.html", "http://www.loria.fr/~zimmerma/records/Pplus1.html"}; /* minimal number of digits to enter the champions table for ECM, P-1, P+1 */ static unsigned int champion_digits[3] = { 70, 54, 48 }; ecm-6.4.4/mpzspm.c0000644023561000001540000002772112106741273010721 00000000000000/* mpzspm.c - "mpz small prime moduli" - pick a set of small primes large enough to represent a mpzv Copyright 2005, 2006, 2007, 2008, 2009, 2010 Dave Newman, Jason Papadopoulos, Paul Zimmermann, Alexander Kruppa. The SP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
The SP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the SP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include /* for printf */ #include #include "sp.h" #include "ecm-impl.h" /* Tables for the maximum possible modulus (in bit size) for different transform lengths l. The modulus is limited by the condition that primes must be p_i == 1 (mod l), and \Prod_i p_i >= 4l (modulus * S)^2, where S=\Sum_i p_i. Hence for each l=2^k, we take the product P and sum S of primes p_i, SP_MIN <= p_i <= SP_MAX and p_i == 1 (mod l), and store floor (log_2 (sqrt (P / (4l S^2)))) in the table. We only consider power-of-two transform lengths <= 2^31 here. Table entries generated with l=2^k;p=1;P=1;S=0;while(p<=SP_MAX, if(p>=SP_MIN && isprime(p), S+=p; P*=p); \ p+=l);print(floor (log2 (sqrt (P / (4*l * S^2))))) in Pari/GP for k=9 ... 24. The entries for k<9 were simply doubled and rounded down in each step. We currently assume that SP_MIN == 2^(SP_NUMB_BITS-1) and SP_MAX == 2^(SP_NUMB_BITS).
*/ #if (SP_NUMB_BITS == 30) static unsigned long sp_max_modulus_bits[32] = {0, 380000000, 190000000, 95000000, 48000000, 24000000, 12000000, 6000000, 3000000, 1512786, 756186, 378624, 188661, 93737, 46252, 23342, 11537, 5791, 3070, 1563, 782, 397, 132, 43, 0, 0, 0, 0, 0, 0, 0, 0}; #elif (SP_NUMB_BITS == 31) static unsigned long sp_max_modulus_bits[32] = {0, 750000000, 380000000, 190000000, 95000000, 48000000, 24000000, 12000000, 6000000, 3028766, 1512573, 756200, 379353, 190044, 94870, 47414, 23322, 11620, 5891, 2910, 1340, 578, 228, 106, 60, 30, 0, 0, 0, 0, 0, 0}; #elif (SP_NUMB_BITS == 32) static unsigned long sp_max_modulus_bits[32] = {0, 1520000000, 760000000, 380000000, 190000000, 95000000, 48000000, 24000000, 12000000, 6041939, 3022090, 1509176, 752516, 376924, 190107, 95348, 47601, 24253, 11971, 6162, 3087, 1557, 833, 345, 172, 78, 46, 15, 0, 0, 0, 0}; #elif (SP_NUMB_BITS >= 60) /* There are so many primes, we can do pretty much any modulus with any transform length. I didn't bother computing the actual values. 
*/ static unsigned long sp_max_modulus_bits[32] = {0, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX, ULONG_MAX}; #else #error Table of maximal modulus for transform lengths not defined for this SP_MIN ; #endif /* Returns the largest possible transform length we can do for modulus without running out of primes */ spv_size_t mpzspm_max_len (mpz_t modulus) { int i; size_t b; b = mpz_sizeinbase (modulus, 2); /* b = floor (log_2 (modulus)) + 1 */ /* Transform length 2^k is ok if log2(modulus) <= sp_max_modulus_bits[k] <==> ceil(log2(modulus)) <= sp_max_modulus_bits[k] <==> floor(log_2(modulus)) + 1 <= sp_max_modulus_bits[k] if modulus isn't a power of 2 */ for (i = 0; i < 30; i++) { if (b > sp_max_modulus_bits[i + 1]) break; } return (spv_size_t)1 << i; } /* initialize mpzspm->T such that with m[j] := mpzspm->spm[j]->sp T[0][0] = m[0], ..., T[0][n-1] = m[n-1] ... T[d-1][0] = m[0]*...*m[ceil(n/2)-1], T[d-1][1] = m[ceil(n/2)] * ... * m[n-1] T[d][0] = m[0] * ... * m[n-1] where d = ceil(log(n)/log(2)). 
If n = 5, T[0]: 1, 1, 1, 1, 1 T[1]: 2, 2, 1 T[2]: 4, 1 */ static void mpzspm_product_tree_init (mpzspm_t mpzspm) { unsigned int d, i, j, oldn; unsigned int n = mpzspm->sp_num; mpzv_t *T; for (i = n, d = 0; i > 1; i = (i + 1) / 2, d ++); if (d <= I0_THRESHOLD) { mpzspm->T = NULL; return; } T = (mpzv_t*) malloc ((d + 1) * sizeof (mpzv_t)); T[0] = (mpzv_t) malloc (n * sizeof (mpz_t)); for (j = 0; j < n; j++) { mpz_init (T[0][j]); mpz_set_sp (T[0][j], mpzspm->spm[j]->sp); } for (i = 1; i <= d; i++) { oldn = n; n = (n + 1) / 2; T[i] = (mpzv_t) malloc (n * sizeof (mpz_t)); for (j = 0; j < n; j++) { mpz_init (T[i][j]); if (2 * j + 1 < oldn) mpz_mul (T[i][j], T[i-1][2*j], T[i-1][2*j+1]); else /* oldn is odd */ mpz_set (T[i][j], T[i-1][2*j]); } } mpzspm->T = T; mpzspm->d = d; } /* This function initializes a mpzspm_t structure which contains the number of small primes, the small primes with associated primitive roots and precomputed data for the CRT to allow convolution products of length up to "max_len" with modulus "modulus". Returns NULL in case of an error. */ mpzspm_t mpzspm_init (spv_size_t max_len, mpz_t modulus) { unsigned int ub, i, j; mpz_t P, S, T, mp, mt; /* mp is p as mpz_t, mt is a temp mpz_t */ sp_t p, a; mpzspm_t mpzspm; long st; st = cputime (); mpzspm = (mpzspm_t) malloc (sizeof (__mpzspm_struct)); if (mpzspm == NULL) return NULL; /* Upper bound for the number of primes we need. * Let minp, maxp denote the min, max permissible prime, * S the sum of p_1, p_2, ..., p_ub, * P the product of p_1, p_2, ..., p_ub/ * * Choose ub s.t. * * ub * log(minp) >= log(4 * max_len * modulus^2 * maxp^4) * * => P >= minp ^ ub >= 4 * max_len * modulus^2 * maxp^4 * >= 4 * max_len * modulus^2 * (ub * maxp)^2 * >= 4 * max_len * modulus^2 * S^2 * * So we need at most ub primes to satisfy this condition. 
*/ ub = (2 + 2 * mpz_sizeinbase (modulus, 2) + ceil_log_2 (max_len) + \ 4 * SP_NUMB_BITS) / (SP_NUMB_BITS - 1); mpzspm->spm = (spm_t *) malloc (ub * sizeof (spm_t)); if (mpzspm->spm == NULL) goto error_clear_mpzspm; mpzspm->sp_num = 0; /* product of primes selected so far */ mpz_init_set_ui (P, 1UL); /* sum of primes selected so far */ mpz_init (S); /* T is len*modulus^2, the upper bound on output coefficients of a convolution */ mpz_init (T); mpz_mul (T, modulus, modulus); mpz_mul_ui (T, T, max_len); mpz_init (mp); mpz_init (mt); /* find primes congruent to 1 mod max_len so we can do * a ntt of size max_len */ /* Find the largest p <= SP_MAX that is p == 1 (mod max_len) */ p = (SP_MAX / (sp_t) max_len) * (sp_t) max_len; if (p == SP_MAX) /* If max_len | SP_MAX, the +1 might cause overflow */ p = p - (sp_t) max_len + (sp_t) 1; else p++; do { while (p >= SP_MIN && p > (sp_t) max_len && !sp_prime(p)) p -= (sp_t) max_len; /* all primes must be in range */ if (p < SP_MIN || p <= (sp_t) max_len) { outputf (OUTPUT_ERROR, "not enough primes == 1 (mod %lu) in interval\n", (unsigned long) max_len); goto error_clear_mpzspm_spm; } mpzspm->spm[mpzspm->sp_num] = spm_init (max_len, p, mpz_size (modulus)); if (mpzspm->spm[mpzspm->sp_num] == NULL) { outputf (OUTPUT_ERROR, "Out of memory in mpzspm_init()\n"); goto error_clear_mpzspm_spm; } mpzspm->sp_num++; mpz_set_sp (mp, p); mpz_mul (P, P, mp); mpz_add (S, S, mp); /* we want P > 4 * max_len * (modulus * S)^2. 
The S^2 term is due to theorem 3.1 in Bernstein and Sorenson's paper */ mpz_mul (T, S, modulus); mpz_mul (T, T, T); mpz_mul_ui (T, T, max_len); mpz_mul_2exp (T, T, 2UL); p -= (sp_t) max_len; } while (mpz_cmp (P, T) <= 0); outputf (OUTPUT_DEVVERBOSE, "mpzspm_init: finding %u primes took %lums\n", mpzspm->sp_num, cputime() - st); mpz_init_set (mpzspm->modulus, modulus); mpzspm->max_ntt_size = max_len; mpzspm->crt1 = (mpzv_t) malloc (mpzspm->sp_num * sizeof (mpz_t)); mpzspm->crt2 = (mpzv_t) malloc ((mpzspm->sp_num + 2) * sizeof (mpz_t)); mpzspm->crt3 = (spv_t) malloc (mpzspm->sp_num * sizeof (sp_t)); mpzspm->crt4 = (spv_t *) malloc (mpzspm->sp_num * sizeof (spv_t)); mpzspm->crt5 = (spv_t) malloc (mpzspm->sp_num * sizeof (sp_t)); if (mpzspm->crt1 == NULL || mpzspm->crt2 == NULL || mpzspm->crt3 == NULL || mpzspm->crt4 == NULL || mpzspm->crt5 == NULL) { outputf (OUTPUT_ERROR, "Out of memory in mpzspm_init()\n"); goto error_clear_crt; } for (i = 0; i < mpzspm->sp_num; i++) mpzspm->crt4[i] = NULL; for (i = 0; i < mpzspm->sp_num; i++) { mpzspm->crt4[i] = (spv_t) malloc (mpzspm->sp_num * sizeof (sp_t)); if (mpzspm->crt4[i] == NULL) goto error_clear_crt4; } for (i = 0; i < mpzspm->sp_num; i++) { p = mpzspm->spm[i]->sp; mpz_set_sp (mp, p); /* crt3[i] = (P / p)^{-1} mod p */ mpz_fdiv_q (T, P, mp); mpz_fdiv_r (mt, T, mp); a = mpz_get_sp (mt); mpzspm->crt3[i] = sp_inv (a, p, mpzspm->spm[i]->mul_c); /* crt1[i] = (P / p) mod modulus */ mpz_init (mpzspm->crt1[i]); mpz_mod (mpzspm->crt1[i], T, modulus); /* crt4[i][j] = ((P / p[i]) mod modulus) mod p[j] */ for (j = 0; j < mpzspm->sp_num; j++) { mpz_set_sp (mp, mpzspm->spm[j]->sp); mpz_fdiv_r (mt, mpzspm->crt1[i], mp); mpzspm->crt4[j][i] = mpz_get_sp (mt); } /* crt5[i] = (-P mod modulus) mod p */ mpz_mod (T, P, modulus); mpz_sub (T, modulus, T); mpz_set_sp (mp, p); mpz_fdiv_r (mt, T, mp); mpzspm->crt5[i] = mpz_get_sp (mt); } mpz_set_ui (T, 0); for (i = 0; i < mpzspm->sp_num + 2; i++) { mpz_mod (T, T, modulus); mpz_init_set 
(mpzspm->crt2[i], T); mpz_sub (T, T, P); } mpz_clear (mp); mpz_clear (mt); mpz_clear (P); mpz_clear (S); mpz_clear (T); mpzspm_product_tree_init (mpzspm); outputf (OUTPUT_DEVVERBOSE, "mpzspm_init took %lums\n", cputime() - st); return mpzspm; /* Error cases: free memory we allocated so far */ error_clear_crt4: for (i = 0; i < mpzspm->sp_num; i++) free (mpzspm->crt4[i]); error_clear_crt: free (mpzspm->crt1); free (mpzspm->crt2); free (mpzspm->crt3); free (mpzspm->crt4); free (mpzspm->crt5); error_clear_mpzspm_spm: for (i = 0; i < mpzspm->sp_num; i++) free(mpzspm->spm[i]); free (mpzspm->spm); error_clear_mpzspm: free (mpzspm); return NULL; } /* clear the product tree T */ static void mpzspm_product_tree_clear (mpzspm_t mpzspm) { unsigned int i, j; unsigned int n = mpzspm->sp_num; unsigned int d = mpzspm->d; mpzv_t *T = mpzspm->T; if (T == NULL) /* use the slow method */ return; for (i = 0; i <= d; i++) { for (j = 0; j < n; j++) mpz_clear (T[i][j]); free (T[i]); n = (n + 1) / 2; } free (T); } void mpzspm_clear (mpzspm_t mpzspm) { unsigned int i; mpzspm_product_tree_clear (mpzspm); for (i = 0; i < mpzspm->sp_num; i++) { mpz_clear (mpzspm->crt1[i]); free (mpzspm->crt4[i]); spm_clear (mpzspm->spm[i]); } for (i = 0; i < mpzspm->sp_num + 2; i++) mpz_clear (mpzspm->crt2[i]); free (mpzspm->crt1); free (mpzspm->crt2); free (mpzspm->crt3); free (mpzspm->crt4); free (mpzspm->crt5); mpz_clear (mpzspm->modulus); free (mpzspm->spm); free (mpzspm); } ecm-6.4.4/aclocal.m40000644023561000001540000012125412113353764011065 00000000000000# generated automatically by aclocal 1.11.3 -*- Autoconf -*- # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, # 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, # Inc. # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
# This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, [m4_warning([this file was generated for autoconf 2.69. You have another version of autoconf. It may work, but is not guaranteed to. If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically `autoreconf'.])]) # longlong.m4 serial 14 dnl Copyright (C) 1999-2007, 2009-2010 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. dnl From Paul Eggert. # Define HAVE_LONG_LONG_INT if 'long long int' works. # This fixes a bug in Autoconf 2.61, but can be removed once we # assume 2.62 everywhere. # Note: If the type 'long long int' exists but is only 32 bits large # (as on some very old compilers), HAVE_LONG_LONG_INT will not be # defined. In this case you can treat 'long long int' like 'long int'. AC_DEFUN([AC_TYPE_LONG_LONG_INT], [ AC_CACHE_CHECK([for long long int], [ac_cv_type_long_long_int], [AC_LINK_IFELSE( [_AC_TYPE_LONG_LONG_SNIPPET], [dnl This catches a bug in Tandem NonStop Kernel (OSS) cc -O circa 2004. dnl If cross compiling, assume the bug isn't important, since dnl nobody cross compiles for this platform as far as we know. 
AC_RUN_IFELSE( [AC_LANG_PROGRAM( [[@%:@include @%:@ifndef LLONG_MAX @%:@ define HALF \ (1LL << (sizeof (long long int) * CHAR_BIT - 2)) @%:@ define LLONG_MAX (HALF - 1 + HALF) @%:@endif]], [[long long int n = 1; int i; for (i = 0; ; i++) { long long int m = n << i; if (m >> i != n) return 1; if (LLONG_MAX / 2 < m) break; } return 0;]])], [ac_cv_type_long_long_int=yes], [ac_cv_type_long_long_int=no], [ac_cv_type_long_long_int=yes])], [ac_cv_type_long_long_int=no])]) if test $ac_cv_type_long_long_int = yes; then AC_DEFINE([HAVE_LONG_LONG_INT], [1], [Define to 1 if the system has the type `long long int'.]) fi ]) # Define HAVE_UNSIGNED_LONG_LONG_INT if 'unsigned long long int' works. # This fixes a bug in Autoconf 2.61, but can be removed once we # assume 2.62 everywhere. # Note: If the type 'unsigned long long int' exists but is only 32 bits # large (as on some very old compilers), AC_TYPE_UNSIGNED_LONG_LONG_INT # will not be defined. In this case you can treat 'unsigned long long int' # like 'unsigned long int'. AC_DEFUN([AC_TYPE_UNSIGNED_LONG_LONG_INT], [ AC_CACHE_CHECK([for unsigned long long int], [ac_cv_type_unsigned_long_long_int], [AC_LINK_IFELSE( [_AC_TYPE_LONG_LONG_SNIPPET], [ac_cv_type_unsigned_long_long_int=yes], [ac_cv_type_unsigned_long_long_int=no])]) if test $ac_cv_type_unsigned_long_long_int = yes; then AC_DEFINE([HAVE_UNSIGNED_LONG_LONG_INT], [1], [Define to 1 if the system has the type `unsigned long long int'.]) fi ]) # Expands to a C program that can be used to test for simultaneous support # of 'long long' and 'unsigned long long'. We don't want to say that # 'long long' is available if 'unsigned long long' is not, or vice versa, # because too many programs rely on the symmetry between signed and unsigned # integer types (excluding 'bool'). AC_DEFUN([_AC_TYPE_LONG_LONG_SNIPPET], [ AC_LANG_PROGRAM( [[/* For now, do not test the preprocessor; as of 2007 there are too many implementations with broken preprocessors. 
Perhaps this can be revisited in 2012. In the meantime, code should not expect #if to work with literals wider than 32 bits. */ /* Test literals. */ long long int ll = 9223372036854775807ll; long long int nll = -9223372036854775807LL; unsigned long long int ull = 18446744073709551615ULL; /* Test constant expressions. */ typedef int a[((-9223372036854775807LL < 0 && 0 < 9223372036854775807ll) ? 1 : -1)]; typedef int b[(18446744073709551615ULL <= (unsigned long long int) -1 ? 1 : -1)]; int i = 63;]], [[/* Test availability of runtime routines for shift and division. */ long long int llmax = 9223372036854775807ll; unsigned long long int ullmax = 18446744073709551615ull; return ((ll << 63) | (ll >> 63) | (ll < i) | (ll > i) | (llmax / ll) | (llmax % ll) | (ull << 63) | (ull >> 63) | (ull << i) | (ull >> i) | (ullmax / ull) | (ullmax % ull));]]) ]) # Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008, 2011 Free Software # Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 1 # AM_AUTOMAKE_VERSION(VERSION) # ---------------------------- # Automake X.Y traces this macro to ensure aclocal.m4 has been # generated from the m4 files accompanying Automake X.Y. # (This private macro should not be called outside this file.) AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version='1.11' dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl require some minimum version. Point them to the right macro. m4_if([$1], [1.11.3], [], [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl ]) # _AM_AUTOCONF_VERSION(VERSION) # ----------------------------- # aclocal traces this macro to find the Autoconf version. # This is a private macro too. Using m4_define simplifies # the logic in aclocal, which can simply ignore this definition. 
m4_define([_AM_AUTOCONF_VERSION], []) # AM_SET_CURRENT_AUTOMAKE_VERSION # ------------------------------- # Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], [AM_AUTOMAKE_VERSION([1.11.3])dnl m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) # Figure out how to run the assembler. -*- Autoconf -*- # Copyright (C) 2001, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 5 # AM_PROG_AS # ---------- AC_DEFUN([AM_PROG_AS], [# By default we simply use the C compiler to build assembly code. AC_REQUIRE([AC_PROG_CC]) test "${CCAS+set}" = set || CCAS=$CC test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS AC_ARG_VAR([CCAS], [assembler compiler command (defaults to CC)]) AC_ARG_VAR([CCASFLAGS], [assembler compiler flags (defaults to CFLAGS)]) _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl ]) # AM_AUX_DIR_EXPAND -*- Autoconf -*- # Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 1 # For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets # $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to # `$srcdir', `$srcdir/..', or `$srcdir/../..'. # # Of course, Automake must honor this variable whenever it calls a # tool from the auxiliary directory. The problem is that $srcdir (and # therefore $ac_aux_dir as well) can be either absolute or relative, # depending on how configure is run.
This is pretty annoying, since # it makes $ac_aux_dir quite unusable in subdirectories: in the top # source directory, any form will work fine, but in subdirectories a # relative path needs to be adjusted first. # # $ac_aux_dir/missing # fails when called from a subdirectory if $ac_aux_dir is relative # $top_srcdir/$ac_aux_dir/missing # fails if $ac_aux_dir is absolute, # fails when called from a subdirectory in a VPATH build with # a relative $ac_aux_dir # # The reason for the latter failure is that $top_srcdir and $ac_aux_dir # are both prefixed by $srcdir. In an in-source build this is usually # harmless because $srcdir is `.', but things will break when you # start a VPATH build or use an absolute $srcdir. # # So we could use something similar to $top_srcdir/$ac_aux_dir/missing, # iff we strip the leading $srcdir from $ac_aux_dir. That would be: # am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"` # and then we would define $MISSING as # MISSING="\${SHELL} $am_aux_dir/missing" # This will work as long as MISSING is not called from configure, because # unfortunately $(top_srcdir) has no meaning in configure. # However there are other variables, like CC, which are often used in # configure, and could therefore not use this "fixed" $ac_aux_dir. # # Another solution, used here, is to always expand $ac_aux_dir to an # absolute PATH. The drawback is that using absolute paths prevents a # configured tree from being moved without reconfiguration. AC_DEFUN([AM_AUX_DIR_EXPAND], [dnl Rely on autoconf to set up CDPATH properly. AC_PREREQ([2.50])dnl # expand $ac_aux_dir to an absolute path am_aux_dir=`cd $ac_aux_dir && pwd` ]) # AM_CONDITIONAL -*- Autoconf -*- # Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006, 2008 # Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved.
# serial 9 # AM_CONDITIONAL(NAME, SHELL-CONDITION) # ------------------------------------- # Define a conditional. AC_DEFUN([AM_CONDITIONAL], [AC_PREREQ(2.52)dnl ifelse([$1], [TRUE], [AC_FATAL([$0: invalid condition: $1])], [$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl AC_SUBST([$1_TRUE])dnl AC_SUBST([$1_FALSE])dnl _AM_SUBST_NOTMAKE([$1_TRUE])dnl _AM_SUBST_NOTMAKE([$1_FALSE])dnl m4_define([_AM_COND_VALUE_$1], [$2])dnl if $2; then $1_TRUE= $1_FALSE='#' else $1_TRUE='#' $1_FALSE= fi AC_CONFIG_COMMANDS_PRE( [if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then AC_MSG_ERROR([[conditional "$1" was never defined. Usually this means the macro was only invoked conditionally.]]) fi])]) # Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009, # 2010, 2011 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 12 # There are a few dirty hacks below to avoid letting `AC_PROG_CC' be # written in clear, in which case automake, when reading aclocal.m4, # will think it sees a *use*, and therefore will trigger all its # C support machinery. Also note that it means that autoscan, seeing # CC etc. in the Makefile, will ask for an AC_PROG_CC use... # _AM_DEPENDENCIES(NAME) # ---------------------- # See how the compiler implements dependency checking. # NAME is "CC", "CXX", "GCJ", or "OBJC". # We try a few techniques and use that to set a single cache variable. # # We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was # modified to invoke _AM_DEPENDENCIES(CC); we would have a circular # dependency, and given that the user is not expected to run this macro, # just rely on AC_PROG_CC.
AC_DEFUN([_AM_DEPENDENCIES], [AC_REQUIRE([AM_SET_DEPDIR])dnl AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl AC_REQUIRE([AM_MAKE_INCLUDE])dnl AC_REQUIRE([AM_DEP_TRACK])dnl ifelse([$1], CC, [depcc="$CC" am_compiler_list=], [$1], CXX, [depcc="$CXX" am_compiler_list=], [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'], [$1], UPC, [depcc="$UPC" am_compiler_list=], [$1], GCJ, [depcc="$GCJ" am_compiler_list='gcc3 gcc'], [depcc="$$1" am_compiler_list=]) AC_CACHE_CHECK([dependency style of $depcc], [am_cv_$1_dependencies_compiler_type], [if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named `D' -- because `-MD' means `put the output # in D'. rm -rf conftest.dir mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_$1_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp` fi am__universal=false m4_case([$1], [CC], [case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac], [CXX], [case " $depcc " in #( *\ -arch\ *\ -arch\ *) am__universal=true ;; esac]) for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. 
# # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with # Solaris 8's {/usr,}/bin/sh. touch sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf # We check with `-c' and `-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle `-M -o', and we need to detect this. Also, some Intel # versions had trouble with output in subdirs am__obj=sub/conftest.${OBJEXT-o} am__minus_obj="-o $am__obj" case $depmode in gcc) # This depmode causes a compiler race in universal mode. test "$am__universal" = false || continue ;; nosideeffect) # after this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; msvc7 | msvc7msys | msvisualcpp | msvcmsys) # This compiler won't grok `-c -o', but also, the minuso test has # not run yet. These depmodes are late enough in the game, and # so weak that their functioning should not be impacted. am__obj=conftest.${OBJEXT-o} am__minus_obj= ;; none) break ;; esac if depmode=$depmode \ source=sub/conftest.c object=$am__obj \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep $am__obj sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. 
# When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_$1_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_$1_dependencies_compiler_type=none fi ]) AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type]) AM_CONDITIONAL([am__fastdep$1], [ test "x$enable_dependency_tracking" != xno \ && test "$am_cv_$1_dependencies_compiler_type" = gcc3]) ]) # AM_SET_DEPDIR # ------------- # Choose a directory name for dependency files. # This macro is AC_REQUIREd in _AM_DEPENDENCIES AC_DEFUN([AM_SET_DEPDIR], [AC_REQUIRE([AM_SET_LEADING_DOT])dnl AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl ]) # AM_DEP_TRACK # ------------ AC_DEFUN([AM_DEP_TRACK], [AC_ARG_ENABLE(dependency-tracking, [ --disable-dependency-tracking speeds up one-time build --enable-dependency-tracking do not reject slow dependency extractors]) if test "x$enable_dependency_tracking" != xno; then am_depcomp="$ac_aux_dir/depcomp" AMDEPBACKSLASH='\' am__nodep='_no' fi AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno]) AC_SUBST([AMDEPBACKSLASH])dnl _AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl AC_SUBST([am__nodep])dnl _AM_SUBST_NOTMAKE([am__nodep])dnl ]) # Generate code to set up dependency tracking. -*- Autoconf -*- # Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008 # Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
#serial 5 # _AM_OUTPUT_DEPENDENCY_COMMANDS # ------------------------------ AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS], [{ # Autoconf 2.62 quotes --file arguments for eval, but not when files # are listed without --file. Let's play safe and only enable the eval # if we detect the quoting. case $CONFIG_FILES in *\'*) eval set x "$CONFIG_FILES" ;; *) set x $CONFIG_FILES ;; esac shift for mf do # Strip MF so we end up with the name of the file. mf=`echo "$mf" | sed -e 's/:.*$//'` # Check whether this is an Automake generated Makefile or not. # We used to match only the files named `Makefile.in', but # some people rename them; so instead we look at the file content. # Grep'ing the first line is not enough: some people post-process # each Makefile.in and add a new line on top of each file to say so. # Grep'ing the whole file is not good either: AIX grep has a line # limit of 2048, but all sed's we know of understand at least 4000. if sed -n 's,^#.*generated by automake.*,X,p' "$mf" | grep X >/dev/null 2>&1; then dirpart=`AS_DIRNAME("$mf")` else continue fi # Extract the definition of DEPDIR, am__include, and am__quote # from the Makefile without running `make'. DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` test -z "$DEPDIR" && continue am__include=`sed -n 's/^am__include = //p' < "$mf"` test -z "$am__include" && continue am__quote=`sed -n 's/^am__quote = //p' < "$mf"` # When using ansi2knr, U may be empty or an underscore; expand it U=`sed -n 's/^U = //p' < "$mf"` # Find all dependency output files, they are included files with # $(DEPDIR) in their names. We invoke sed twice because it is the # simplest approach to changing $(DEPDIR) to its actual value in the # expansion. for file in `sed -n " s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do # Make sure the directory exists.
test -f "$dirpart/$file" && continue fdir=`AS_DIRNAME(["$file"])` AS_MKDIR_P([$dirpart/$fdir]) # echo "creating $dirpart/$file" echo '# dummy' > "$dirpart/$file" done done } ])# _AM_OUTPUT_DEPENDENCY_COMMANDS # AM_OUTPUT_DEPENDENCY_COMMANDS # ----------------------------- # This macro should only be invoked once -- use via AC_REQUIRE. # # This code is only required when automatic dependency tracking # is enabled. FIXME. This creates each `.P' file that we will # need in order to bootstrap the dependency handling code. AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS], [AC_CONFIG_COMMANDS([depfiles], [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS], [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"]) ]) # Do all the work for Automake. -*- Autoconf -*- # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, # 2005, 2006, 2008, 2009 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 16 # This macro actually does too much. Some checks are only needed if # your package does certain things. But this isn't really a big deal. # AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE]) # AM_INIT_AUTOMAKE([OPTIONS]) # ----------------------------------------------- # The call with PACKAGE and VERSION arguments is the old style # call (pre autoconf-2.50), which is being phased out. PACKAGE # and VERSION should now be passed to AC_INIT and removed from # the call to AM_INIT_AUTOMAKE. # We support both call styles for the transition. After # the next Automake release, Autoconf can make the AC_INIT # arguments mandatory, and then we can depend on a new Autoconf # release and drop the old call support. AC_DEFUN([AM_INIT_AUTOMAKE], [AC_PREREQ([2.62])dnl dnl Autoconf wants to disallow AM_ names. We explicitly allow dnl the ones we care about. 
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl AC_REQUIRE([AC_PROG_INSTALL])dnl if test "`cd $srcdir && pwd`" != "`pwd`"; then # Use -I$(srcdir) only when $(srcdir) != ., so that make's output # is not polluted with repeated "-I." AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl # test to see if srcdir already configured if test -f $srcdir/config.status; then AC_MSG_ERROR([source directory already configured; run "make distclean" there first]) fi fi # test whether we have cygpath if test -z "$CYGPATH_W"; then if (cygpath --version) >/dev/null 2>/dev/null; then CYGPATH_W='cygpath -w' else CYGPATH_W=echo fi fi AC_SUBST([CYGPATH_W]) # Define the identity of the package. dnl Distinguish between old-style and new-style calls. m4_ifval([$2], [m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl AC_SUBST([PACKAGE], [$1])dnl AC_SUBST([VERSION], [$2])], [_AM_SET_OPTIONS([$1])dnl dnl Diagnose old-style AC_INIT with new-style AM_AUTOMAKE_INIT. m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,, [m4_fatal([AC_INIT should be called with package and version arguments])])dnl AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl _AM_IF_OPTION([no-define],, [AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package]) AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl # Some tools Automake needs. AC_REQUIRE([AM_SANITY_CHECK])dnl AC_REQUIRE([AC_ARG_PROGRAM])dnl AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version}) AM_MISSING_PROG(AUTOCONF, autoconf) AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version}) AM_MISSING_PROG(AUTOHEADER, autoheader) AM_MISSING_PROG(MAKEINFO, makeinfo) AC_REQUIRE([AM_PROG_INSTALL_SH])dnl AC_REQUIRE([AM_PROG_INSTALL_STRIP])dnl AC_REQUIRE([AM_PROG_MKDIR_P])dnl # We need awk for the "check" target. The system "awk" is bad on # some platforms. 
AC_REQUIRE([AC_PROG_AWK])dnl AC_REQUIRE([AC_PROG_MAKE_SET])dnl AC_REQUIRE([AM_SET_LEADING_DOT])dnl _AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])], [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])], [_AM_PROG_TAR([v7])])]) _AM_IF_OPTION([no-dependencies],, [AC_PROVIDE_IFELSE([AC_PROG_CC], [_AM_DEPENDENCIES(CC)], [define([AC_PROG_CC], defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl AC_PROVIDE_IFELSE([AC_PROG_CXX], [_AM_DEPENDENCIES(CXX)], [define([AC_PROG_CXX], defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl AC_PROVIDE_IFELSE([AC_PROG_OBJC], [_AM_DEPENDENCIES(OBJC)], [define([AC_PROG_OBJC], defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl ]) _AM_IF_OPTION([silent-rules], [AC_REQUIRE([AM_SILENT_RULES])])dnl dnl The `parallel-tests' driver may need to know about EXEEXT, so add the dnl `am__EXEEXT' conditional if _AM_COMPILER_EXEEXT was seen. This macro dnl is hooked onto _AC_COMPILER_EXEEXT early, see below. AC_CONFIG_COMMANDS_PRE(dnl [m4_provide_if([_AM_COMPILER_EXEEXT], [AM_CONDITIONAL([am__EXEEXT], [test -n "$EXEEXT"])])])dnl ]) dnl Hook into `_AC_COMPILER_EXEEXT' early to learn its expansion. Do not dnl add the conditional right here, as _AC_COMPILER_EXEEXT may be further dnl mangled by Autoconf and run in a shell conditional statement. m4_define([_AC_COMPILER_EXEEXT], m4_defn([_AC_COMPILER_EXEEXT])[m4_provide([_AM_COMPILER_EXEEXT])]) # When config.status generates a header, we must update the stamp-h file. # This file resides in the same directory as the config header # that is generated. The stamp files are numbered to have different names. # Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the # loop where config.status creates the headers, so we can generate # our stamp files there. AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK], [# Compute $1's index in $config_headers. 
_am_arg=$1 _am_stamp_count=1 for _am_header in $config_headers :; do case $_am_header in $_am_arg | $_am_arg:* ) break ;; * ) _am_stamp_count=`expr $_am_stamp_count + 1` ;; esac done echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count]) # Copyright (C) 2001, 2003, 2005, 2008, 2011 Free Software Foundation, # Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 1 # AM_PROG_INSTALL_SH # ------------------ # Define $install_sh. AC_DEFUN([AM_PROG_INSTALL_SH], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl if test x"${install_sh}" != xset; then case $am_aux_dir in *\ * | *\ *) install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;; *) install_sh="\${SHELL} $am_aux_dir/install-sh" esac fi AC_SUBST(install_sh)]) # Copyright (C) 2003, 2005 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 2 # Check whether the underlying file-system supports filenames # with a leading dot. For instance MS-DOS doesn't. AC_DEFUN([AM_SET_LEADING_DOT], [rm -rf .tst 2>/dev/null mkdir .tst 2>/dev/null if test -d .tst; then am__leading_dot=. else am__leading_dot=_ fi rmdir .tst 2>/dev/null AC_SUBST([am__leading_dot])]) # Check to see how 'make' treats includes. -*- Autoconf -*- # Copyright (C) 2001, 2002, 2003, 2005, 2009 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 4 # AM_MAKE_INCLUDE() # ----------------- # Check to see how make treats includes. 
AC_DEFUN([AM_MAKE_INCLUDE], [am_make=${MAKE-make} cat > confinc << 'END' am__doit: @echo this is the am__doit target .PHONY: am__doit END # If we don't find an include directive, just comment out the code. AC_MSG_CHECKING([for style of include used by $am_make]) am__include="#" am__quote= _am_result=none # First try GNU make style include. echo "include confinc" > confmf # Ignore all kinds of additional output from `make'. case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=include am__quote= _am_result=GNU ;; esac # Now try BSD make style include. if test "$am__include" = "#"; then echo '.include "confinc"' > confmf case `$am_make -s -f confmf 2> /dev/null` in #( *the\ am__doit\ target*) am__include=.include am__quote="\"" _am_result=BSD ;; esac fi AC_SUBST([am__include]) AC_SUBST([am__quote]) AC_MSG_RESULT([$_am_result]) rm -f confinc confmf ]) # Copyright (C) 1999, 2000, 2001, 2003, 2004, 2005, 2008 # Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 6 # AM_PROG_CC_C_O # -------------- # Like AC_PROG_CC_C_O, but changed for automake. AC_DEFUN([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC_C_O])dnl AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([compile])dnl # FIXME: we rely on the cache variable name because # there is no other way. set dummy $CC am_cc=`echo $[2] | sed ['s/[^a-zA-Z0-9_]/_/g;s/^[0-9]/_/']` eval am_t=\$ac_cv_prog_cc_${am_cc}_c_o if test "$am_t" != yes; then # Losing compiler, so override with the script. # FIXME: It is wrong to rewrite CC. # But if we don't then we get into trouble of one sort or another. 
# A longer-term fix would be to have automake use am__CC in this case, # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)" CC="$am_aux_dir/compile $CC" fi dnl Make sure AC_PROG_CC is never called again, or it will override our dnl setting of CC. m4_define([AC_PROG_CC], [m4_fatal([AC_PROG_CC cannot be called after AM_PROG_CC_C_O])]) ]) # Fake the existence of programs that GNU maintainers use. -*- Autoconf -*- # Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005, 2008 # Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 6 # AM_MISSING_PROG(NAME, PROGRAM) # ------------------------------ AC_DEFUN([AM_MISSING_PROG], [AC_REQUIRE([AM_MISSING_HAS_RUN]) $1=${$1-"${am_missing_run}$2"} AC_SUBST($1)]) # AM_MISSING_HAS_RUN # ------------------ # Define MISSING if not defined so far and test if it supports --run. # If it does, set am_missing_run to use it, otherwise, to nothing. AC_DEFUN([AM_MISSING_HAS_RUN], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([missing])dnl if test x"${MISSING+set}" != xset; then case $am_aux_dir in *\ * | *\ *) MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; *) MISSING="\${SHELL} $am_aux_dir/missing" ;; esac fi # Use eval to expand $SHELL if eval "$MISSING --run true"; then am_missing_run="$MISSING --run " else am_missing_run= AC_MSG_WARN([`missing' script is too old or missing]) fi ]) # Copyright (C) 2003, 2004, 2005, 2006, 2011 Free Software Foundation, # Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 1 # AM_PROG_MKDIR_P # --------------- # Check for `mkdir -p'. AC_DEFUN([AM_PROG_MKDIR_P], [AC_PREREQ([2.60])dnl AC_REQUIRE([AC_PROG_MKDIR_P])dnl dnl Automake 1.8 to 1.9.6 used to define mkdir_p. 
We now use MKDIR_P, dnl while keeping a definition of mkdir_p for backward compatibility. dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile. dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of dnl Makefile.ins that do not define MKDIR_P, so we do our own dnl adjustment using top_builddir (which is defined more often than dnl MKDIR_P). AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl case $mkdir_p in [[\\/$]]* | ?:[[\\/]]*) ;; */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;; esac ]) # Helper functions for option handling. -*- Autoconf -*- # Copyright (C) 2001, 2002, 2003, 2005, 2008, 2010 Free Software # Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 5 # _AM_MANGLE_OPTION(NAME) # ----------------------- AC_DEFUN([_AM_MANGLE_OPTION], [[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])]) # _AM_SET_OPTION(NAME) # -------------------- # Set option NAME. Presently that only means defining a flag for this option. AC_DEFUN([_AM_SET_OPTION], [m4_define(_AM_MANGLE_OPTION([$1]), 1)]) # _AM_SET_OPTIONS(OPTIONS) # ------------------------ # OPTIONS is a space-separated list of Automake options. AC_DEFUN([_AM_SET_OPTIONS], [m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])]) # _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET]) # ------------------------------------------- # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. AC_DEFUN([_AM_IF_OPTION], [m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])]) # Check to make sure that the build environment is sane. -*- Autoconf -*- # Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005, 2008 # Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
# serial 5 # AM_SANITY_CHECK # --------------- AC_DEFUN([AM_SANITY_CHECK], [AC_MSG_CHECKING([whether build environment is sane]) # Just in case sleep 1 echo timestamp > conftest.file # Reject unsafe characters in $srcdir or the absolute working directory # name. Accept space and tab only in the latter. am_lf=' ' case `pwd` in *[[\\\"\#\$\&\'\`$am_lf]]*) AC_MSG_ERROR([unsafe absolute working directory name]);; esac case $srcdir in *[[\\\"\#\$\&\'\`$am_lf\ \ ]]*) AC_MSG_ERROR([unsafe srcdir value: `$srcdir']);; esac # Do `set' in a subshell so we don't clobber the current shell's # arguments. Must try -L first in case configure is actually a # symlink; some systems play weird games with the mod time of symlinks # (eg FreeBSD returns the mod time of the symlink's containing # directory). if ( set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null` if test "$[*]" = "X"; then # -L didn't work. set X `ls -t "$srcdir/configure" conftest.file` fi rm -f conftest.file if test "$[*]" != "X $srcdir/configure conftest.file" \ && test "$[*]" != "X conftest.file $srcdir/configure"; then # If neither matched, then we have a broken ls. This can happen # if, for instance, CONFIG_SHELL is bash and it inherits a # broken ls alias from the environment. This has actually # happened. Such a system could not be considered "sane". AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken alias in your environment]) fi test "$[2]" = conftest.file ) then # Ok. : else AC_MSG_ERROR([newly created file is older than distributed files! Check your system clock]) fi AC_MSG_RESULT(yes)]) # Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
# serial 1 # AM_PROG_INSTALL_STRIP # --------------------- # One issue with vendor `install' (even GNU) is that you can't # specify the program used to strip binaries. This is especially # annoying in cross-compiling environments, where the build's strip # is unlikely to handle the host's binaries. # Fortunately install-sh will honor a STRIPPROG variable, so we # always use install-sh in `make install-strip', and initialize # STRIPPROG with the value of the STRIP variable (set by the user). AC_DEFUN([AM_PROG_INSTALL_STRIP], [AC_REQUIRE([AM_PROG_INSTALL_SH])dnl # Installed binaries are usually stripped using `strip' when the user # run `make install-strip'. However `strip' might not be the right # tool to use in cross-compilation environments, therefore Automake # will honor the `STRIP' environment variable to overrule this program. dnl Don't test for $cross_compiling = yes, because it might be `maybe'. if test "$cross_compiling" != no; then AC_CHECK_TOOL([STRIP], [strip], :) fi INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s" AC_SUBST([INSTALL_STRIP_PROGRAM])]) # Copyright (C) 2006, 2008, 2010 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # serial 3 # _AM_SUBST_NOTMAKE(VARIABLE) # --------------------------- # Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in. # This macro is traced by Automake. AC_DEFUN([_AM_SUBST_NOTMAKE]) # AM_SUBST_NOTMAKE(VARIABLE) # -------------------------- # Public sister of _AM_SUBST_NOTMAKE. AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)]) # Check how to create a tarball. -*- Autoconf -*- # Copyright (C) 2004, 2005, 2012 Free Software Foundation, Inc. # # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
# serial 2 # _AM_PROG_TAR(FORMAT) # -------------------- # Check how to create a tarball in format FORMAT. # FORMAT should be one of `v7', `ustar', or `pax'. # # Substitute a variable $(am__tar) that is a command # writing to stdout a FORMAT-tarball containing the directory # $tardir. # tardir=directory && $(am__tar) > result.tar # # Substitute a variable $(am__untar) that extract such # a tarball read from stdin. # $(am__untar) < result.tar AC_DEFUN([_AM_PROG_TAR], [# Always define AMTAR for backward compatibility. Yes, it's still used # in the wild :-( We should find a proper way to deprecate it ... AC_SUBST([AMTAR], ['$${TAR-tar}']) m4_if([$1], [v7], [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'], [m4_case([$1], [ustar],, [pax],, [m4_fatal([Unknown tar format])]) AC_MSG_CHECKING([how to create a $1 tar archive]) # Loop over all known methods to create a tar archive until one works. _am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none' _am_tools=${am_cv_prog_tar_$1-$_am_tools} # Do not fold the above two line into one, because Tru64 sh and # Solaris sh will not grok spaces in the rhs of `-'. for _am_tool in $_am_tools do case $_am_tool in gnutar) for _am_tar in tar gnutar gtar; do AM_RUN_LOG([$_am_tar --version]) && break done am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"' am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"' am__untar="$_am_tar -xf -" ;; plaintar) # Must skip GNU tar: if it does not support --format= it doesn't create # ustar tarball either. 
(tar --version) >/dev/null 2>&1 && continue am__tar='tar chf - "$$tardir"' am__tar_='tar chf - "$tardir"' am__untar='tar xf -' ;; pax) am__tar='pax -L -x $1 -w "$$tardir"' am__tar_='pax -L -x $1 -w "$tardir"' am__untar='pax -r' ;; cpio) am__tar='find "$$tardir" -print | cpio -o -H $1 -L' am__tar_='find "$tardir" -print | cpio -o -H $1 -L' am__untar='cpio -i -H $1 -d' ;; none) am__tar=false am__tar_=false am__untar=false ;; esac # If the value was cached, stop now. We just wanted to have am__tar # and am__untar set. test -n "${am_cv_prog_tar_$1}" && break # tar/untar a dummy directory, and stop if the command works rm -rf conftest.dir mkdir conftest.dir echo GrepMe > conftest.dir/file AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar]) rm -rf conftest.dir if test -s conftest.tar; then AM_RUN_LOG([$am__untar /dev/null 2>&1 && break fi done rm -rf conftest.dir AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool]) AC_MSG_RESULT([$am_cv_prog_tar_$1])]) AC_SUBST([am__tar]) AC_SUBST([am__untar]) ]) # _AM_PROG_TAR m4_include([m4/libtool.m4]) m4_include([m4/ltoptions.m4]) m4_include([m4/ltsugar.m4]) m4_include([m4/ltversion.m4]) m4_include([m4/lt~obsolete.m4]) m4_include([acinclude.m4]) ecm-6.4.4/stage2.c0000644023561000001540000007015412106741273010556 00000000000000/* Common stage 2 for ECM, P-1 and P+1 (improved standard continuation with subquadratic polynomial arithmetic). Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Paul Zimmermann, Alexander Kruppa, Dave Newman. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config.h" #include #include #include /* for floor */ #include /* for strlen */ #ifdef HAVE_UNISTD_H #include /* for unlink */ #endif #include "ecm-impl.h" #include "sp.h" extern unsigned int Fermat; /* r <- Dickson(n,a)(x) */ static void dickson (mpz_t r, mpz_t x, unsigned int n, int a) { unsigned int i, b = 0; mpz_t t, u; if (n == 0) { mpz_set_ui (r, 1); return; } while (n > 2 && (n & 1) == 0) { b++; n >>= 1; } mpz_set (r, x); MPZ_INIT (t); MPZ_INIT (u); if (n > 1) { mpz_set (r, x); mpz_mul (r, r, r); mpz_sub_si (r, r, a); mpz_sub_si (r, r, a); /* r = dickson(x, 2, a) */ mpz_set (t, x); /* t = dickson(x, 1, a) */ for (i = 2; i < n; i++) { mpz_mul_si (u, t, a); mpz_set (t, r); /* t = dickson(x, i, a) */ mpz_mul (r, r, x); mpz_sub (r, r, u); /* r = dickson(x, i+1, a) */ } } for ( ; b > 0; b--) { mpz_mul (t, r, r); /* t = dickson(x, n, a) ^ 2 */ mpz_ui_pow_ui (u, abs (a), n); if (n & 1 && a < 0) mpz_neg (u, u); mpz_mul_2exp (u, u, 1); /* u = 2 * a^n */ mpz_sub (r, t, u); /* r = dickson(x, 2*n, a) */ n <<= 1; } mpz_clear (t); mpz_clear (u); } /* Init table to allow computation of Dickson_{E, a} (s + n*D), for successive n, where Dickson_{E, a} is the Dickson polynomial of degree E with parameter a. For a == 0, Dickson_{E, a} (x) = x^E . See Knuth, TAOCP vol.2, 4.6.4 and exercise 7 in 4.6.4, and "An FFT Extension of the Elliptic Curve Method of Factorization", Peter Montgomery, Dissertation, 1992, Chapter 5. Ternary return value. 
*/ static void fin_diff_coeff (listz_t coeffs, mpz_t s, mpz_t D, unsigned int E, int dickson_a) { unsigned int i, k; mpz_t t; MPZ_INIT (t); mpz_set (t, s); for (i = 0; i <= E; i++) { if (dickson_a != 0) /* fd[i] = dickson_{E,a} (s+i*D) */ dickson (coeffs[i], t, E, dickson_a); else /* fd[i] = (s+i*D)^E */ mpz_pow_ui (coeffs[i], t, E); mpz_add (t, t, D); /* t = s + i * D */ } for (k = 1; k <= E; k++) for (i = E; i >= k; i--) mpz_sub (coeffs[i], coeffs[i], coeffs[i-1]); mpz_clear (t); } /* Init several disjoint progressions for the computation of Dickson_{E,a} (e * (i0 + i + n * d * k)), for 0 <= i < d * k (1) with gcd(e * (i0 + i), d) == 1, i == 1 (mod m), where m divides d for successive n (the variable n does not appear here, it is the application that called this function that wants to evaluate (1) for n = 0, 1, 2, ... This means there will be k sets of progressions, where each set contains eulerphi(d) progressions that generate the values of Dickson_{E,a} (x) with x coprime to d and with i == 1 (mod m), where x == e * (i0 + i) (mod m). i0 may be a NULL pointer, in this case i0 = 0 is assumed. Return NULL if an error occurred. */ listz_t init_progression_coeffs (mpz_t i0, const unsigned long d, const unsigned long e, const unsigned int k, const unsigned int m, const unsigned int E, const int dickson_a) { unsigned int i, j, size_fd; mpz_t t, dke, em; listz_t fd; ASSERT (d % m == 0); size_fd = k * (eulerphi(d) / eulerphi(m)) * (E + 1); fd = (listz_t) malloc (size_fd * sizeof (mpz_t)); if (fd == NULL) return NULL; for (i = 0; i < size_fd; i++) MPZ_INIT (fd[i]); MPZ_INIT (t); if (i0 != NULL) mpz_set (t, i0); outputf (OUTPUT_TRACE, "init_progression_coeffs: i0 = %Zd, d = %u, e = %u, " "k = %u, m = %u, E = %u, a = %d, size_fd = %u\n", t, d, e, k, m, E, dickson_a, size_fd); /* Due to the condition i == 1 (mod m) we start at i = 1 or i = 0, depending on whether m > 1 or m == 1 */ i = (m > 1) ? 
1 : 0; mpz_add_ui (t, t, (unsigned long) i); mpz_mul_ui (t, t, e); /* Now t = e * (i0 + i + n * d * k), for n = 0 */ /* dke = d * k * e, the common difference of the arithmetic progressions (it is the same for all arithmetic progressions we initialise) */ MPZ_INIT (dke); mpz_set_ui (dke, d); mpz_mul_ui (dke, dke, k); mpz_mul_ui (dke, dke, e); /* em = e * m, the value by which t advances if we increase i by m */ MPZ_INIT (em); mpz_set_ui (em, e); mpz_mul_ui (em, em, (unsigned long) m); for (j = 0; i < k * d; i += m) { if (mpz_gcd_ui (NULL, t, d) == 1) { outputf (OUTPUT_TRACE, "init_progression_coeffs: initing a " "progression for Dickson_{%d,%d}(%Zd + n * %Zd)\n", E, dickson_a, t, dke); /* Initialise for the evaluation of Dickson_{E,a} (t + n*dke) for n = 0, 1, 2, ... */ fin_diff_coeff (fd + j, t, dke, E, dickson_a); j += E + 1; } else if (test_verbose (OUTPUT_TRACE)) outputf (OUTPUT_TRACE, "init_progression_coeffs: NOT initing a " "progression for Dickson_{%d,%d}(%Zd + n * %Zd), " "gcd (%Zd, %u) == %u)\n", E, dickson_a, t, dke, t, d, mpz_gcd_ui (NULL, t, d)); /* We increase i by m, so we increase t by e*m */ mpz_add (t, t, em); } mpz_clear (em); mpz_clear (dke); mpz_clear (t); return fd; } void init_roots_params (progression_params_t *params, const int S, const unsigned long d1, const unsigned long d2, const double cost) { ASSERT (gcd (d1, d2) == 1); /* If S < 0, use degree |S| Dickson poly, otherwise use x^S */ params->S = abs (S); params->dickson_a = (S < 0) ? -1 : 0; /* We only calculate Dickson_{S, a}(j * d2) * s where gcd (j, dsieve) == 1 and j == 1 (mod 6) by doing nr = eulerphi(dsieve)/2 separate progressions. */ /* Now choose a value for dsieve. 
*/ params->dsieve = 6; params->nr = 1; /* Prospective saving by sieving out multiples of 5: d1 / params->dsieve * params->nr / 5 roots, each one costs S point adds Prospective cost increase: 4 times as many progressions to init (that is, 3 * params->nr more), each costs ~ S * S * log_2(5 * dsieve * d2) / 2 point adds The params->nr and one S cancel. */ if (d1 % 5 == 0 && d1 / params->dsieve / 5. * cost > 3. * params->S * log (5. * params->dsieve * d2) / 2.) { params->dsieve *= 5; params->nr *= 4; } if (d1 % 7 == 0 && d1 / params->dsieve / 7. * cost > 5. * params->S * log (7. * params->dsieve * d2) / 2.) { params->dsieve *= 7; params->nr *= 6; } if (d1 % 11 == 0 && d1 / params->dsieve / 11. * cost > 9. * params->S * log (11. * params->dsieve * d2) / 2.) { params->dsieve *= 11; params->nr *= 10; } params->size_fd = params->nr * (params->S + 1); params->next = 0; params->rsieve = 1; } double memory_use (unsigned long dF, unsigned int sp_num, unsigned int Ftreelvl, mpmod_t modulus) { double mem; /* printf ("memory_use (%lu, %d, %d, )\n", dF, sp_num, Ftreelvl); */ mem = 9.0; /* F:1, T:3*2, invF:1, G:1 */ mem += (double) Ftreelvl; mem *= (double) dF; mem += 2. * list_mul_mem (dF); /* Also in T */ #if (MULT == KS) /* estimated memory for kronecker_schonhage / wrap-case in PrerevertDivision respectively */ mem += (24.0 + 1.0) * (double) (sp_num ? 
MIN(MUL_NTT_THRESHOLD, dF) : dF); #endif mem *= (double) (mpz_size (modulus->orig_modulus)) * sizeof (mp_limb_t) + sizeof (mpz_t); if (sp_num) mem += /* peak malloc in ecm_ntt.c */ (4.0 * dF * sp_num * sizeof (sp_t)) /* mpzspv_normalise */ + (MPZSPV_NORMALISE_STRIDE * ((double) sp_num * sizeof (sp_t) + 6.0 * sizeof (sp_t) + sizeof (float))) /* sp_F, sp_invF */ + ((1.0 + 2.0) * dF * sp_num * sizeof (sp_t)); return mem; } /* Input: X is the point at end of stage 1 n is the number to factor B2min-B2 is the stage 2 range (we consider B2min is done) k0 is the number of blocks (if 0, use default) S is the exponent for Brent-Suyama's extension invtrick is non-zero iff one uses x+1/x instead of x. method: ECM_ECM, ECM_PM1 or ECM_PP1 Cf "Speeding the Pollard and Elliptic Curve Methods of Factorization", Peter Montgomery, Math. of Comp., 1987, page 257: using x^(i^e)+1/x^(i^e) instead of x^(i^(2e)) reduces the cost of Brent-Suyama's extension from 2*e to e+3 multiplications per value of i. Output: f is the factor found Return value: 2 (step number) iff a factor was found, or ECM_ERROR if an error occurred. */ int stage2 (mpz_t f, void *X, mpmod_t modulus, unsigned long dF, unsigned long k, root_params_t *root_params, int method, int use_ntt, char *TreeFilename, int (*stop_asap)(void)) { unsigned long i, sizeT; mpz_t n; listz_t F, G, H, T; int youpi = ECM_NO_FACTOR_FOUND; long st, st0; void *rootsG_state = NULL; listz_t *Tree = NULL; /* stores the product tree for F */ unsigned int treefiles_used = 0; /* Number of tree files currently in use */ unsigned int lgk; /* ceil(log(k)/log(2)) */ listz_t invF = NULL; double mem; mpzspm_t mpzspm = NULL; mpzspv_t sp_F = NULL, sp_invF = NULL; /* check alloc. 
size of f */ mpres_realloc (f, modulus); st0 = cputime (); Fermat = 0; if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0) { Fermat = modulus->Fermat; use_ntt = 0; /* don't use NTT for Fermat numbers */ } if (use_ntt) { mpzspm = mpzspm_init (2 * dF, modulus->orig_modulus); if (mpzspm == NULL) { outputf (OUTPUT_ERROR, "Could not initialise mpzspm, " "presumably out of memory\n"); return ECM_ERROR; } outputf (OUTPUT_VERBOSE, "Using %u small primes for NTT\n", mpzspm->sp_num); } lgk = ceil_log2 (dF); mem = memory_use (dF, use_ntt ? mpzspm->sp_num : 0, (TreeFilename == NULL) ? lgk : 0, modulus); if (mem < 1e4) outputf (OUTPUT_VERBOSE, "Estimated memory usage: %1.0f\n", mem); else if (mem < 1e7) outputf (OUTPUT_VERBOSE, "Estimated memory usage: %1.0fK\n", mem / 1024.); else if (mem < 1e10) outputf (OUTPUT_VERBOSE, "Estimated memory usage: %1.0fM\n", mem / 1048576.); else outputf (OUTPUT_VERBOSE, "Estimated memory usage: %1.0fG\n", mem / 1073741824.); MEMORY_TAG; F = init_list2 (dF + 1, mpz_sizeinbase (modulus->orig_modulus, 2) + 3 * GMP_NUMB_BITS); MEMORY_UNTAG; if (F == NULL) { youpi = ECM_ERROR; goto clear_i0; } sizeT = 3 * dF + list_mul_mem (dF); if (dF > 3) sizeT += dF; MEMORY_TAG; T = init_list2 (sizeT, 2 * mpz_sizeinbase (modulus->orig_modulus, 2) + 3 * GMP_NUMB_BITS); MEMORY_UNTAG; if (T == NULL) { youpi = ECM_ERROR; goto clear_F; } H = T; /* needs dF+1 cells in T */ if (method == ECM_PM1) youpi = pm1_rootsF (f, F, root_params, dF, (mpres_t*) X, T, modulus); else if (method == ECM_PP1) youpi = pp1_rootsF (F, root_params, dF, (mpres_t*) X, T, modulus); else youpi = ecm_rootsF (f, F, root_params, dF, (curve*) X, modulus); if (youpi != ECM_NO_FACTOR_FOUND) { if (youpi != ECM_ERROR) youpi = ECM_FACTOR_FOUND_STEP2; goto clear_T; } if (stop_asap != NULL && (*stop_asap)()) goto clear_T; if (test_verbose (OUTPUT_TRACE)) { unsigned long j; for (j = 0; j < dF; j++) outputf (OUTPUT_TRACE, "f_%lu = %Zd\n", j, F[j]); } /* ---------------------------------------------- 
| F | invF | G | T | ---------------------------------------------- | rootsF | ??? | ??? | ??? | ---------------------------------------------- */ if (TreeFilename == NULL) { Tree = (listz_t*) malloc (lgk * sizeof (listz_t)); if (Tree == NULL) { outputf (OUTPUT_ERROR, "Error: not enough memory\n"); youpi = ECM_ERROR; goto clear_T; } for (i = 0; i < lgk; i++) { MEMORY_TAG; Tree[i] = init_list2 (dF, mpz_sizeinbase (modulus->orig_modulus, 2) + GMP_NUMB_BITS); MEMORY_UNTAG; if (Tree[i] == NULL) { /* clear already allocated Tree[i] */ while (i) clear_list (Tree[--i], dF); free (Tree); youpi = ECM_ERROR; goto clear_T; } } } else Tree = NULL; #ifdef TELLEGEN_DEBUG outputf (OUTPUT_ALWAYS, "Roots = "); print_list (os, F, dF); #endif mpz_init_set (n, modulus->orig_modulus); st = cputime (); if (TreeFilename != NULL) { FILE *TreeFile; char *fullname = (char *) malloc (strlen (TreeFilename) + 1 + 2 + 1); if (fullname == NULL) { fprintf (stderr, "Cannot allocate memory in stage2\n"); exit (1); } for (i = lgk; i > 0; i--) { if (stop_asap != NULL && (*stop_asap)()) goto free_Tree_i; sprintf (fullname, "%s.%lu", TreeFilename, i - 1); TreeFile = fopen (fullname, "wb"); if (TreeFile == NULL) { outputf (OUTPUT_ERROR, "Error opening file for product tree of F\n"); youpi = ECM_ERROR; goto free_Tree_i; } treefiles_used++; if (use_ntt) { if (ntt_PolyFromRoots_Tree (F, F, dF, T, i - 1, mpzspm, NULL, TreeFile) == ECM_ERROR) { fclose (TreeFile); youpi = ECM_ERROR; goto free_Tree_i; } } else { if (PolyFromRoots_Tree (F, F, dF, T, i - 1, n, NULL, TreeFile, 0) == ECM_ERROR) { fclose (TreeFile); youpi = ECM_ERROR; goto free_Tree_i; } } if (fclose (TreeFile) != 0) { youpi = ECM_ERROR; goto free_Tree_i; } } free (fullname); } else { /* TODO: how to check for stop_asap() here? 
*/ if (use_ntt) ntt_PolyFromRoots_Tree (F, F, dF, T, -1, mpzspm, Tree, NULL); else PolyFromRoots_Tree (F, F, dF, T, -1, n, Tree, NULL, 0); } if (test_verbose (OUTPUT_TRACE)) { unsigned long j; for (j = 0; j < dF; j++) outputf (OUTPUT_TRACE, "F[%lu] = %Zd\n", j, F[j]); } outputf (OUTPUT_VERBOSE, "Building F from its roots took %ldms\n", elltime (st, cputime ())); if (stop_asap != NULL && (*stop_asap)()) goto free_Tree_i; /* needs dF+list_mul_mem(dF/2) cells in T */ mpz_set_ui (F[dF], 1); /* the leading monic coefficient needs to be stored explicitly for PrerevertDivision */ /* ---------------------------------------------- | F | invF | G | T | ---------------------------------------------- | F(x) | ??? | ??? | ??? | ---------------------------------------------- */ /* G*H has degree 2*dF-2, hence we must cancel dF-1 coefficients to get degree dF-1 */ if (dF > 1) { /* only dF-1 coefficients of 1/F are needed to reduce G*H, but we need one more for TUpTree */ MEMORY_TAG; invF = init_list2 (dF + 1, mpz_sizeinbase (modulus->orig_modulus, 2) + 2 * GMP_NUMB_BITS); MEMORY_UNTAG; if (invF == NULL) { youpi = ECM_ERROR; goto free_Tree_i; } st = cputime (); if (use_ntt) { sp_F = mpzspv_init (dF, mpzspm); mpzspv_from_mpzv (sp_F, 0, F, dF, mpzspm); mpzspv_to_ntt (sp_F, 0, dF, dF, 1, mpzspm); ntt_PolyInvert (invF, F + 1, dF, T, mpzspm); sp_invF = mpzspv_init (2 * dF, mpzspm); mpzspv_from_mpzv (sp_invF, 0, invF, dF, mpzspm); mpzspv_to_ntt (sp_invF, 0, dF, 2 * dF, 0, mpzspm); } else PolyInvert (invF, F + 1, dF, T, n); /* now invF[0..dF-1] = Quo(x^(2dF-1), F) */ outputf (OUTPUT_VERBOSE, "Computing 1/F took %ldms\n", elltime (st, cputime ())); /* ---------------------------------------------- | F | invF | G | T | ---------------------------------------------- | F(x) | 1/F(x) | ??? | ??? | ---------------------------------------------- */ } if (stop_asap != NULL && (*stop_asap)()) goto clear_invF; /* start computing G with roots at i0*d, (i0+1)*d, (i0+2)*d, ... 
where i0*d <= B2min < (i0+1)*d */ MEMORY_TAG; G = init_list2 (dF, mpz_sizeinbase (modulus->orig_modulus, 2) + 3 * GMP_NUMB_BITS); MEMORY_UNTAG; if (G == NULL) { youpi = ECM_ERROR; goto clear_invF; } st = cputime (); if (method == ECM_PM1) rootsG_state = pm1_rootsG_init ((mpres_t *) X, root_params, modulus); else if (method == ECM_PP1) rootsG_state = pp1_rootsG_init ((mpres_t *) X, root_params, modulus); else /* ECM_ECM */ rootsG_state = ecm_rootsG_init (f, (curve *) X, root_params, dF, k, modulus); /* rootsG_state=NULL if an error occurred or (ecm only) a factor was found */ if (rootsG_state == NULL) { /* ecm: f = -1 if an error occurred */ youpi = (method == ECM_ECM && mpz_cmp_si (f, -1)) ? ECM_FACTOR_FOUND_STEP2 : ECM_ERROR; goto clear_G; } if (method != ECM_ECM) /* ecm_rootsG_init prints itself */ outputf (OUTPUT_VERBOSE, "Initializing table of differences for G " "took %ldms\n", elltime (st, cputime ())); if (stop_asap != NULL && (*stop_asap)()) goto clear_fd; for (i = 0; i < k; i++) { /* needs dF+1 cells in T+dF */ if (method == ECM_PM1) youpi = pm1_rootsG (f, G, dF, (pm1_roots_state_t *) rootsG_state, T + dF, modulus); else if (method == ECM_PP1) youpi = pp1_rootsG (G, dF, (pp1_roots_state_t *) rootsG_state, modulus, (mpres_t *) X); else youpi = ecm_rootsG (f, G, dF, (ecm_roots_state_t *) rootsG_state, modulus); if (test_verbose (OUTPUT_TRACE)) { unsigned long j; for (j = 0; j < dF; j++) outputf (OUTPUT_TRACE, "g_%lu = %Zd\n", j, G[j]); } ASSERT(youpi != ECM_ERROR); /* xxx_rootsG cannot fail */ if (youpi) /* factor found */ { youpi = ECM_FACTOR_FOUND_STEP2; goto clear_fd; } if (stop_asap != NULL && (*stop_asap)()) goto clear_fd; /* ----------------------------------------------- | F | invF | G | T | ----------------------------------------------- | F(x) | 1/F(x) | rootsG | ??? 
| ----------------------------------------------- */ st = cputime (); if (use_ntt) ntt_PolyFromRoots (G, G, dF, T + dF, mpzspm); else PolyFromRoots (G, G, dF, T + dF, n); if (test_verbose (OUTPUT_TRACE)) { unsigned long j; outputf (OUTPUT_TRACE, "G(x) = x^%lu ", dF); for (j = 0; j < dF; j++) outputf (OUTPUT_TRACE, "+ (%Zd * x^%lu)", G[j], j); outputf (OUTPUT_TRACE, "\n"); } /* needs 2*dF+list_mul_mem(dF/2) cells in T */ outputf (OUTPUT_VERBOSE, "Building G from its roots took %ldms\n", elltime (st, cputime ())); if (stop_asap != NULL && (*stop_asap)()) goto clear_fd; /* ----------------------------------------------- | F | invF | G | T | ----------------------------------------------- | F(x) | 1/F(x) | G(x) | ??? | ----------------------------------------------- */ if (i == 0) { list_sub (H, G, F, dF); /* coefficients 1 of degree cancel, thus T is of degree < dF */ list_mod (H, H, dF, n); /* ------------------------------------------------ | F | invF | G | T | ------------------------------------------------ | F(x) | 1/F(x) | ??? |G(x)-F(x)| ??? | ------------------------------------------------ */ } else { /* since F and G are monic of same degree, G mod F = G - F */ list_sub (G, G, F, dF); list_mod (G, G, dF, n); /* ------------------------------------------------ | F | invF | G | T | ------------------------------------------------ | F(x) | 1/F(x) |G(x)-F(x)| H(x) | | ------------------------------------------------ */ st = cputime (); /* previous G mod F is in H, with degree < dF, i.e. 
dF coefficients: requires 3dF-1+list_mul_mem(dF) cells in T */ if (use_ntt) { ntt_mul (T + dF, G, H, dF, T + 3 * dF, 0, mpzspm); list_mod (H, T + dF, 2 * dF, n); } else list_mulmod (H, T + dF, G, H, dF, T + 3 * dF, n); outputf (OUTPUT_VERBOSE, "Computing G * H took %ldms\n", elltime (st, cputime ())); if (stop_asap != NULL && (*stop_asap)()) goto clear_fd; /* ------------------------------------------------ | F | invF | G | T | ------------------------------------------------ | F(x) | 1/F(x) |G(x)-F(x)| G * H | | ------------------------------------------------ */ st = cputime (); if (use_ntt) { ntt_PrerevertDivision (H, F, invF + 1, sp_F, sp_invF, dF, T + 2 * dF, mpzspm); } else { if (PrerevertDivision (H, F, invF + 1, dF, T + 2 * dF, n)) { youpi = ECM_ERROR; goto clear_fd; } } outputf (OUTPUT_VERBOSE, "Reducing G * H mod F took %ldms\n", elltime (st, cputime ())); if (stop_asap != NULL && (*stop_asap)()) goto clear_fd; } } clear_list (F, dF + 1); F = NULL; clear_list (G, dF); G = NULL; st = cputime (); #ifdef POLYEVALTELLEGEN if (use_ntt) youpi = ntt_polyevalT (T, dF, Tree, T + dF + 1, sp_invF, mpzspm, TreeFilename); else youpi = polyeval_tellegen (T, dF, Tree, T + dF + 1, sizeT - dF - 1, invF, n, TreeFilename); if (youpi) { outputf (OUTPUT_ERROR, "Error, not enough memory\n"); goto clear_fd; } #else clear_list (invF, dF + 1); invF = NULL; polyeval (T, dF, Tree, T + dF + 1, n, 0); #endif treefiles_used = 0; /* Polyeval deletes treefiles by itself */ if (test_verbose (OUTPUT_TRACE)) { unsigned long j; for (j = 0; j < dF; j++) outputf (OUTPUT_TRACE, "G(x_%lu) = %Zd\n", j, T[j]); } outputf (OUTPUT_VERBOSE, "Computing polyeval(F,G) took %ldms\n", elltime (st, cputime ())); st = cputime (); list_mulup (T, dF, n, T[dF]); outputf (OUTPUT_VERBOSE, "Computing product of all F(g_i) took %ldms\n", elltime (st, cputime ())); mpz_gcd (f, T[dF - 1], n); if (mpz_cmp_ui (f, 1) > 0) { youpi = ECM_FACTOR_FOUND_STEP2; if (method == ECM_ECM && test_verbose (OUTPUT_RESVERBOSE)) { /* 
Find out for which i*X, (i,d)==1, a factor was found */ /* Note that the factor we found may be composite */ /* TBD: use binary search */ unsigned long j, k; mpz_set (T[dF], f); for (k = 0, j = 1; k < dF; j += 6) { if (gcd (j, root_params->d1) > 1) continue; mpz_gcd (T[dF + 1], T[k], T[dF]); if (mpz_cmp_ui (T[dF + 1], 1) > 0) { int sgn; /* Find i so that $f(i d1) X = +-f(j d2) X$ over GF(f) */ sgn = ecm_findmatch (&i, j, root_params, (curve *)X, modulus, f); if (sgn != 0) { mpz_add_ui (T[dF + 2], root_params->i0, i); outputf (OUTPUT_RESVERBOSE, "Divisor %Zd first occurs in T[%lu] = " "((f(%Zd*%lu)%cf(%lu*%lu))*X)_x\n", T[dF + 1], k, T[dF + 2], root_params->d1, sgn < 0 ? '+' : '-', j, root_params->d2); mpz_mul_ui (T[dF + 2], T[dF + 2], root_params->d1); if (sgn < 0) mpz_add_ui (T[dF + 2], T[dF + 2], j * root_params->d2); else mpz_sub_ui (T[dF + 2], T[dF + 2], j * root_params->d2); mpz_abs (T[dF + 2], T[dF + 2]); outputf (OUTPUT_RESVERBOSE, "Maybe largest group order " "factor is or divides %Zd\n", T[dF + 2]); } else { outputf (OUTPUT_RESVERBOSE, "Divisor %Zd first occurs in T[%lu], but could " "not determine associated i\n", T[dF + 1], k); } /* Don't report this divisor again */ mpz_divexact (T[dF], T[dF], T[dF + 1]); } k++; } } } else { /* Here, mpz_cmp_ui (f, 1) == 0, i.e. 
no factor was found */ outputf (OUTPUT_RESVERBOSE, "Product of G(f_i) = %Zd\n", T[0]); } clear_fd: if (method == ECM_PM1) pm1_rootsG_clear ((pm1_roots_state_t *) rootsG_state, modulus); else if (method == ECM_PP1) pp1_rootsG_clear ((pp1_roots_state_t *) rootsG_state, modulus); else /* ECM_ECM */ ecm_rootsG_clear ((ecm_roots_state_t *) rootsG_state, modulus); clear_G: clear_list (G, dF); clear_invF: clear_list (invF, dF + 1); if (use_ntt) { mpzspv_clear (sp_F, mpzspm); mpzspv_clear (sp_invF, mpzspm); } free_Tree_i: if (Tree != NULL) { for (i = 0; i < lgk; i++) clear_list (Tree[i], dF); free (Tree); } if (TreeFilename != NULL && treefiles_used > 0) { /* Unlink any treefiles still in use */ char *fullname = (char *) malloc (strlen (TreeFilename) + 1 + 2 + 1); if (fullname == NULL) { fprintf (stderr, "Cannot allocate memory in stage2\n"); exit (1); } for (i = 0; i < treefiles_used; i++) { sprintf (fullname, "%s.%lu", TreeFilename, i); outputf (OUTPUT_DEVVERBOSE, "Unlinking %s\n", fullname); if (unlink (fullname) != 0) outputf (OUTPUT_ERROR, "Could not delete %s\n", fullname); } free (fullname); } mpz_clear (n); clear_T: clear_list (T, sizeT); clear_F: clear_list (F, dF + 1); clear_i0: if (use_ntt) mpzspm_clear (mpzspm); if (Fermat) F_clear (); if (stop_asap == NULL || !(*stop_asap)()) { st0 = elltime (st0, cputime ()); outputf (OUTPUT_NORMAL, "Step 2 took %ldms\n", st0); } return youpi; } ecm-6.4.4/longlong.h0000644023561000001540000017574712106741273011233 00000000000000/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. This file was copied from the GNU MP Library. This file is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this file. If not, see http://www.gnu.org/licenses/. */ /* added for compatibility with other compilers than gcc */ #if !defined(__GNUC__) #define __builtin_constant_p(x) 0 #endif /* You have to define the following before including this file: UWtype -- An unsigned type, default type for operations (typically a "word") UHWtype -- An unsigned type, at least half the size of UWtype. UDWtype -- An unsigned type, at least twice as large a UWtype W_TYPE_SIZE -- size in bits of UWtype SItype, USItype -- Signed and unsigned 32 bit types. DItype, UDItype -- Signed and unsigned 64 bit types. On a 32 bit machine UWtype should typically be USItype; on a 64 bit machine, UWtype should typically be UDItype. */ #define __BITS4 (W_TYPE_SIZE / 4) #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) /* This is used to make sure no undesirable sharing between different libraries that use this file takes place. */ #ifndef __MPN #define __MPN(x) __##x #endif #ifndef _PROTO #if (__STDC__-0) || defined (__cplusplus) || defined( _MSC_VER ) #define _PROTO(x) x #else #define _PROTO(x) () #endif #endif /* Define auxiliary asm macros. 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype word product in HIGH_PROD and LOW_PROD. 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a UDWtype product. This is just a variant of umul_ppmm. 
3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator) divides a UDWtype, composed of the UWtype integers HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less than DENOMINATOR for correct operation. If, in addition, the most significant bit of DENOMINATOR must be 1, then the pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1. 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator). Like udiv_qrnnd but the numbers are signed. The quotient is rounded towards 0. 5) count_leading_zeros(count, x) counts the number of zero-bits from the msb to the first non-zero bit in the UWtype X. This is the number of steps X needs to be shifted left to set the msb. Undefined for X == 0, unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts from the least significant end. 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, high_addend_2, low_addend_2) adds two two-word UWtype integers, composed of HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow (i.e. carry out) is not stored anywhere, and is lost. 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, composed of HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, and is lost. If any of these macros are left undefined for a particular CPU, C macros are used. Notes: For add_ssaaaa the two high and two low addends can both commute, but unfortunately gcc only supports one "%" commutative in each asm block. This has always been so but is only documented in recent versions (eg.
pre-release 3.3). Having two or more "%"s can cause an internal compiler error in certain rare circumstances. Apparently it was only the last "%" that was ever actually respected, so the code has been updated to leave just that. Clearly there's a free choice whether high or low should get it, if there's a reason to favour one over the other. Also obviously when the constraints on the two operands are identical there's no benefit to the reloader in any "%" at all. */ /* The CPUs come in alphabetical order below. Please add support for more CPUs here, or improve the current support for the CPUs below! */ /* FIXME: The macros using external routines like __MPN(count_leading_zeros) don't need to be under !NO_ASM */ #if ! defined (NO_ASM) #if defined (__alpha) && W_TYPE_SIZE == 64 /* Most alpha-based machines, except Cray systems. */ #if defined (__GNUC__) #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ __asm__ ("umulh %r1,%2,%0" \ : "=r" (ph) \ : "%rJ" (m0), "rI" (m1)); \ (pl) = __m0 * __m1; \ } while (0) #define UMUL_TIME 18 #else /* ! __GNUC__ */ #include #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ (ph) = __UMULH (m0, m1); \ (pl) = __m0 * __m1; \ } while (0) #endif #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __di; \ __di = __MPN(invert_limb) (d); \ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ } while (0) #define UDIV_PREINV_ALWAYS 1 #define UDIV_NEEDS_NORMALIZATION 1 #define UDIV_TIME 220 #endif /* LONGLONG_STANDALONE */ /* clz_tab is required by mpn/alpha/cntlz.asm, and that file is built for all alphas, even though ev67 and ev68 don't need it. 
*/ #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB #if defined (__GNUC__) && \ (defined(HAVE_HOST_CPU_alphaev67) && HAVE_HOST_CPU_alphaev67 || \ defined(HAVE_HOST_CPU_alphaev68) && HAVE_HOST_CPU_alphaev68) #define count_leading_zeros(COUNT,X) \ __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X)) #define count_trailing_zeros(COUNT,X) \ __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X)) #else /* ! (ev67 || ev68) */ #ifndef LONGLONG_STANDALONE #if defined(HAVE_ATTRIBUTE_CONST) && HAVE_ATTRIBUTE_CONST long __MPN(count_leading_zeros) _PROTO ((UDItype)) __attribute__ ((const)); #else long __MPN(count_leading_zeros) _PROTO ((UDItype)); #endif #define count_leading_zeros(count, x) \ ((count) = __MPN(count_leading_zeros) (x)) #endif /* LONGLONG_STANDALONE */ #endif /* ! (ev67 || ev68) */ #endif /* __alpha */ #if defined (_CRAY) && W_TYPE_SIZE == 64 #include #define UDIV_PREINV_ALWAYS 1 #define UDIV_NEEDS_NORMALIZATION 1 #define UDIV_TIME 220 long __MPN(count_leading_zeros) _PROTO ((UDItype)); #define count_leading_zeros(count, x) \ ((count) = _leadz ((UWtype) (x))) #if defined (_CRAYIEEE) /* I.e., Cray T90/ieee, T3D, and T3E */ #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ (ph) = _int_mult_upper (m0, m1); \ (pl) = __m0 * __m1; \ } while (0) #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __di; \ __di = __MPN(invert_limb) (d); \ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ } while (0) #endif /* LONGLONG_STANDALONE */ #endif /* _CRAYIEEE */ #endif /* _CRAY */ #if defined (__hppa) && W_TYPE_SIZE == 64 /* These macros are for ABI=2.0w. In ABI=2.0n they can't be used, since GCC (3.2) puts longlong into two adjacent 32-bit registers. Presumably this is just a case of no direct support for 2.0n but treating it like 1.0. */ #if defined (__GNUC__) && ! 
defined (_LONG_LONG_LIMB) #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add %4,%5,%1\n\tadd,dc %2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl)) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub %4,%5,%1\n\tsub,db %2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl)) #endif /* We put the result pointer parameter last here, since it makes passing of the other parameters more efficient. */ #ifndef LONGLONG_STANDALONE #define umul_ppmm(wh, wl, u, v) \ do { \ UWtype __p0; \ (wh) = __MPN(umul_ppmm) (u, v, &__p0); \ (wl) = __p0; \ } while (0) extern UWtype __MPN(umul_ppmm) _PROTO ((UWtype, UWtype, UWtype *)); #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __r; \ (q) = __MPN(udiv_qrnnd) (n1, n0, d, &__r); \ (r) = __r; \ } while (0) extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype, UWtype, UWtype, UWtype *)); #define UMUL_TIME 8 #define UDIV_TIME 60 #endif /* LONGLONG_STANDALONE */ #endif /* hppa */ #if defined (__ia64) && W_TYPE_SIZE == 64 #if defined (__GNUC__) #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ __asm__ ("xma.hu %0 = %1, %2, f0" \ : "=f" (ph) \ : "f" (m0), "f" (m1)); \ (pl) = __m0 * __m1; \ } while (0) #define UMUL_TIME 14 #define count_leading_zeros(count, x) \ do { \ UWtype _x = (x), _y, _a, _c; \ __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \ __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \ _c = (_a - 1) << 3; \ _x >>= _c; \ if (_x >= 1 << 4) \ _x >>= 4, _c += 4; \ if (_x >= 1 << 2) \ _x >>= 2, _c += 2; \ _c += _x >> 1; \ (count) = W_TYPE_SIZE - 1 - _c; \ } while (0) #endif #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __di; \ __di = __MPN(invert_limb) (d); \ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ } while (0) #define UDIV_PREINV_ALWAYS 1 #define UDIV_NEEDS_NORMALIZATION 1 #endif #define UDIV_TIME 220 #endif #if defined (__GNUC__) /* We sometimes need to clobber "cc" with gcc2, but 
that would not be understood by gcc1. Use cpp to avoid major code duplication. */ #if __GNUC__ < 2 #define __CLOBBER_CC #define __AND_CLOBBER_CC #else /* __GNUC__ >= 2 */ #define __CLOBBER_CC : "cc" #define __AND_CLOBBER_CC , "cc" #endif /* __GNUC__ < 2 */ #if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl)) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl)) #define umul_ppmm(xh, xl, m0, m1) \ do { \ USItype __m0 = (m0), __m1 = (m1); \ __asm__ ("multiplu %0,%1,%2" \ : "=r" (xl) \ : "r" (__m0), "r" (__m1)); \ __asm__ ("multmu %0,%1,%2" \ : "=r" (xh) \ : "r" (__m0), "r" (__m1)); \ } while (0) #define udiv_qrnnd(q, r, n1, n0, d) \ __asm__ ("dividu %0,%3,%4" \ : "=r" (q), "=q" (r) \ : "1" (n1), "r" (n0), "r" (d)) #define count_leading_zeros(count, x) \ __asm__ ("clz %0,%1" \ : "=r" (count) \ : "r" (x)) #define COUNT_LEADING_ZEROS_0 32 #endif /* __a29k__ */ #if defined (__arc__) #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3" \ : "=r" ((USItype) (sh)), \ "=&r" ((USItype) (sl)) \ : "r" ((USItype) (ah)), \ "rIJ" ((USItype) (bh)), \ "%r" ((USItype) (al)), \ "rIJ" ((USItype) (bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ : "=r" ((USItype) (sh)), \ "=&r" ((USItype) (sl)) \ : "r" ((USItype) (ah)), \ "rIJ" ((USItype) (bh)), \ "r" ((USItype) (al)), \ "rIJ" ((USItype) (bl))) #endif #if defined (__arm__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (al)) \ { \ if 
(__builtin_constant_p (ah)) \ __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \ : "=r" (sh), "=&r" (sl) \ : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ else \ __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ } \ else if (__builtin_constant_p (ah)) \ { \ if (__builtin_constant_p (bl)) \ __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \ : "=r" (sh), "=&r" (sl) \ : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ else \ __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \ : "=r" (sh), "=&r" (sl) \ : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ } \ else if (__builtin_constant_p (bl)) \ { \ if (__builtin_constant_p (bh)) \ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ else \ __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \ : "=r" (sh), "=&r" (sl) \ : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ } \ else /* only bh might be a constant */ \ __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\ } while (0) #if 1 || defined (__arm_m__) /* `M' series has widening multiply support */ #define umul_ppmm(xh, xl, a, b) \ __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b)) #define UMUL_TIME 5 #define smul_ppmm(xh, xl, a, b) \ __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b)) #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __di; \ __di = __MPN(invert_limb) (d); \ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ } while (0) #define UDIV_PREINV_ALWAYS 1 #define UDIV_NEEDS_NORMALIZATION 1 #define UDIV_TIME 70 #endif /* LONGLONG_STANDALONE */ #else #define umul_ppmm(xh, xl, a, b) \ __asm__ ("%@ Inlined umul_ppmm\n" \ " mov %|r0, %2, lsr #16\n" \ " mov %|r2, %3, lsr #16\n" \ " bic %|r1, %2, %|r0, lsl #16\n" \ " bic %|r2, 
%3, %|r2, lsl #16\n" \ " mul %1, %|r1, %|r2\n" \ " mul %|r2, %|r0, %|r2\n" \ " mul %|r1, %0, %|r1\n" \ " mul %0, %|r0, %0\n" \ " adds %|r1, %|r2, %|r1\n" \ " addcs %0, %0, #65536\n" \ " adds %1, %1, %|r1, lsl #16\n" \ " adc %0, %0, %|r1, lsr #16" \ : "=&r" (xh), "=r" (xl) \ : "r" (a), "r" (b) \ : "r0", "r1", "r2") #define UMUL_TIME 20 #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __r; \ (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ (r) = __r; \ } while (0) extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype)); #define UDIV_TIME 200 #endif /* LONGLONG_STANDALONE */ #endif #endif /* __arm__ */ #if defined (__clipper__) && W_TYPE_SIZE == 32 #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ __asm__ ("mulwux %2,%0" \ : "=r" (__x.__ll) \ : "%0" ((USItype)(u)), "r" ((USItype)(v))); \ (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) #define smul_ppmm(w1, w0, u, v) \ ({union {DItype __ll; \ struct {SItype __l, __h;} __i; \ } __x; \ __asm__ ("mulwx %2,%0" \ : "=r" (__x.__ll) \ : "%0" ((SItype)(u)), "r" ((SItype)(v))); \ (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) #define __umulsidi3(u, v) \ ({UDItype __w; \ __asm__ ("mulwux %2,%0" \ : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \ __w; }) #endif /* __clipper__ */ /* Fujitsu vector computers. 
*/ #if defined (__uxp__) && W_TYPE_SIZE == 32 #define umul_ppmm(ph, pl, u, v) \ do { \ union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __x; \ __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\ (ph) = __x.__i.__h; \ (pl) = __x.__i.__l; \ } while (0) #define smul_ppmm(ph, pl, u, v) \ do { \ union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __x; \ __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \ (ph) = __x.__i.__h; \ (pl) = __x.__i.__l; \ } while (0) #endif #if defined (__gmicro__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add.w %5,%1\n\taddx %3,%0" \ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "%1" ((USItype)(al)), "g" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "1" ((USItype)(al)), "g" ((USItype)(bl))) #define umul_ppmm(ph, pl, m0, m1) \ __asm__ ("mulx %3,%0,%1" \ : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \ : "%0" ((USItype)(m0)), "g" ((USItype)(m1))) #define udiv_qrnnd(q, r, nh, nl, d) \ __asm__ ("divx %4,%0,%1" \ : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \ : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d))) #define count_leading_zeros(count, x) \ __asm__ ("bsch/1 %1,%0" \ : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0)) #endif #if defined (__hppa) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl)) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl)) #if defined (_PA_RISC1_1) #define umul_ppmm(wh, wl, u, v) \ do { \ union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __x; \ __asm__ ("xmpyu %1,%2,%0" : "=*f" 
(__x.__ll) : "*f" (u), "*f" (v)); \ (wh) = __x.__i.__h; \ (wl) = __x.__i.__l; \ } while (0) #define UMUL_TIME 8 #define UDIV_TIME 60 #else #define UMUL_TIME 40 #define UDIV_TIME 80 #endif #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __r; \ (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ (r) = __r; \ } while (0) extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype)); #endif /* LONGLONG_STANDALONE */ #define count_leading_zeros(count, x) \ do { \ USItype __tmp; \ __asm__ ( \ "ldi 1,%0\n" \ " extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \ " ldo 16(%0),%0 ; Yes. Perform add.\n" \ " extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \ " ldo 8(%0),%0 ; Yes. Perform add.\n" \ " extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \ " ldo 4(%0),%0 ; Yes. Perform add.\n" \ " extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \ " ldo 2(%0),%0 ; Yes. 
Perform add.\n" \ " extru %1,30,1,%1 ; Extract bit 1.\n" \ " sub %0,%1,%0 ; Subtract it.\n" \ : "=r" (count), "=r" (__tmp) : "1" (x)); \ } while (0) #endif /* hppa */ #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32 #define smul_ppmm(xh, xl, m0, m1) \ do { \ union {DItype __ll; \ struct {USItype __h, __l;} __i; \ } __x; \ __asm__ ("lr %N0,%1\n\tmr %0,%2" \ : "=&r" (__x.__ll) \ : "r" (m0), "r" (m1)); \ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ } while (0) #define sdiv_qrnnd(q, r, n1, n0, d) \ do { \ union {DItype __ll; \ struct {USItype __h, __l;} __i; \ } __x; \ __x.__i.__h = n1; __x.__i.__l = n0; \ __asm__ ("dr %0,%2" \ : "=r" (__x.__ll) \ : "0" (__x.__ll), "r" (d)); \ (q) = __x.__i.__l; (r) = __x.__i.__h; \ } while (0) #endif #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addl %5,%1\n\tadcl %3,%0" \ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "%1" ((USItype)(al)), "g" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subl %5,%1\n\tsbbl %3,%0" \ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "1" ((USItype)(al)), "g" ((USItype)(bl))) #define umul_ppmm(w1, w0, u, v) \ __asm__ ("mull %3" \ : "=a" (w0), "=d" (w1) \ : "%0" ((USItype)(u)), "rm" ((USItype)(v))) #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\ __asm__ ("divl %4" /* stringification in K&R C */ \ : "=a" (q), "=d" (r) \ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx))) /* P5 bsrl takes between 10 and 72 cycles depending where the most significant 1 bit is, hence the use of the alternatives below. bsfl is slow too, between 18 and 42 depending where the least significant 1 bit is. The faster count_leading_zeros are pressed into service via the generic count_trailing_zeros at the end of the file. 
*/ #if defined(HAVE_HOST_CPU_i586) && HAVE_HOST_CPU_i586 || \ defined(HAVE_HOST_CPU_pentium) && HAVE_HOST_CPU_pentium /* The following should be a fixed 14 cycles or so. Some scheduling opportunities should be available between the float load/store too. This is used (with "n&-n" to get trailing zeros) in gcc 3 for __builtin_ffs and is apparently suggested by the Intel optimizing manual (don't know exactly where). gcc 2.95 or up will be best for this, so the "double" is correctly aligned on the stack. */ #define count_leading_zeros(c,n) \ do { \ union { \ double d; \ unsigned a[2]; \ } __u; \ ASSERT ((n) != 0); \ __u.d = (UWtype) (n); \ (c) = 0x3FF + 31 - (__u.a[1] >> 20); \ } while (0) #define COUNT_LEADING_ZEROS_0 (0x3FF + 31) #else /* ! pentium */ #if defined(HAVE_HOST_CPU_pentiummmx) && HAVE_HOST_CPU_pentiummmx /* The following should be a fixed 14 or 15 cycles, but possibly plus an L1 cache miss reading from __clz_tab. It's favoured over the float above so as to avoid mixing MMX and x87, since the penalty for switching between the two is about 100 cycles. The asm block sets __shift to -3 if the high 24 bits are clear, -2 for 16, -1 for 8, or 0 otherwise. This could be written equivalently as follows, but as of gcc 2.95.2 it results in conditional jumps. __shift = -(__n < 0x1000000); __shift -= (__n < 0x10000); __shift -= (__n < 0x100); The middle two sbbl and cmpl's pair, and with luck something gcc generates might pair with the first cmpl and the last sbbl. The "32+1" constant could be folded into __clz_tab[], but it doesn't seem worth making a different table just for that. 
*/ #define count_leading_zeros(c,n) \ do { \ USItype __n = (n); \ USItype __shift; \ __asm__ ("cmpl $0x1000000, %1\n" \ "sbbl %0, %0\n" \ "cmpl $0x10000, %1\n" \ "sbbl $0, %0\n" \ "cmpl $0x100, %1\n" \ "sbbl $0, %0\n" \ : "=&r" (__shift) : "r" (__n)); \ __shift = __shift*8 + 24 + 1; \ (c) = 32 + 1 - __shift - __clz_tab[__n >> __shift]; \ } while (0) #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB #define COUNT_LEADING_ZEROS_0 31 /* n==0 indistinguishable from n==1 */ #else /* !pentiummmx */ /* On P6, gcc prior to 3.0 generates a partial register stall for __cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former being 1 code byte smaller. "31-__cbtmp" is a workaround, probably at the cost of one extra instruction. Do this for "i386" too, since that means generic x86. */ #if __GNUC__ < 3 \ && (defined(HAVE_HOST_CPU_i386) && HAVE_HOST_CPU_i386 \ || defined(HAVE_HOST_CPU_i686) && HAVE_HOST_CPU_i686 \ || defined(HAVE_HOST_CPU_pentiumpro) && HAVE_HOST_CPU_pentiumpro \ || defined(HAVE_HOST_CPU_pentium2) && HAVE_HOST_CPU_pentium2 \ || defined(HAVE_HOST_CPU_pentium3) && HAVE_HOST_CPU_pentium3) #define count_leading_zeros(count, x) \ do { \ USItype __cbtmp; \ ASSERT ((x) != 0); \ __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ (count) = 31 - __cbtmp; \ } while (0) #else #define count_leading_zeros(count, x) \ do { \ USItype __cbtmp; \ ASSERT ((x) != 0); \ __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ (count) = __cbtmp ^ 31; \ } while (0) #endif #define count_trailing_zeros(count, x) \ do { \ ASSERT ((x) != 0); \ __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))); \ } while (0) #endif /* ! pentiummmx */ #endif /* ! 
pentium */ #ifndef UMUL_TIME #define UMUL_TIME 10 #endif #ifndef UDIV_TIME #define UDIV_TIME 40 #endif #endif /* 80x86 */ #if defined (__x86_64__) && W_TYPE_SIZE == 64 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addq %5,%1\n\tadcq %3,%0" \ : "=r" ((UDItype)(sh)), "=&r" ((UDItype)(sl)) \ : "0" ((UDItype)(ah)), "g" ((UDItype)(bh)), \ "%1" ((UDItype)(al)), "g" ((UDItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subq %5,%1\n\tsbbq %3,%0" \ : "=r" ((UDItype)(sh)), "=&r" ((UDItype)(sl)) \ : "0" ((UDItype)(ah)), "g" ((UDItype)(bh)), \ "1" ((UDItype)(al)), "g" ((UDItype)(bl))) #define umul_ppmm(w1, w0, u, v) \ __asm__ ("mulq %3" \ : "=a" (w0), "=d" (w1) \ : "%0" ((UDItype)(u)), "rm" ((UDItype)(v))) #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\ __asm__ ("divq %4" /* stringification in K&R C */ \ : "=a" (q), "=d" (r) \ : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx))) #define count_leading_zeros(count, x) \ do { \ UDItype __cbtmp; \ ASSERT ((x) != 0); \ __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \ (count) = __cbtmp ^ 63; \ } while (0) /* bsfq destination must be a 64-bit register, "%q0" forces this in case count is only an int. 
*/ #define count_trailing_zeros(count, x) \ do { \ ASSERT ((x) != 0); \ __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x))); \ } while (0) #endif /* x86_64 */ #if defined (__i860__) && W_TYPE_SIZE == 32 #define rshift_rhlc(r,h,l,c) \ __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \ "=r" (r) : "r" (h), "r" (l), "rn" (c)) #endif /* i860 */ #if defined (__i960__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \ : "=r" (sh), "=&r" (sl) \ : "dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl)) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \ : "=r" (sh), "=&r" (sl) \ : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl)) #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ __asm__ ("emul %2,%1,%0" \ : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \ (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) #define __umulsidi3(u, v) \ ({UDItype __w; \ __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \ __w; }) #define udiv_qrnnd(q, r, nh, nl, d) \ do { \ union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __nn; \ __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ __asm__ ("ediv %d,%n,%0" \ : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \ (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ } while (0) #define count_leading_zeros(count, x) \ do { \ USItype __cbtmp; \ __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \ (count) = __cbtmp ^ 31; \ } while (0) #define COUNT_LEADING_ZEROS_0 (-32) /* sic */ #if defined (__i960mx) /* what is the proper symbol to test??? 
*/ #define rshift_rhlc(r,h,l,c) \ do { \ union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __nn; \ __nn.__i.__h = (h); __nn.__i.__l = (l); \ __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ } #endif /* i960mx */ #endif /* i960 */ #if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \ || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \ || defined (__mc5307__)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \ "%1" ((USItype)(al)), "g" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \ "1" ((USItype)(al)), "g" ((USItype)(bl))) /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */ #if defined (__mc68020__) || defined(mc68020) \ || defined (__mc68030__) || defined (mc68030) \ || defined (__mc68040__) || defined (mc68040) \ || defined (__mcpu32__) || defined (mcpu32) \ || defined (__NeXT__) #define umul_ppmm(w1, w0, u, v) \ __asm__ ("mulu%.l %3,%1:%0" \ : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \ : "%0" ((USItype)(u)), "dmi" ((USItype)(v))) #define UMUL_TIME 45 #define udiv_qrnnd(q, r, n1, n0, d) \ __asm__ ("divu%.l %4,%1:%0" \ : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) #define UDIV_TIME 90 #define sdiv_qrnnd(q, r, n1, n0, d) \ __asm__ ("divs%.l %4,%1:%0" \ : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) #else /* for other 68k family members use 16x16->32 multiplication */ #define umul_ppmm(xh, xl, a, b) \ do { USItype __umul_tmp1, __umul_tmp2; \ __asm__ ("| Inlined umul_ppmm\n" \ " move%.l %5,%3\n" \ " move%.l %2,%0\n" \ " move%.w %3,%1\n" \ " swap %3\n" \ 
" swap %0\n" \ " mulu%.w %2,%1\n" \ " mulu%.w %3,%0\n" \ " mulu%.w %2,%3\n" \ " swap %2\n" \ " mulu%.w %5,%2\n" \ " add%.l %3,%2\n" \ " jcc 1f\n" \ " add%.l %#0x10000,%0\n" \ "1: move%.l %2,%3\n" \ " clr%.w %2\n" \ " swap %2\n" \ " swap %3\n" \ " clr%.w %3\n" \ " add%.l %3,%1\n" \ " addx%.l %2,%0\n" \ " | End inlined umul_ppmm" \ : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ } while (0) #define UMUL_TIME 100 #define UDIV_TIME 400 #endif /* not mc68020 */ /* The '020, '030, '040 and '060 have bitfield insns. GCC 3.4 defines __mc68020__ when in CPU32 mode, check for __mcpu32__ to exclude bfffo on that chip (bitfield insns not available). */ #if (defined (__mc68020__) || defined (mc68020) \ || defined (__mc68030__) || defined (mc68030) \ || defined (__mc68040__) || defined (mc68040) \ || defined (__mc68060__) || defined (mc68060) \ || defined (__NeXT__)) \ && ! defined (__mcpu32__) #define count_leading_zeros(count, x) \ __asm__ ("bfffo %1{%b2:%b2},%0" \ : "=d" ((USItype) (count)) \ : "od" ((USItype) (x)), "n" (0)) #define COUNT_LEADING_ZEROS_0 32 #endif #endif /* mc68000 */ #if defined (__m88000__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl)) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl)) #define count_leading_zeros(count, x) \ do { \ USItype __cbtmp; \ __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \ (count) = __cbtmp ^ 31; \ } while (0) #define COUNT_LEADING_ZEROS_0 63 /* sic */ #if defined (__m88110__) #define umul_ppmm(wh, wl, u, v) \ do { \ union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __x; \ __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ (wh) = __x.__i.__h; \ (wl) = 
__x.__i.__l; \ } while (0) #define udiv_qrnnd(q, r, n1, n0, d) \ ({union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __x, __q; \ __x.__i.__h = (n1); __x.__i.__l = (n0); \ __asm__ ("divu.d %0,%1,%2" \ : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) #define UMUL_TIME 5 #define UDIV_TIME 25 #else #define UMUL_TIME 17 #define UDIV_TIME 150 #endif /* __m88110__ */ #endif /* __m88000__ */ #if defined (__mips) && W_TYPE_SIZE == 32 #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 #define umul_ppmm(w1, w0, u, v) \ __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v)) #else #define umul_ppmm(w1, w0, u, v) \ __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \ : "=d" (w0), "=d" (w1) : "d" (u), "d" (v)) #endif #define UMUL_TIME 10 #define UDIV_TIME 100 #endif /* __mips */ /* copied from GMP-5.0.4 longlong.h */ #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 #if __GMP_GNUC_PREREQ (4,4) #define umul_ppmm(w1, w0, u, v) \ do { \ typedef unsigned int __ll_UTItype __attribute__((mode(TI))); \ __ll_UTItype __ll = (__ll_UTItype)(u) * (v); \ w1 = __ll >> 64; \ w0 = __ll; \ } while (0) #endif #if !defined (umul_ppmm) && __GMP_GNUC_PREREQ (2,7) #define umul_ppmm(w1, w0, u, v) \ __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v)) #endif #if !defined (umul_ppmm) #define umul_ppmm(w1, w0, u, v) \ __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \ : "=d" (w0), "=d" (w1) : "d" (u), "d" (v)) #endif #define UMUL_TIME 20 #define UDIV_TIME 140 #endif /* __mips */ #if defined (__ns32000__) && W_TYPE_SIZE == 32 #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ __asm__ ("meid %2,%0" \ : "=g" (__x.__ll) \ : "%0" ((USItype)(u)), "g" ((USItype)(v))); \ (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) #define __umulsidi3(u, v) \ ({UDItype __w; \ __asm__ ("meid %2,%0" \ : "=g" (__w) \ : "%0" ((USItype)(u)), "g" ((USItype)(v))); \ __w; }) #define udiv_qrnnd(q, r, n1, n0, d) \ ({union 
{UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ __x.__i.__h = (n1); __x.__i.__l = (n0); \ __asm__ ("deid %2,%0" \ : "=g" (__x.__ll) \ : "0" (__x.__ll), "g" ((USItype)(d))); \ (r) = __x.__i.__l; (q) = __x.__i.__h; }) #define count_trailing_zeros(count,x) \ do { \ __asm__ ("ffsd %2,%0" \ : "=r" ((USItype) (count)) \ : "0" ((USItype) 0), "r" ((USItype) (x))); \ } while (0) #endif /* __ns32000__ */ /* FIXME: We should test _IBMR2 here when we add assembly support for the system vendor compilers. */ #if (defined (_ARCH_PPC) /* AIX */ \ || defined (_ARCH_PWR) /* AIX */ \ || defined (__powerpc__) /* gcc */ \ || defined (__POWERPC__) /* BEOS */ \ || defined (__ppc__) /* Darwin */ \ || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ && CPU_FAMILY == PPC) \ ) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (bh) && (bh) == 0) \ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ else \ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ } while (0) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (ah) && (ah) == 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ else if (__builtin_constant_p (bh) && (bh) == 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ else if 
(__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ else \ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ } while (0) #define count_leading_zeros(count, x) \ __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x)) #define COUNT_LEADING_ZEROS_0 32 #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \ || defined (__ppc__) \ || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ && CPU_FAMILY == PPC) #define umul_ppmm(ph, pl, m0, m1) \ do { \ USItype __m0 = (m0), __m1 = (m1); \ __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = __m0 * __m1; \ } while (0) #define UMUL_TIME 15 #define smul_ppmm(ph, pl, m0, m1) \ do { \ SItype __m0 = (m0), __m1 = (m1); \ __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = __m0 * __m1; \ } while (0) #define SMUL_TIME 14 #define UDIV_TIME 120 #else #define UMUL_TIME 8 #define smul_ppmm(xh, xl, m0, m1) \ __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1)) #define SMUL_TIME 4 #define sdiv_qrnnd(q, r, nh, nl, d) \ __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d)) #define UDIV_TIME 100 #endif #endif /* 32-bit POWER architecture variants. */ /* We should test _IBMR2 here when we add assembly support for the system vendor compilers. 
*/ #if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (bh) && (bh) == 0) \ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ else \ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ } while (0) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (ah) && (ah) == 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ else if (__builtin_constant_p (bh) && (bh) == 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ else \ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ : "=r" (sh), "=&r" (sl) \ : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ } while (0) #define count_leading_zeros(count, x) \ __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) #define COUNT_LEADING_ZEROS_0 64 #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = __m0 * __m1; \ } while (0) #define UMUL_TIME 15 #define smul_ppmm(ph, pl, m0, m1) \ do { \ DItype __m0 = (m0), __m1 = (m1); \ __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = 
__m0 * __m1; \ } while (0) #define SMUL_TIME 14 /* ??? */ #define UDIV_TIME 120 /* ??? */ #endif /* 64-bit PowerPC. */ #if defined (__pyr__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addw %5,%1\n\taddwc %3,%0" \ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "%1" ((USItype)(al)), "g" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subw %5,%1\n\tsubwb %3,%0" \ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "1" ((USItype)(al)), "g" ((USItype)(bl))) /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __x; \ __asm__ ("movw %1,%R0\n\tuemul %2,%0" \ : "=&r" (__x.__ll) \ : "g" ((USItype) (u)), "g" ((USItype)(v))); \ (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) #endif /* __pyr__ */ #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("a %1,%5\n\tae %0,%3" \ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \ "%1" ((USItype)(al)), "r" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("s %1,%5\n\tse %0,%3" \ : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \ "1" ((USItype)(al)), "r" ((USItype)(bl))) #define smul_ppmm(ph, pl, m0, m1) \ __asm__ ( \ "s r2,r2\n" \ " mts r10,%2\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " cas %0,r2,r0\n" \ " mfs r10,%1" \ : "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \ : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \ : "r2") #define UMUL_TIME 20 #define UDIV_TIME 200 #define count_leading_zeros(count, x) \ do { \ if ((x) >= 
0x10000) \ __asm__ ("clz %0,%1" \ : "=r" ((USItype)(count)) : "r" ((USItype)(x) >> 16)); \ else \ { \ __asm__ ("clz %0,%1" \ : "=r" ((USItype)(count)) : "r" ((USItype)(x))); \ (count) += 16; \ } \ } while (0) #endif /* RT/ROMP */ #if defined (__sh2__) && W_TYPE_SIZE == 32 #define umul_ppmm(w1, w0, u, v) \ __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \ : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach") #define UMUL_TIME 5 #endif #if defined (__sparc__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \ __CLOBBER_CC) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \ __CLOBBER_CC) /* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h doesn't define anything to indicate that to us, it only sets __sparcv8. */ #if defined (__sparc_v9__) || defined (__sparcv9) /* Perhaps we should use floating-point operations here? */ #if 0 /* Triggers a bug making mpz/tests/t-gcd.c fail. Perhaps we simply need explicitly zero-extend the inputs? */ #define umul_ppmm(w1, w0, u, v) \ __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \ "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1") #else /* Use v8 umul until above bug is fixed. */ #define umul_ppmm(w1, w0, u, v) \ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) #endif /* Use a plain v8 divide for v9. 
*/ #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ USItype __q; \ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ (r) = (n0) - __q * (d); \ (q) = __q; \ } while (0) #else #if defined (__sparc_v8__) /* gcc normal */ \ || defined (__sparcv8) /* gcc solaris */ /* Don't match immediate range because, 1) it is not often useful, 2) the 'I' flag thinks of the range as a 13 bit signed interval, while we want to match a 13 bit interval, sign extended to 32 bits, but INTERPRETED AS UNSIGNED. */ #define umul_ppmm(w1, w0, u, v) \ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) #define UMUL_TIME 5 #if defined(HAVE_HOST_CPU_supersparc) && HAVE_HOST_CPU_supersparc #define UDIV_TIME 60 /* SuperSPARC timing */ #else /* Don't use this on SuperSPARC because its udiv only handles 53 bit dividends and will trap to the kernel for the rest. */ #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ USItype __q; \ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ (r) = (n0) - __q * (d); \ (q) = __q; \ } while (0) #define UDIV_TIME 25 #endif /* HAVE_HOST_CPU_supersparc */ #else /* ! __sparc_v8__ */ #if defined (__sparclite__) /* This has hardware multiply but not divide. It also has two additional instructions scan (ffs from high bit) and divscc. */ #define umul_ppmm(w1, w0, u, v) \ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) #define UMUL_TIME 5 #define udiv_qrnnd(q, r, n1, n0, d) \ __asm__ ("! Inlined udiv_qrnnd\n" \ " wr %%g0,%2,%%y ! 
Not a delayed write for sparclite\n" \ " tst %%g0\n" \ " divscc %3,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%0\n" \ " rd %%y,%1\n" \ " bl,a 1f\n" \ " add %1,%4,%1\n" \ "1: ! End of inline udiv_qrnnd" \ : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \ : "%g1" __AND_CLOBBER_CC) #define UDIV_TIME 37 #define count_leading_zeros(count, x) \ __asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x)) /* Early sparclites return 63 for an argument of 0, but they warn that future implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 undefined. */ #endif /* __sparclite__ */ #endif /* __sparc_v8__ */ #endif /* __sparc_v9__ */ /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ #ifndef umul_ppmm #define umul_ppmm(w1, w0, u, v) \ __asm__ ("! Inlined umul_ppmm\n" \ " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ " sra %3,31,%%g2 ! Don't move this insn\n" \ " and %2,%%g2,%%g2 ! Don't move this insn\n" \ " andcc %%g0,0,%%g1 ! 
Don't move this insn\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,0,%%g1\n" \ " add %%g1,%%g2,%0\n" \ " rd %%y,%1" \ : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \ : "%g1", "%g2" __AND_CLOBBER_CC) #define UMUL_TIME 39 /* 39 instructions */ #endif #ifndef udiv_qrnnd #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __r; \ (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ (r) = __r; \ } while (0) extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype)); #ifndef UDIV_TIME #define UDIV_TIME 140 #endif #endif /* LONGLONG_STANDALONE */ #endif /* udiv_qrnnd */ #endif /* __sparc__ */ #if defined (__sparc__) && W_TYPE_SIZE == 64 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ( \ "addcc %r4,%5,%1\n" \ " addccc %r6,%7,%%g0\n" \ " addc %r2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \ "%rJ" ((al) >> 32), "rI" ((bl) >> 32) \ __CLOBBER_CC) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ( \ "subcc %r4,%5,%1\n" \ " subccc %r6,%7,%%g0\n" \ " subc %r2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl), \ "rJ" ((al) >> 32), "rI" ((bl) >> 
32) \ __CLOBBER_CC) #endif #if defined (__vax__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "%1" ((USItype)(al)), "g" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "1" ((USItype)(al)), "g" ((USItype)(bl))) #define smul_ppmm(xh, xl, m0, m1) \ do { \ union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ USItype __m0 = (m0), __m1 = (m1); \ __asm__ ("emul %1,%2,$0,%0" \ : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ } while (0) #define sdiv_qrnnd(q, r, n1, n0, d) \ do { \ union {DItype __ll; \ struct {SItype __l, __h;} __i; \ } __x; \ __x.__i.__h = n1; __x.__i.__l = n0; \ __asm__ ("ediv %3,%2,%0,%1" \ : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \ } while (0) #if 0 /* FIXME: This instruction appears to be unimplemented on some systems (vax 8800 maybe). 
*/ #define count_trailing_zeros(count,x) \ do { \ __asm__ ("ffs 0, 31, %1, %0" \ : "=g" ((USItype) (count)) \ : "g" ((USItype) (x))); \ } while (0) #endif #endif /* __vax__ */ #if defined (__z8000__) && W_TYPE_SIZE == 16 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \ : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \ "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ : "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \ : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \ "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl))) #define umul_ppmm(xh, xl, m0, m1) \ do { \ union {long int __ll; \ struct {unsigned int __h, __l;} __i; \ } __x; \ unsigned int __m0 = (m0), __m1 = (m1); \ __asm__ ("mult %S0,%H3" \ : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \ : "%1" (m0), "rQR" (m1)); \ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ (xh) += ((((signed int) __m0 >> 15) & __m1) \ + (((signed int) __m1 >> 15) & __m0)); \ } while (0) #endif /* __z8000__ */ #endif /* __GNUC__ */ #endif /* NO_ASM */ #ifdef _MSC_VER # include # if defined( _WIN64 ) # define count_leading_zeros(c,x) \ do { \ ASSERT ((x) != 0); \ _BitScanReverse64(&c, (x)); \ c = 63 - c; \ } while (0) # define count_trailing_zeros(c,x) \ do { \ ASSERT ((x) != 0); \ _BitScanForward64(&c, (x)); \ } while (0) # define umul_ppmm(xh, xl, m0, m1) \ do { \ xl = _umul128( (m0), (m1), &xh); \ } while (0) # else # define count_leading_zeros(c,x) \ do { \ ASSERT ((x) != 0); \ _BitScanReverse(&c, (x)); \ c = 31 - c; \ } while (0) # define count_trailing_zeros(c,x) \ do { \ ASSERT ((x) != 0); \ _BitScanForward(&c, (x)); \ } while (0) # define umul_ppmm(xh, xl, m0, m1) \ do { unsigned __int64 _t; \ _t = __emulu( (m0), (m1)); \ xl = _t & 0xffffffff; \ xh = _t >> 32; \ } while (0) # endif #endif #if !defined (umul_ppmm) && defined (__umulsidi3) 
#define umul_ppmm(ph, pl, m0, m1) \ { \ UDWtype __ll = __umulsidi3 (m0, m1); \ ph = (UWtype) (__ll >> W_TYPE_SIZE); \ pl = (UWtype) __ll; \ } #endif #if !defined (__umulsidi3) #define __umulsidi3(u, v) \ ({UWtype __hi, __lo; \ umul_ppmm (__hi, __lo, u, v); \ ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) #endif /* Note the prototypes are under !define(umul_ppmm) etc too, since the HPPA versions above are different and we don't want to conflict. */ #if ! defined (umul_ppmm) && \ defined(HAVE_NATIVE_mpn_umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm #define mpn_umul_ppmm __MPN(umul_ppmm) extern mp_limb_t mpn_umul_ppmm _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t)); #define umul_ppmm(wh, wl, u, v) \ do { \ mp_limb_t __umul_ppmm__p0; \ (wh) = __MPN(umul_ppmm) (&__umul_ppmm__p0, \ (mp_limb_t) (u), (mp_limb_t) (v)); \ (wl) = __umul_ppmm__p0; \ } while (0) #endif #if ! defined (udiv_qrnnd) && \ defined(HAVE_NATIVE_mpn_udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd #define mpn_udiv_qrnnd __MPN(udiv_qrnnd) extern mp_limb_t mpn_udiv_qrnnd _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t)); #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ mp_limb_t __udiv_qrnnd__r; \ (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \ (mp_limb_t) (n1), (mp_limb_t) (n0), (mp_limb_t) d); \ (r) = __udiv_qrnnd__r; \ } while (0) #endif /* If this machine has no inline assembler, use C macros. */ #if !defined (add_ssaaaa) #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ UWtype __x; \ __x = (al) + (bl); \ (sh) = (ah) + (bh) + (__x < (al)); \ (sl) = __x; \ } while (0) #endif #if !defined (sub_ddmmss) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ UWtype __x; \ __x = (al) - (bl); \ (sh) = (ah) - (bh) - (__x > (al)); \ (sl) = __x; \ } while (0) #endif /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of smul_ppmm. 
*/
#if !defined (umul_ppmm) && defined (smul_ppmm)
/* -(x >> (W_TYPE_SIZE - 1)) is an all-ones mask when the top bit of x is set
   and zero otherwise, so the high word of the signed product is corrected
   by adding the other operand once for each operand whose top bit is set.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    smul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
                + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
#endif

/* If we still don't have umul_ppmm, define it using plain C.  */
/* Both operands are split into half words, the four partial products are
   formed, and the two middle products are folded into the result with an
   explicit carry into the high word.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
    UWtype __u = (u), __v = (v); \
 \
    __ul = __ll_lowpart (__u); \
    __uh = __ll_highpart (__u); \
    __vl = __ll_lowpart (__v); \
    __vh = __ll_highpart (__v); \
 \
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
 \
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2; /* but this indeed can */ \
    if (__x1 < __x2) /* did we get it? */ \
      __x3 += __ll_B; /* yes, add it in the proper pos. */ \
 \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
  } while (0)
#endif

/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
   exist in one form or another).  */
/* Inverse of the correction above: the other operand is subtracted from the
   high word once for each operand whose top bit is set.  */
#if !defined (smul_ppmm)
#define smul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    umul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
                - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
#endif

/* Define this unconditionally, so it can be used for debugging.  */
/* Divides the two-word value n1:n0 by d, storing the quotient in q and the
   remainder in r.  Asserts d != 0 and n1 < d.  The quotient is produced as
   two half-word digits (__q1, then __q0); each digit is estimated by
   dividing by the high half of d and then decremented at most twice.
   NOTE(review): when this C version is selected, UDIV_NEEDS_NORMALIZATION
   is set to 1 further down; presumably callers must pass d with its top bit
   set -- the macro itself does not check this.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do { \
    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
 \
    ASSERT ((d) != 0); \
    ASSERT ((n1) < (d)); \
 \
    __d1 = __ll_highpart (d); \
    __d0 = __ll_lowpart (d); \
 \
    __q1 = (n1) / __d1; \
    __r1 = (n1) - __q1 * __d1; \
    __m = (UWtype) __q1 * __d0; \
    __r1 = __r1 * __ll_B | __ll_highpart (n0); \
    if (__r1 < __m) \
      { \
        __q1--, __r1 += (d); \
        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m) \
            __q1--, __r1 += (d); \
      } \
    __r1 -= __m; \
 \
    __q0 = __r1 / __d1; \
    __r0 = __r1 - __q0 * __d1; \
    __m = (UWtype) __q0 * __d0; \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
    if (__r0 < __m) \
      { \
        __q0--, __r0 += (d); \
        if (__r0 >= (d)) \
          if (__r0 < __m) \
            __q0--, __r0 += (d); \
      } \
    __r0 -= __m; \
 \
    (q) = (UWtype) __q1 * __ll_B | __q0; \
    (r) = __r0; \
  } while (0)

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    UWtype __r; \
    (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
    (r) = __r; \
  } while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/* Table-driven fallback.  __a becomes a shift amount such that __xr >> __a
   is used as an index into __clz_tab; for 32-bit words it is chosen by
   comparisons against powers of two built from __BITS4, otherwise by
   scanning downwards eight bits at a time for a non-zero byte.
   NOTE(review): __BITS4 and the contents of __clz_tab are defined outside
   this part of the file.  */
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do { \
    UWtype __xr = (x); \
    UWtype __a; \
 \
    if (W_TYPE_SIZE == 32) \
      { \
        __a = __xr < ((UWtype) 1 << 2*__BITS4) \
          ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1) \
          : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1 \
          : 3*__BITS4 + 1); \
      } \
    else \
      { \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
          if (((__xr >> __a) & 0xff) != 0) \
            break; \
        ++__a; \
      } \
 \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \
  } while (0)
/* This version gives a well-defined value for zero. */
#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#endif

/* The lookup table is only declared when the C fallback above is in use.  */
#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
extern const unsigned char __GMP_DECLSPEC __clz_tab[128];
#endif

#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.
*/ #define count_trailing_zeros(count, x) \ do { \ UWtype __ctz_x = (x); \ UWtype __ctz_c; \ ASSERT (__ctz_x != 0); \ count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ (count) = W_TYPE_SIZE - 1 - __ctz_c; \ } while (0) #endif #ifndef UDIV_NEEDS_NORMALIZATION #define UDIV_NEEDS_NORMALIZATION 0 #endif /* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and that hence the latter should always be used. */ #ifndef UDIV_PREINV_ALWAYS #define UDIV_PREINV_ALWAYS 0 #endif /* Give defaults for UMUL_TIME and UDIV_TIME. */ #ifndef UMUL_TIME #define UMUL_TIME 1 #endif #ifndef UDIV_TIME #define UDIV_TIME UMUL_TIME #endif ecm-6.4.4/ecm2.c0000644023561000001540000010432312106741273010213 00000000000000/* Elliptic Curve Method implementation: stage 2 routines. Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Paul Zimmermann, Alexander Kruppa, Pierrick Gaudry, Dave Newman. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include "ecm-impl.h" /* R_i <- q_i * S, 0 <= i < n, where q_i are large integers, S is a point on an elliptic curve. Uses max(bits in q_i) modular inversions (one less if max(q_i) is a power of 2). Needs up to n+2 cells in T. 
Returns whether factor was found or not found, factor goes into p. No error can occur. */ static int multiplyW2n (mpz_t p, point *R, curve *S, mpz_t *q, const unsigned int n, mpmod_t modulus, mpres_t u, mpres_t v, mpres_t *T, unsigned long *tot_muls, unsigned long *tot_gcds) { unsigned int i, maxbit, k; /* k is the number of values to batch invert */ unsigned int l, t, muls = 0, gcds = 0; #ifdef WANT_EXPCOST unsigned int hamweight = 0; #endif int youpi = ECM_NO_FACTOR_FOUND; mpz_t flag; /* Used as bit field, keeps track of which R[i] contain partial results */ point s; /* 2^t * S */ mpz_t signs; /* Used as bit field, i-th bit is set iff q[i]<0 */ #ifdef WANT_ASSERT mpz_t __dummy; /* used for local computations */ #endif if (n == 0) return ECM_NO_FACTOR_FOUND; /* Is S the neutral element ? */ if (mpres_is_zero (S->x, modulus) && mpres_is_zero (S->y, modulus)) { for (i = 0; i < n; i++) { mpres_set (R[i].x, S->x, modulus); mpres_set (R[i].y, S->y, modulus); } return ECM_NO_FACTOR_FOUND; } MPZ_INIT2 (flag, n); MPZ_INIT2 (signs, n); mpres_init (s.x, modulus); mpres_init (s.y, modulus); mpres_set (s.x, S->x, modulus); mpres_set (s.y, S->y, modulus); /* Set maxbit to index of highest set bit among all the q[i] */ /* Index of highest bit of q is sizeinbase(q, 2) - 1 */ maxbit = 0; for (i = 0; i < n; i++) { /* We'll first compute positive multiples and change signs later */ if (mpz_sgn (q[i]) < 0) { mpz_setbit (signs, i);; mpz_neg (q[i], q[i]); } /* Multiplier == 0? 
Then set result to neutral element */ if (mpz_sgn (q[i]) == 0) { mpres_set_ui (R[i].x, 0, modulus); mpres_set_ui (R[i].y, 0, modulus); } #ifdef WANT_EXPCOST else hamweight += mpz_popcount (q[i]) - 1; #endif if ((t = mpz_sizeinbase (q[i], 2) - 1) > maxbit) maxbit = t; } #ifdef WANT_EXPCOST outputf (OUTPUT_ALWAYS, "Expecting %d multiplications and %d extgcds\n", 4 * (maxbit) + 6 * hamweight - 3, maxbit + 1); /* maxbit is floor(log_2(max(q_i))) */ #endif for (t = 0; t <= maxbit && !youpi; t++) /* Examine t-th bit of the q[i] */ { /* See which values need inverting and put them into T[]. Keep number of those values in k */ k = 0; /* Will we have to double s at the end of this pass? If yes, schedule 2*s.y for inverting */ if (t < maxbit) mpres_add (T[k++], s.y, s.y, modulus); for (i = 0; i < n && !youpi; i++) if (mpz_tstbit (q[i], t)) /* If q[i] & (1< 0) mpres_mul (T[k], T[k], T[k - 1], modulus); k++; } /* If No: we'll simply set R[i] to s later on, nothing tbd here */ /* So there are k values in need of inverting, call them v[m], 0 <= m < k. 
*/ /* Here T[m], 0 <= m < k, contains v[0]*...*v[m] */ /* Put inverse of the product of all scheduled values in T[k]*/ if (k > 0) { muls += 3 * (k - 1); gcds++; if (!mpres_invert (T[k], T[k - 1], modulus)) { /* If a factor was found, put factor in p, flag success and bail out of loop */ if (p != NULL) mpres_gcd (p, T[k - 1], modulus); youpi = ECM_FACTOR_FOUND_STEP2; break; } } /* T[k] now contains 1/(v[0]*...*v[k - 1]), T[m], 0 <= m < k, still contain v[0]*...*v[m] */ l = k - 1; for (i = n; i-- > 0; ) /* Go through the R[i] again, backwards */ if (mpz_tstbit (q[i], t)) { if (mpz_tstbit (flag, i)) { /* T[k] contains 1/(v[0]*...*v[l]) */ if (l > 0) /* need to separate the values */ { /* T[l - 1] has v[0]*...*v[l-1] */ mpres_mul (T[l], T[l - 1], T[k], modulus); /* So T[l] now has 1/v[l] == 1/(s.x - R[i].x) */ mpres_sub (u, s.x, R[i].x, modulus); mpres_mul (T[k], T[k], u, modulus); /* T[k] now has 1/(v[0]*...*v[l - 1]) */ } else { /* T[k] contains 1/v[0] */ mpres_set (T[0], T[k], modulus); } /* 1/(s.x - R[i].x) is in T[l] */ #ifdef WANT_ASSERT mpres_sub (u, s.x, R[i].x, modulus); mpres_mul (u, u, T[l], modulus); mpz_init(__dummy); mpres_get_z (__dummy, u, modulus); mpz_mod (__dummy, __dummy, modulus->orig_modulus); if (mpz_cmp_ui (__dummy, 1) != 0) outputf (OUTPUT_ERROR, "Error, (s.x - R[%d].x) * T[%d] == " "%Zd\n", i, l, __dummy); mpz_clear(__dummy); #endif mpres_sub (u, s.y, R[i].y, modulus); /* U = y2 - y1 */ mpres_mul (T[l], T[l], u, modulus); /* T[l] = (y2-y1)/(x2-x1) = lambda */ mpres_sqr (u, T[l], modulus); /* U = lambda^2 */ mpres_sub (u, u, R[i].x, modulus); /* U = lambda^2 - x1 */ mpres_sub (R[i].x, u, s.x, modulus); /* x3 = lambda^2 - x1 - x2 */ mpres_sub (u, s.x, R[i].x, modulus); /* U = x2 - x3 */ mpres_mul (u, u, T[l], modulus); /* U = lambda*(x2 - x3) */ mpres_sub (R[i].y, u, s.y, modulus); /* y3 = lambda*(x2 - x3) - y2 */ muls += 3; l--; } else /* R[i] does not contain a partial result. 
*/ { mpres_set (R[i].x, s.x, modulus); /* Just set R[i] to s */ mpres_set (R[i].y, s.y, modulus); mpz_setbit (flag, i); /* and flag it as used */ } } if (t < maxbit) /* Double s */ { ASSERT(l==0); #ifdef WANT_ASSERT mpres_add (u, s.y, s.y, modulus); mpres_mul (u, u, T[k], modulus); mpz_init(__dummy); mpres_get_z (__dummy, u, modulus); mpz_mod (__dummy, __dummy, modulus->orig_modulus); if (mpz_cmp_ui (__dummy, 1) != 0) outputf (OUTPUT_ERROR, "Error, at t==%d, 2*s.y / (2*s.y) == %Zd\n", t, __dummy); mpz_clear(__dummy); #endif /* 1/(2*s.y) is in T[k] */ mpres_sqr (u, s.x, modulus); /* U = X^2 */ mpres_mul_ui (u, u, 3, modulus); /* U = 3*X^2 */ mpres_add (u, u, S->A, modulus); /* U = 3*X^2 + A */ mpres_mul (T[k], T[k], u, modulus); /* T = (3*X^2 + A) / (2*Y) = lambda */ mpres_sqr (u, T[k], modulus); /* U = lambda^2 */ mpres_sub (u, u, s.x, modulus); /* U = lambda^2 - X */ mpres_sub (u, u, s.x, modulus); /* U = lambda^2 - 2*X = s.x' */ mpres_sub (v, s.x, u, modulus); /* V = s.x - s.x' */ mpres_mul (v, v, T[k], modulus); /* V = lambda*(s.x - s.x') */ mpres_sub (s.y, v, s.y, modulus); /* s.y' = lambda*(s.x - s.x') - s.y */ mpres_set (s.x, u, modulus); muls += 4; } } mpres_clear (s.y, modulus); mpres_clear (s.x, modulus); mpz_clear (flag); if (tot_muls != NULL) *tot_muls += muls; if (tot_gcds != NULL) *tot_gcds += gcds; /* Now take inverse points (negative y-coordinate) where q[i] was < 0 */ for (i = 0; i < n; i++) if (mpz_tstbit (signs, i)) { mpz_neg (R[i].y, R[i].y); mpz_neg (q[i], q[i]); } mpz_clear (signs); return youpi; } /* Input: Points X[0]..X[(n+1)*m-1] T is used for temporary values and needs to have (n-1)*m+1 entries. Performs the following loop with only one gcdext, using Montgomery's trick: for (i=0;i 0. Processes neutral (zero), identical and negative points correctly. Return factor found or not (no error can occur here). 
*/ static int addWnm (mpz_t p, point *X, curve *S, mpmod_t modulus, unsigned int m, unsigned int n, mpres_t *T, unsigned long *tot_muls, unsigned long *tot_gcds) { unsigned int k, l; int i, j; if (n == 0 || m == 0) return ECM_NO_FACTOR_FOUND; k = 0; for (i = m - 1; i >= 0; i--) /* Go through the m different lists */ for (j = n - 1; j >= 0; j--) /* Go through each list backwards */ { /* And prepare the values to be inverted */ point *X1, *X2; X1 = X + i * (n + 1) + j; X2 = X + i * (n + 1) + j + 1; /* If either element is the neutral element, nothing tbd here */ if ((mpres_is_zero (X1->x, modulus) && mpres_is_zero (X1->y, modulus)) || (mpres_is_zero (X2->x, modulus) && mpres_is_zero (X2->y, modulus))) continue; mpres_sub (T[k], X2->x, X1->x, modulus); /* Schedule X2.x - X1.x */ if (mpres_is_zero (T[k], modulus)) /* If both x-cordinates are identical */ { /* Are the points identical? Compare y coordinates: */ mpres_sub (T[k], X2->y, X1->y, modulus); if (mpres_is_zero (T[k], modulus)) { /* Yes, we need to double. Schedule 2*X[...].y */ mpres_add (T[k], X1->y, X1->y, modulus); } else /* No, they are inverses. Nothing tbd here */ { #ifdef WANT_ASSERT /* Check that the y coordinates are mutual negatives */ mpres_add (T[k], X2->y, X1->y, modulus); ASSERT (mpres_is_zero (T[k], modulus)); #endif continue; } } if (k > 0) mpres_mul (T[k], T[k], T[k - 1], modulus); k++; } /* v_m = X[i * (n + 1) + j] - X[i * (n + 1) + j + 1], 0 <= j < n, and m = i * n + j */ /* Here T[m] = v_0 * ... * v_m, 0 <= m < k */ if (k > 0 && !mpres_invert (T[k], T[k - 1], modulus)) { if (p != NULL) mpres_gcd (p, T[k - 1], modulus); if (tot_muls != NULL) (*tot_muls) += m * n - 1; if (tot_gcds != NULL) (*tot_gcds) ++; return ECM_FACTOR_FOUND_STEP2; } /* T[k] = 1/(v_0 * ... * v_m), 0 <= m < k */ l = k - 1; for (i = 0; (unsigned) i < m; i++) for (j = 0; (unsigned) j < n; j++) { point *X1, *X2; X1 = X + i * (n + 1) + j; X2 = X + i * (n + 1) + j + 1; /* Is X1 the neutral element? 
*/ if (mpres_is_zero (X1->x, modulus) && mpres_is_zero (X1->y, modulus)) { /* Yes, set X1 to X2 */ mpres_set (X1->x, X2->x, modulus); mpres_set (X1->y, X2->y, modulus); continue; } /* Is X2 the neutral element? If so, X1 stays the same */ if (mpres_is_zero (X2->x, modulus) && mpres_is_zero (X2->y, modulus)) continue; /* Are the x-coordinates identical? */ mpres_sub (T[k + 1], X2->x, X1->x, modulus); if (mpres_is_zero (T[k + 1], modulus)) { /* Are the points inverses of each other? */ mpres_sub (T[k + 1], X2->y, X1->y, modulus); if (!mpres_is_zero (T[k + 1], modulus)) { /* Yes. Set X1 to neutral element */ mpres_set_ui (X1->x, 0, modulus); mpres_set_ui (X1->y, 0, modulus); continue; } /* No, we need to double. Restore T[k+1] */ mpres_sub (T[k + 1], X2->x, X1->x, modulus); } if (l == 0) mpz_set (T[0], T[k]); else mpres_mul (T[l], T[k], T[l - 1], modulus); /* T_l = 1/(v_0 * ... * v_l) * (v_0 * ... * v_{l-1}) = 1/v_l */ if (mpres_is_zero (T[k + 1], modulus)) /* Identical points, so double X1 */ { if (l > 0) { mpres_add (T[k + 1], X1->y, X1->y, modulus); /* T[k+1] = v_{l} */ mpres_mul (T[k], T[k], T[k + 1], modulus); /* T_k = 1/(v_0 * ... * v_l) * v_l = 1/(v_0 * ... * v_{l-1}) */ } mpres_sqr (T[k + 1], X1->x, modulus); mpres_mul_ui (T[k + 1], T[k + 1], 3, modulus); mpres_add (T[k + 1], T[k + 1], S->A, modulus); mpres_mul (T[l], T[k + 1], T[l], modulus); /* T[l] = lambda */ mpres_sqr (T[k + 1], T[l], modulus); /* T1 = lambda^2 */ mpres_sub (T[k + 1], T[k + 1], X1->x, modulus); /* T1 = lambda^2 - x1 */ mpres_sub (X1->x, T[k + 1], X2->x, modulus); /* X1.x = lambda^2 - x1 - x2 = x3 */ mpres_sub (T[k + 1], X2->x, X1->x, modulus); /* T1 = x2 - x3 */ mpres_mul (T[k + 1], T[k + 1], T[l], modulus); /* T1 = lambda*(x2 - x3) */ mpres_sub (X1->y, T[k + 1], X2->y, modulus); /* Y1 = lambda*(x2 - x3) - y2 = y3 */ } else { if (l > 0) { mpres_mul (T[k], T[k], T[k + 1], modulus); /* T_k = 1/(v_0 * ... * v_l) * v_l = 1/(v_0 * ... 
* v_{l-1}) */ } mpres_sub (T[k + 1], X2->y, X1->y, modulus); /* T1 = y2 - y1 */ mpres_mul (T[l], T[l], T[k + 1], modulus); /* Tl = (y2 - y1) / (x2 - x1) = lambda */ mpres_sqr (T[k + 1], T[l], modulus); /* T1 = lambda^2 */ mpres_sub (T[k + 1], T[k + 1], X1->x, modulus); /* T1 = lambda^2 - x1 */ mpres_sub (X1->x, T[k + 1], X2->x, modulus); /* X1.x = lambda^2 - x1 - x2 = x3 */ mpres_sub (T[k + 1], X2->x, X1->x, modulus); /* T1 = x2 - x3 */ mpres_mul (T[k + 1], T[k + 1], T[l], modulus); /* T1 = lambda*(x2 - x3) */ mpres_sub (X1->y, T[k + 1], X2->y, modulus); /* Y1 = lambda*(x2 - x3) - y2 = y3 */ } l--; } if (tot_muls != NULL) (*tot_muls) += 6 * m * n - 3; if (tot_gcds != NULL) (*tot_gcds) ++; return ECM_NO_FACTOR_FOUND; } /* puts in F[0..dF-1] the successive values of Dickson_{S, a} (j * d2) * s where s is a point on the elliptic curve for j == 1 mod 6, j and d1 coprime. Returns non-zero iff a factor was found (then stored in f) or an error occurred. */ int ecm_rootsF (mpz_t f, listz_t F, root_params_t *root_params, unsigned long dF, curve *s, mpmod_t modulus) { unsigned long i; unsigned long muls = 0, gcds = 0; long st; int youpi = ECM_NO_FACTOR_FOUND; listz_t coeffs; ecm_roots_state_t state; progression_params_t *params = &state.params; /* for less typing */ mpz_t t; if (dF == 0) return ECM_NO_FACTOR_FOUND; st = cputime (); /* Relative cost of point add during init and computing roots assumed =1 */ init_roots_params (params, root_params->S, root_params->d1, root_params->d2, 1.0); outputf (OUTPUT_DEVVERBOSE, "ecm_rootsF: state: nr = %d, dsieve = %d, " "size_fd = %d, S = %d, dickson_a = %d\n", params->nr, params->dsieve, params->size_fd, params->S, params->dickson_a); /* Init finite differences tables */ MPZ_INIT (t); /* t = 0 */ coeffs = init_progression_coeffs (t, params->dsieve, root_params->d2, 1, 6, params->S, params->dickson_a); mpz_clear (t); if (coeffs == NULL) /* error */ { youpi = ECM_ERROR; goto clear; } /* The highest coefficient is the same for all 
progressions, so set them to one for all but the first progression, later we copy the point. FIXME: can we avoid the multiplication of those points in multiplyW2n() below? */ for (i = params->S + 1; i < params->size_fd; i += params->S + 1) mpz_set_ui (coeffs[i + params->S], 1); /* Allocate memory for fd[] and T[] */ state.fd = (point *) malloc (params->size_fd * sizeof (point)); if (state.fd == NULL) { youpi = ECM_ERROR; goto exit_ecm_rootsF; } for (i = 0; i < params->size_fd; i++) { outputf (OUTPUT_TRACE, "ecm_rootsF: coeffs[%d] = %Zd\n", i, coeffs[i]); MEMORY_TAG; mpres_init (state.fd[i].x, modulus); MEMORY_TAG; mpres_init (state.fd[i].y, modulus); MEMORY_UNTAG; } state.T = (mpres_t *) malloc ((params->size_fd + 4) * sizeof (mpres_t)); if (state.T == NULL) { youpi = ECM_ERROR; goto ecm_rootsF_clearfdi; } for (i = 0 ; i < params->size_fd + 4; i++) { MEMORY_TAG; mpres_init (state.T[i], modulus); MEMORY_UNTAG; } /* Multiply fd[] = s * coeffs[] */ youpi = multiplyW2n (f, state.fd, s, coeffs, params->size_fd, modulus, state.T[0], state.T[1], state.T + 2, &muls, &gcds); if (youpi == ECM_FACTOR_FOUND_STEP2) outputf (OUTPUT_VERBOSE, "Found factor while computing coeff[] * X\n"); if (youpi == ECM_ERROR) goto clear; /* Copy the point corresponding to the highest coefficient of the first progression to the other progressions */ for (i = params->S + 1; i < params->size_fd; i += params->S + 1) { mpres_set (state.fd[i + params->S].x, state.fd[params->S].x, modulus); mpres_set (state.fd[i + params->S].y, state.fd[params->S].y, modulus); } clear_list (coeffs, params->size_fd); coeffs = NULL; if (test_verbose (OUTPUT_VERBOSE)) { unsigned int st1 = cputime (); outputf (OUTPUT_VERBOSE, "Initializing tables of differences for F took %ldms", elltime (st, st1)); outputf (OUTPUT_DEVVERBOSE, ", %lu muls and %lu extgcds", muls, gcds); outputf (OUTPUT_VERBOSE, "\n"); st = st1; muls = 0; gcds = 0; } /* Now for the actual calculation of the roots. 
*/ for (i = 0; i < dF && !youpi;) { /* Is this a rsieve value where we computed Dickson(j * d2) * X? */ if (gcd ((unsigned long) params->rsieve, (unsigned long) params->dsieve) == 1UL) { /* Did we use every progression since the last update? */ if (params->next == params->nr) { /* Yes, time to update again */ youpi = addWnm (f, state.fd, s, modulus, params->nr, params->S, state.T, &muls, &gcds); ASSERT(youpi != ECM_ERROR); /* no error can occur in addWnm */ params->next = 0; if (youpi == ECM_FACTOR_FOUND_STEP2) outputf (OUTPUT_VERBOSE, "Found factor while computing roots of F\n"); } /* Is this a j value where we want Dickson(j * d2) * X as a root? */ if (gcd ((unsigned long) params->rsieve, root_params->d1) == 1UL) mpres_get_z (F[i++], state.fd[params->next * (params->S + 1)].x, modulus); params->next ++; } params->rsieve += 6; } clear: for (i = 0 ; i < params->size_fd + 4; i++) mpres_clear (state.T[i], modulus); free (state.T); ecm_rootsF_clearfdi: for (i = 0; i < params->size_fd; i++) { mpres_clear (state.fd[i].x, modulus); mpres_clear (state.fd[i].y, modulus); } free (state.fd); exit_ecm_rootsF: if (youpi) return youpi; /* error or factor found */ outputf (OUTPUT_VERBOSE, "Computing roots of F took %ldms", elltime (st, cputime ())); outputf (OUTPUT_DEVVERBOSE, ", %ld muls and %ld extgcds", muls, gcds); outputf (OUTPUT_VERBOSE, "\n"); return ECM_NO_FACTOR_FOUND; } /* Perform the necessary initialization to allow computation of Dickson_{S, a}(s+n*d) * P , where P is a point on the elliptic curve for successive n, where Dickson_{S, a} is the degree S Dickson polynomial with parameter a. For a == 0, Dickson_{S, a} (x) = x^S. If a factor is found during the initialisation, NULL is returned and the factor in f. If an error occurred, NULL is returned and f is -1. 
*/

/* ecm_rootsG_init allocates and fills an ecm_roots_state_t:
     - chooses the number nr of disjoint progressions from a cost estimate
       and rounds it up to a multiple of eulerphi(d2),
     - builds the finite-difference coefficients with
       init_progression_coeffs() and multiplies the point X by them
       (multiplyW2n) to initialise state->fd,
     - allocates size_fd + 4 temporaries in state->T.
   On success the caller owns the returned state and releases it with
   ecm_rootsG_clear().  On every failure path everything allocated so far is
   released here and NULL is returned. */
ecm_roots_state_t *
ecm_rootsG_init (mpz_t f, curve *X, root_params_t *root_params,
                 unsigned long dF, unsigned long blocks, mpmod_t modulus)
{
  unsigned int k, phid2;
  unsigned long muls = 0, gcds = 0;
  listz_t coeffs;
  ecm_roots_state_t *state;
  progression_params_t *params; /* for less typing */
  int youpi = 0;
  unsigned int T_inv;
  double bestnr;
  long st = 0;

  ASSERT (gcd (root_params->d1, root_params->d2) == 1UL);

  if (test_verbose (OUTPUT_VERBOSE))
    st = cputime ();

  state = (ecm_roots_state_t *) malloc (sizeof (ecm_roots_state_t));
  if (state == NULL)
    {
      mpz_set_si (f, -1);
      return NULL;
    }
  params = &(state->params);

  /* If S < 0, use degree |S| Dickson poly, otherwise use x^S */
  params->dickson_a = (root_params->S < 0) ? -1 : 0;
  params->S = abs (root_params->S);

  /* Estimate the cost of a modular inversion (in unit of time per
     modular multiplication) */
  if (modulus->repr == ECM_MOD_BASE2)
    T_inv = 18;
  else
    T_inv = 6;

  /* Guesstimate a value for the number of disjoint progressions to use */
  bestnr = -(4. + T_inv) + sqrt(12. * (double) dF * (double) blocks *
        (T_inv - 3.) * log (2. * root_params->d1) / log (2.) - (4. + T_inv) *
        (4. + T_inv));
  bestnr /= 6. * (double) (params->S) * log (2. * root_params->d1) / log (2.0);

  outputf (OUTPUT_TRACE, "ecm_rootsG_init: bestnr = %f\n", bestnr);

  /* For small dF * blocks the argument of sqrt() above is negative and
     bestnr is NaN.  Written as !(bestnr >= 1.) the test also sends NaN to
     the nr = 1 branch instead of converting it to unsigned int. */
  if (!(bestnr >= 1.))
    params->nr = 1;
  else
    params->nr = (unsigned int) (bestnr + .5);

  phid2 = eulerphi (root_params->d2);

  /* Round up params->nr to multiple of eulerphi(d2) */
  if (phid2 > 1)
    params->nr = ((params->nr + (phid2 - 1)) / phid2) * phid2;

  params->size_fd = params->nr * (params->S + 1);

  outputf (OUTPUT_DEVVERBOSE, "ecm_rootsG_init: i0=%Zd, d1=%lu, d2=%lu, "
           "dF=%lu, blocks=%lu, S=%u, T_inv = %d, nr=%d\n", root_params->i0,
           root_params->d1, root_params->d2, dF, blocks, params->S, T_inv,
           params->nr);

  state->X = X;
  params->next = 0;
  params->dsieve = 1; /* We only init progressions coprime to d2,
                         so nothing to be skipped */
  params->rsieve = 0;

  coeffs = init_progression_coeffs (root_params->i0, root_params->d2,
                                    root_params->d1, params->nr / phid2, 1,
                                    params->S, params->dickson_a);

  if (coeffs == NULL) /* error */
    {
      free (state);
      mpz_set_si (f, -1);
      return NULL;
    }

  state->fd = (point *) malloc (params->size_fd * sizeof (point));
  if (state->fd == NULL)
    {
      clear_list (coeffs, params->size_fd);
      free (state);
      mpz_set_si (f, -1);
      return NULL;
    }
  for (k = 0; k < params->size_fd; k++)
    {
      MEMORY_TAG;
      mpres_init (state->fd[k].x, modulus);
      MEMORY_TAG;
      mpres_init (state->fd[k].y, modulus);
      MEMORY_UNTAG;
    }

  state->size_T = params->size_fd + 4;
  state->T = (mpres_t *) malloc (state->size_T * sizeof (mpres_t));
  if (state->T == NULL)
    {
      for (k = 0; k < params->size_fd; k++)
        {
          mpres_clear (state->fd[k].x, modulus);
          mpres_clear (state->fd[k].y, modulus);
        }
      free (state->fd); /* the array itself, not only its entries */
      clear_list (coeffs, params->size_fd);
      free (state);
      mpz_set_si (f, -1);
      return NULL;
    }
  for (k = 0; k < state->size_T; k++)
    {
      MEMORY_TAG;
      mpres_init (state->T[k], modulus);
      MEMORY_UNTAG;
    }

  /* Set the highest coefficient of every progression but the first to one;
     the corresponding point is copied from the first progression below. */
  for (k = params->S + 1; k < params->size_fd; k += params->S + 1)
     mpz_set_ui (coeffs[k + params->S], 1);

  if (test_verbose (OUTPUT_TRACE))
    for (k = 0; k < params->size_fd; k++)
      outputf (OUTPUT_TRACE, "ecm_rootsG_init: coeffs[%d] == %Zd\n",
               k, coeffs[k]);

  youpi = multiplyW2n (f, state->fd, X, coeffs, params->size_fd, modulus,
                       state->T[0], state->T[1], state->T + 2, &muls, &gcds);
  if (youpi == ECM_ERROR)
    mpz_set_si (f, -1); /* fall through */

  for (k = params->S + 1; k < params->size_fd; k += params->S + 1)
    {
      mpres_set (state->fd[k + params->S].x, state->fd[params->S].x, modulus);
      mpres_set (state->fd[k + params->S].y, state->fd[params->S].y, modulus);
    }

  clear_list (coeffs, params->size_fd);
  coeffs = NULL;

  if (youpi != ECM_NO_FACTOR_FOUND) /* factor found or error */
    {
      if (youpi == ECM_FACTOR_FOUND_STEP2)
        outputf (OUTPUT_VERBOSE, "Found factor while computing fd[]\n");

      ecm_rootsG_clear (state, modulus);

      /* Signal that a factor was found, or an error occurred (f=-1) */
      state = NULL;
    }
  else
    {
      if (test_verbose (OUTPUT_VERBOSE))
        {
          st = elltime (st, cputime ());
          outputf (OUTPUT_VERBOSE,
                   "Initializing table of differences for G took %ldms", st);
          outputf (OUTPUT_DEVVERBOSE, ", %lu muls and %lu extgcds",
                   muls, gcds);
          outputf (OUTPUT_VERBOSE, "\n");
        }
    }

  return state;
}

/* Release everything owned by a state built by ecm_rootsG_init(): the
   residues in fd[] and T[], both arrays, and the state structure itself.
   state must not be used afterwards. */
void
ecm_rootsG_clear (ecm_roots_state_t *state, ATTRIBUTE_UNUSED mpmod_t modulus)
{
  unsigned int k;

  for (k = 0; k < state->params.size_fd; k++)
    {
      mpres_clear (state->fd[k].x, modulus);
      mpres_clear (state->fd[k].y, modulus);
    }
  free (state->fd);

  for (k = 0; k < state->size_T; k++)
    mpres_clear (state->T[k], modulus);
  free (state->T);

  free (state);
}

/* Puts in G the successive values of

     Dickson_{S, a}(s+j*k) P

     where P is a point on the elliptic curve,
     0<= j <= dF-1, k is the 'd' value from ecm_rootsG_init()
     and s is the 's' value of ecm_rootsG_init() or where a previous
     call to ecm_rootsG has left off.

   Returns non-zero iff a factor was found (then stored in f).
   Cannot return an error.
*/ int ecm_rootsG (mpz_t f, listz_t G, unsigned long dF, ecm_roots_state_t *state, mpmod_t modulus) { unsigned long i; unsigned long muls = 0, gcds = 0; int youpi = ECM_NO_FACTOR_FOUND; long st; point *fd = state->fd; /* to save typing */ progression_params_t *params = &(state->params); /* for less typing */ st = cputime (); outputf (OUTPUT_TRACE, "ecm_rootsG: dF = %lu, state: nr = %u, next = %u, " "S = %u, dsieve = %u, rsieve = %u,\n\tdickson_a = %d\n", dF, params->nr, params->next, params->S, params->dsieve, params->rsieve, params->dickson_a); for (i = 0; i < dF;) { /* Did we use every progression since the last update? */ if (params->next == params->nr) { /* Yes, time to update again */ youpi = addWnm (f, fd, state->X, modulus, params->nr, params->S, state->T, &muls, &gcds); ASSERT(youpi != ECM_ERROR); /* no error can occur in addWnm */ params->next = 0; if (youpi == ECM_FACTOR_FOUND_STEP2) { outputf (OUTPUT_VERBOSE, "Found factor while computing G[]\n"); break; } } /* Is this a root we should skip? (Take only if gcd == 1) */ if (gcd ((unsigned long) params->rsieve, (unsigned long) params->dsieve) == 1UL) { mpres_get_z (G[i++], (fd + params->next * (params->S + 1))->x, modulus); outputf (OUTPUT_TRACE, "ecm_rootsG: storing d1*%u*X = %Zd in G[%lu]\n", params->rsieve, G[i - 1], i); } params->next ++; params->rsieve ++; } outputf (OUTPUT_VERBOSE, "Computing roots of G took %ldms", elltime (st, cputime ())); outputf (OUTPUT_DEVVERBOSE, ", %lu muls and %lu extgcds", muls, gcds); outputf (OUTPUT_VERBOSE, "\n"); return youpi; } /* Find smallest i >= 0 such that f(j * d2)*X = +-f((i0 + i) * d1)*X over GF(p). If "+" holds, return 1, if "-" holds, return -1. If the correct i could not be determined (because a non-invertible residue appeared during initialisation) return 0. */ int ecm_findmatch (unsigned long *I, const unsigned long j, root_params_t *root_params, const curve *X, mpmod_t n, const mpz_t p) { const int dickson_a = root_params->S < 0 ? 
-1 : 0; const unsigned int S = abs (root_params->S); const unsigned int sizeT = S + 3; unsigned int k; unsigned long i; int r, sgn = 0; point iX, jX; curve Xp; /* The point and curve over GF(p) */ mpmod_t modulus; mpz_t s, t; /* temp vars */ mpres_t u, v; /* temp vars */ listz_t coeffs; point *fd; mpres_t *T; outputf (OUTPUT_RESVERBOSE, "Looking for i such that " "f((i+%Zd)*%lu)*X = f(%lu*%lu)*X\n", root_params->i0, root_params->d1, j, root_params->d2); mpmod_init (modulus, p, ECM_MOD_DEFAULT); mpz_init (s); mpz_init (t); mpres_init (u, modulus); mpres_init (v, modulus); mpres_init (Xp.x, modulus); mpres_init (Xp.y, modulus); mpres_init (Xp.A, modulus); mpres_init (iX.x, modulus); mpres_init (iX.y, modulus); mpres_init (jX.x, modulus); mpres_init (jX.y, modulus); T = malloc (sizeT * sizeof (mpres_t)); if (T == NULL) goto clear_and_exit; for (k = 0; k < sizeT; k++) mpres_init (T[k], modulus); fd = malloc ((S + 1) * sizeof (point)); if (fd == NULL) goto clear_T_and_exit; for (k = 0; k < S + 1; k++) { mpres_init (fd[k].x, modulus); mpres_init (fd[k].y, modulus); } /* Copy the parameters of the curve over Z/ZN to the curve over GF(p) */ mpres_get_z (t, X->x, n); mpres_set_z (Xp.x, t, modulus); mpres_get_z (t, X->y, n); mpres_set_z (Xp.y, t, modulus); mpres_get_z (t, X->A, n); mpres_set_z (Xp.A, t, modulus); /* We use init_progression_coeffs() to compute f(j * d2) */ mpz_set_ui (t, j); coeffs = init_progression_coeffs (t, 1UL, root_params->d2, 1U, 1U, S, dickson_a); if (coeffs == NULL) goto clear_fd_and_exit; /* Now compute f(j * d2) X */ r = multiplyW2n (NULL, &jX, &Xp, coeffs, 1U, modulus, u, v, T, NULL, NULL); clear_list (coeffs, S + 1); if (r != ECM_NO_FACTOR_FOUND) goto clear_fd_and_exit; /* We'll keep {f(j * d2) X}_x in s */ mpres_get_z (s, jX.x, modulus); outputf (OUTPUT_DEVVERBOSE, "ecm_findmatch: (f(j * d2) X)_x = %Zd\n", s); /* Now compute {f((i0 + i) d1) X}_x one at a time and put them in t, until s == t */ /* Init the progression */ coeffs = 
init_progression_coeffs (root_params->i0, 1UL, root_params->d1, 1U, 1U, S, dickson_a); if (coeffs == NULL) goto clear_fd_and_exit; r = multiplyW2n (NULL, fd, &Xp, coeffs, S + 1, modulus, u, v, T, NULL, NULL); clear_list (coeffs, S + 1); if (r != ECM_NO_FACTOR_FOUND) goto clear_fd_and_exit; mpres_get_z (t, fd[0].x, modulus); for (i = 0; mpz_cmp (s, t) != 0; i++) { r = addWnm (NULL, fd, &Xp, modulus, 1, S, T, NULL, NULL); if (r != ECM_NO_FACTOR_FOUND) goto clear_fd_and_exit; mpres_get_z (t, fd[0].x, modulus); } outputf (OUTPUT_DEVVERBOSE, "ecm_findmatch: i - i0 = %lu, " "{f(i * d1) X}_x = %Zd\n", i, t); /* We'll compute f(i * d1)*X and compare it to f(j * d2)*X to verify correctness of the result, and to determine whether it was f(i * d1)-f(j * d2) or f(i * d1)+f(j * d2) that found the factor */ /* We use init_progression_coeffs() to compute f(i * d1) */ mpz_add_ui (t, root_params->i0, i); coeffs = init_progression_coeffs (t, 1UL, root_params->d1, 1U, 1U, S, dickson_a); if (coeffs == NULL) goto clear_fd_and_exit; /* Now compute iX = f(i * d1)*X */ r = multiplyW2n (NULL, &iX, &Xp, coeffs, 1U, modulus, u, v, T, NULL, NULL); clear_list (coeffs, S + 1); if (r != ECM_NO_FACTOR_FOUND) goto clear_fd_and_exit; mpres_get_z (t, iX.x, modulus); if (mpz_cmp (s, t) != 0) { outputf (OUTPUT_ERROR, "ecm_findmatch: ERROR, (f(i*d1) X)_x != " "(f(j*d2) X)_x\n(f(i*d1) X)_x = %Zd\n", t); goto clear_fd_and_exit; } mpres_get_z (s, jX.y, modulus); mpres_get_z (t, iX.y, modulus); if (mpz_cmp (s, t) == 0) { *I = i; sgn = 1; } else { mpz_sub (t, p, t); if (mpz_cmp (s, t) == 0) { *I = i; sgn = -1; } else { mpz_sub (t, p, t); outputf (OUTPUT_ERROR, "ecm_findmatch: ERROR, (f(i*d1) X)_y != " "+-(f(j*d2) X)_y\n"); outputf (OUTPUT_ERROR, "(f(i*d1) X)_y = %Zd\n", t); outputf (OUTPUT_ERROR, "(f(j*d2) X)_y = %Zd\n", s); } } clear_fd_and_exit: for (k = 0; k < S + 1; k++) { mpres_clear (fd[k].x, modulus); mpres_clear (fd[k].y, modulus); } free(fd); clear_T_and_exit: for (k = 0; k < sizeT; k++) 
mpres_clear (T[k], modulus); free (T); clear_and_exit: mpz_clear (s); mpz_clear (t); mpres_clear (u, modulus); mpres_clear (v, modulus); mpres_clear (Xp.x, modulus); mpres_clear (Xp.y, modulus); mpres_clear (Xp.A, modulus); mpres_clear (iX.x, modulus); mpres_clear (iX.y, modulus); mpres_clear (jX.x, modulus); mpres_clear (jX.y, modulus); mpmod_clear (modulus); return sgn; } ecm-6.4.4/schoen_strass.c0000644023561000001540000012007612106741273012246 00000000000000/* Arithmetic modulo Fermat numbers. Copyright 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Alexander Kruppa, Paul Zimmermann This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include /* for abs if assertions enabled */ #include "ecm-impl.h" #include "ecm-gmp.h" #ifdef HAVE_LIMITS_H # include #else # ifndef UINT_MAX # define UINT_MAX (~(unsigned int) 0) # endif #endif /* #define DEBUG 1 #define CHECKSUM 1 */ static mpz_t gt; static int gt_inited = 0; static int radix2 = 0; unsigned int Fermat; #define CACHESIZE 512U /* a' <- a+b, b' <- a-b. 
*/ #define ADDSUB_MOD(a, b) \ mpz_sub (gt, a, b); \ mpz_add (a, a, b); \ F_mod_gt (b, n); \ F_mod_1 (a, n); __GMP_DECLSPEC mp_limb_t __gmpn_mod_34lsub1 (mp_limb_t*, mp_size_t); /* compute remainder modulo 2^(GMP_LIMB_BITS*3/4)-1 */ #ifndef HAVE___GMPN_MOD_34LSUB1 mp_limb_t __gmpn_mod_34lsub1 (mp_limb_t *src, mp_size_t size) { mp_ptr tp; mp_limb_t r, d; ASSERT(GMP_LIMB_BITS % 4 == 0); tp = malloc (size * sizeof (mp_limb_t)); if (tp == NULL) { fprintf (stderr, "Cannot allocate memory in __gmpn_mod_34lsub1\n"); exit (1); } MPN_COPY (tp, src, size); d = ((mp_limb_t) 1 << (3 * (GMP_LIMB_BITS / 4))) - (mp_limb_t) 1; mpn_divmod_1 (&r, tp, size, d); free (tp); return r; } #endif /* RS -> RS (mod 2^n+1). If input |RS| < 2^(2*n), result |RS| < 2^(n+1) */ static inline void F_mod_1 (mpz_t RS, unsigned int n) { mp_size_t size; mp_limb_t v; size = mpz_size (RS); if ((unsigned int) size == n / GMP_NUMB_BITS + 1) { int sgn; sgn = mpz_sgn (RS); /* Remember original sign */ v = mpz_getlimbn (RS, n / GMP_NUMB_BITS); mpz_tdiv_r_2exp (RS, RS, n); /* Just a truncate. RS < 2^n. Can make RS zero and so change sgn(RS)! 
*/ if (sgn == -1) mpz_add_ui (RS, RS, v); else mpz_sub_ui (RS, RS, v); } else if ((unsigned int) size > n / GMP_NUMB_BITS + 1) { /* Assuming |RS| < 2^(2*n) */ mpz_tdiv_q_2exp (gt, RS, n); /* |gt| < 2^n */ mpz_tdiv_r_2exp (RS, RS, n); /* |RS| < 2^n */ mpz_sub (RS, RS, gt); /* |RS| < 2^(n+1) */ } } /* R = gt (mod 2^n+1) */ static inline void F_mod_gt (mpz_t R, unsigned int n) { mp_size_t size; mp_limb_t v; size = mpz_size (gt); ASSERT(R != gt); if ((unsigned int) size == n / GMP_NUMB_BITS + 1) { int sgn; sgn = mpz_sgn (gt); v = mpz_getlimbn (gt, n / GMP_NUMB_BITS); mpz_tdiv_r_2exp (gt, gt, n); /* Just a truncate */ if (sgn == -1) mpz_add_ui (R, gt, v); else mpz_sub_ui (R, gt, v); } else if ((unsigned int) size > n / GMP_NUMB_BITS + 1) { mpz_tdiv_q_2exp (R, gt, n); mpz_tdiv_r_2exp (gt, gt, n); /* Just a truncate */ mpz_sub (R, gt, R); } else mpz_set (R, gt); } /* R = S1 * S2 (mod 2^n+1) where n is a power of 2 */ /* S1 == S2, S1 == R, S2 == R ok, but none may == gt */ static void F_mulmod (mpz_t R, mpz_t S1, mpz_t S2, unsigned int n) { int n2 = (n - 1) / GMP_NUMB_BITS + 1; /* type of _mp_size is int */ F_mod_1 (S1, n); F_mod_1 (S2, n); if (mpz_size (S1) > (unsigned) n2) { outputf (OUTPUT_ERROR, "Warning: S1 >= 2^%d after reduction, has %lu bits. " "Trying again\n", n, (unsigned long) mpz_sizeinbase (S1, 2)); F_mod_1 (S1, n); } if (mpz_size (S2) > (unsigned) n2) { outputf (OUTPUT_ERROR, "Warning: S2 >= 2^%d after reduction, has %lu bits. " "Trying again\n", n, (unsigned long) mpz_sizeinbase (S2, 2)); F_mod_1 (S2, n); } if (n >= 32768) { unsigned long k; _mpz_realloc (gt, n2 + 1); /* in case the reallocation fails, _mpz_realloc sets the value to 0 */ ASSERT_ALWAYS (mpz_cmp_ui (gt, 0) != 0); k = mpn_fft_best_k (n2, S1 == S2); mpn_mul_fft (PTR(gt), n2, PTR(S1), ABSIZ(S1), PTR(S2), ABSIZ(S2), k); MPN_NORMALIZE(PTR(gt), n2); SIZ(gt) = ((SIZ(S1) ^ SIZ(S2)) >= 0) ? 
n2 : -n2; F_mod_gt (R, n); return; } mpz_mul (gt, S1, S2); F_mod_gt (R, n); return; } /* R = S + sgn(S)*(2^e) */ static void mpz_absadd_2exp (mpz_t RS, unsigned int e) { mp_size_t siz, limb_idx, bit_idx; mp_limb_t cy; int sgn; limb_idx = e / GMP_NUMB_BITS; bit_idx = e % GMP_NUMB_BITS; siz = mpz_size (RS); sgn = (mpz_sgn (RS) >= 0) ? 1 : -1; if (limb_idx >= RS->_mp_alloc) /* WARNING: mpz_realloc2 does not keep the value!!! */ mpz_realloc2 (RS, (limb_idx + 1) * GMP_NUMB_BITS); /* Now RS->_mp_alloc > limb_idx) */ while (siz <= limb_idx) { RS->_mp_d[siz++] = 0; RS->_mp_size += sgn; } /* Now RS->_mp_alloc >= siz > limb_idx */ cy = mpn_add_1 (RS->_mp_d + limb_idx, RS->_mp_d + limb_idx, siz - limb_idx, ((mp_limb_t)1) << bit_idx); if (cy) { if (RS->_mp_alloc <= siz) /* WARNING: mpz_realloc2 does not keep the value!!! */ mpz_realloc2 (RS, (siz + 1) * GMP_NUMB_BITS); RS->_mp_d[siz] = 1; RS->_mp_size += sgn; } } /* R = S / 2 (mod 2^n + 1). S == gt is ok */ static void F_divby2 (mpz_t R, mpz_t S, unsigned int n) { int odd, sgn; odd = mpz_odd_p (S); sgn = mpz_sgn (S); mpz_tdiv_q_2exp (R, S, 1); if (odd) { /* We shifted out a set bit at the bottom. With negative wrap-around, that becomes -2^(n-1), so we add -2^(n-1) + 2^n+1 = 2^(n-1)+1. If |S| < 2^(n+1), |R| < 2^n + 2^(n-1) + 1 < 2^(n+1) for n > 1. */ mpz_absadd_2exp (R, n - 1); if (sgn < 0) mpz_sub_ui (R, R, 1); else mpz_add_ui (R, R, 1); } } /* RS = RS / 3 (mod 2^n + 1). RS == gt is ok */ static void F_divby3_1 (mpz_t RS, unsigned int n) { /* 2^2^m == 1 (mod 3) for m>0, thus F_m == 2 (mod 3) */ int mod, sgn; sgn = mpz_sgn (RS); mod = __gmpn_mod_34lsub1 (RS->_mp_d, mpz_size (RS)) % 3; if (mod == 1) { /* Add F_m. If |RS| < 2^(n+1), |RS|+F_m < 3*2^n+1 */ mpz_absadd_2exp (RS, n); if (sgn >= 0) mpz_add_ui (RS, RS, 1); else mpz_sub_ui (RS, RS, 1); } else if (mod == 2) { /* Add 2 * F_m. 
If |RS| < 2^(n+1), |RS|+2*F_m < 4*2^n+2 */ mpz_absadd_2exp (RS, n + 1); if (sgn >= 0) mpz_add_ui (RS, RS, 2); else mpz_sub_ui (RS, RS, 2); } mpz_divby3_1op (RS); /* |RS| < (4*2^n+2)/3 < 2^(n+1) */ } static void F_divby5_1 (mpz_t RS, unsigned int n) { /* 2^2^m == 1 (mod 5) for m>1, thus F_m == 2 (mod 5) */ int mod, sgn; sgn = mpz_sgn (RS); mod = __gmpn_mod_34lsub1 (RS->_mp_d, mpz_size (RS)) % 5; if (mod == 1) { /* Add 2 * F_m == 4 (mod 5) */ mpz_absadd_2exp (RS, n + 1); if (sgn == 1) mpz_add_ui (RS, RS, 2); else mpz_sub_ui (RS, RS, 2); } else if (mod == 2) { /* Add 4 * F_m == 3 (mod 5) */ mpz_absadd_2exp (RS, n + 2); if (sgn == 1) mpz_add_ui (RS, RS, 4); else mpz_sub_ui (RS, RS, 4); } else if (mod == 3) { /* Add F_m == 3 (mod 5) */ mpz_absadd_2exp (RS, n); if (sgn == 1) mpz_add_ui (RS, RS, 1); else mpz_sub_ui (RS, RS, 1); } else if (mod == 4) { /* Add 3 * F_m == 1 (mod 5) */ mpz_absadd_2exp (RS, n); mpz_absadd_2exp (RS, n + 1); if (sgn == 1) mpz_add_ui (RS, RS, 3); else mpz_sub_ui (RS, RS, 3); } ASSERT(mpz_divisible_ui_p (RS, 5)); mpz_divexact_ui (RS, RS, 5); } /* A 2^(m+2) length convolution is possible: (2^(3n/4) - 2^(n/4))^2 == 2 (mod 2^n+1) so we have an element of order 2^(m+2) of simple enough form to use it as a root of unity the transform */ /* Multiply by sqrt(2)^e (mod F_m). n = 2^m */ /* R = (S * sqrt(2)^e) % (2^n+1) */ /* R == S is ok, but neither must be == gt */ /* Assumes abs(e) < 4*n */ static void F_mul_sqrt2exp (mpz_t R, mpz_t S, int e, unsigned int n) { int chgsgn = 0, odd; ASSERT(S != gt); ASSERT(R != gt); ASSERT((unsigned) abs (e) < 4 * n); if (e < 0) e += 4 * n; /* 0 <= e < 4*n */ if ((unsigned) e >= 2 * n) /* sqrt(2)^(2*n) == -1 (mod F_m), so */ { e -= 2 * n; /* sqrt(2)^e == -sqrt(2)^(e-2*n) (mod F_m) */ chgsgn = 1; } /* Now e < 2*n */ #ifdef DEBUG_PERF if (e == 0) outputf (OUTPUT_ALWAYS, "F_mul_sqrt2exp: called for trivial case %s1\n", chgsgn ? 
"-" : ""); #endif odd = e & 1; e >>= 1; if (odd) { /* Multiply by sqrt(2) == 2^(3n/4) - 2^(n/4) */ /* S * (2^(3n/4) - 2^(n/4)) == 2^(n/4) * (S*2^(n/2) - S) */ mpz_mul_2exp (gt, S, n / 2); mpz_sub (gt, gt, S); mpz_tdiv_q_2exp (R, gt, n / 4 * 3); mpz_tdiv_r_2exp (gt, gt, n / 4 * 3); mpz_mul_2exp (gt, gt, n / 4); mpz_sub (R, gt, R); if (e != 0) { mpz_tdiv_q_2exp (gt, R, n-e); mpz_tdiv_r_2exp (R, R, n-e); mpz_mul_2exp (R, R, e); mpz_sub (R, R, gt); } } else if (e != 0) { /* S = a*2^(n-e) + b, b < 2^(n-e) */ /* S*2^e = a*2^n + b*2^e = b*2^e - a */ /* b*2^e < 2^(n-e)*2^e = 2^n */ mpz_tdiv_q_2exp (gt, S, n - e); /* upper e bits (=a) into gt */ mpz_tdiv_r_2exp (R, S, n - e); /* lower n-e bits (=b) into R */ /* This is simply a truncate if S == R */ mpz_mul_2exp (R, R, e); /* R < 2^n */ mpz_sub (R, R, gt); } else mpz_set (R, S); if (chgsgn) mpz_neg (R, R); } /* Same, but input may be gt. Input and output must not be identical */ static void F_mul_sqrt2exp_2 (mpz_t R, mpz_t S, int e, unsigned int n) { int chgsgn = 0, odd; ASSERT (S != R); ASSERT (R != gt); ASSERT ((unsigned) abs (e) < 4 * n); if (e < 0) e += 4 * n; if ((unsigned) e >= 2 * n) /* sqrt(2)^(2*n) == -1 (mod F_m), so */ { e -= 2 * n; /* sqrt(2)^e == -sqrt(2)^(e-2*n) (mod F_m) */ chgsgn = 1; } /* Now e < 2*n */ #ifdef DEBUG_PERF if (e == 0) outputf (OUTPUT_ALWAYS, "F_mul_sqrt2exp_2: called for trivial case %s1\n", chgsgn ? "-" : ""); #endif odd = e & 1; e >>= 1; if (odd != 0) { mpz_set (R, S); /* Neccessary? 
n/32 mov*/ mpz_mul_2exp (gt, S, n / 2); /* May overwrite S n/32 mov */ mpz_sub (gt, gt, R); /* n/32 sub*/ mpz_tdiv_q_2exp (R, gt, n / 4 * 3); /* 3*(n/32)/4 mov */ mpz_tdiv_r_2exp (gt, gt, n / 4 * 3); /* Just a truncate */ mpz_mul_2exp (gt, gt, n / 4); /* 3*(n/32)/4 mov */ mpz_sub (R, gt, R); /* (n/32)/4 sub, 3*(n/32)/4 mov */ if (e != 0) { mpz_tdiv_q_2exp (gt, R, n - e); mpz_tdiv_r_2exp (R, R, n - e); mpz_mul_2exp (R, R, e); mpz_sub (R, R, gt); } } else if (e != 0) { mpz_tdiv_q_2exp (R, S, n - e); /* upper e bits into R */ mpz_tdiv_r_2exp (gt, S, n - e); /* lower n-e bits into gt */ mpz_mul_2exp (gt, gt, e); mpz_sub (R, gt, R); } else mpz_set (R, S); if (chgsgn == -1) mpz_neg (R, R); } #define A0s A[0] #define A1s A[l << stride2] #define A2s A[2 * l << stride2] #define A3s A[3 * l << stride2] #define A0is A[i << stride2] #define A1is A[(i + l) << stride2] #define A2is A[(i + 2 * l) << stride2] #define A3is A[(i + 3 * l) << stride2] /* Decimation-in-frequency FFT. Unscrambled input, scrambled output. */ /* Elements are (mod 2^n+1), l and n must be powers of 2, l must be <= 4*n. */ /* Performs forward transform */ static void F_fft_dif (mpz_t *A, int l, int stride2, int n) { int i, omega = (4 * n) / l, iomega; if (l <= 1) return; ASSERT((4 * n) % l == 0); if (l == 2) { ADDSUB_MOD(A[0], A[1< 1) { F_fft_dif (A, l, stride2, n); F_fft_dif (A + (l << stride2), l, stride2, n); F_fft_dif (A + (2 * l << stride2), l, stride2, n); F_fft_dif (A + (3 * l << stride2), l, stride2, n); } return; } l /= 2; ADDSUB_MOD(A[0], A1s); for (i = 1, iomega = omega; i < l; i++, iomega += omega) { mpz_sub (gt, A0is, A1is); mpz_add (A0is, A0is, A1is); F_mul_sqrt2exp_2 (A1is, gt, iomega, n); F_mod_1 (A0is, n); } F_fft_dif (A, l, stride2, n); F_fft_dif (A + (l << stride2), l, stride2, n); } /* Decimation-in-time inverse FFT. Scrambled input, unscrambled output */ /* Does not perform divide-by-length. 
l, and n as in F_fft_dif() */ static void F_fft_dit (mpz_t *A, int l, int stride2, int n) { int i, omega = (4 * n) / l, iomega; if (l <= 1) return; ASSERT((4 * n) % l == 0); if (l == 2) { ADDSUB_MOD(A[0], A[1< 1) { F_fft_dit (A, l, stride2, n); F_fft_dit (A + (l << stride2), l, stride2, n); F_fft_dit (A + (2 * l << stride2), l, stride2, n); F_fft_dit (A + (3 * l << stride2), l, stride2, n); } mpz_sub (gt, A3s, A1s); /* gt = -(a1 - a3) */ mpz_add (A1s, A1s, A3s); /* A1 = a1 + a3 */ F_mul_sqrt2exp_2 (A3s, gt, n, n); /* A3 = i * -(a1 - a3) */ mpz_sub (gt, A[0], A2s); /* gt = a0 - a2 */ mpz_add (A[0], A[0], A2s); /* A0 = a0 + a2 */ mpz_sub (A2s, A[0], A1s); /* A2 = a0 - a1 + a2 - a3 */ mpz_add (A[0], A[0], A1s); /* A0 = a0 + a1 + a2 + a3 */ mpz_add (A1s, gt, A3s); /* A1 = a0 - a2 + i * -(a1 - a3) */ mpz_sub (A3s, gt, A3s); /* A3 = a0 - a2 - i * -(a1 - a3) */ for (i = 1, iomega = omega; i < l; i++, iomega += omega) { /* Divide by omega^i. Since sqrt(2)^(4*n) == 1 (mod 2^n+1), this is like multiplying by omega^(4*n-i) */ F_mul_sqrt2exp (A1is, A1is, 4 * n - iomega, n); F_mul_sqrt2exp (A2is, A2is, 4 * n - 2 * iomega, n); F_mul_sqrt2exp (A3is, A3is, 4 * n - 3 * iomega, n); mpz_sub (gt, A3is, A1is); mpz_add (A1is, A1is, A3is); F_mul_sqrt2exp_2 (A3is, gt, n, n); mpz_sub (gt, A0is, A2is); mpz_add (A0is, A0is, A2is); mpz_sub (A2is, A0is, A1is); mpz_add (A0is, A0is, A1is); mpz_add (A1is, gt, A3is); mpz_sub (A3is, gt, A3is); if (1) { F_mod_1 (A0is, n); F_mod_1 (A1is, n); F_mod_1 (A2is, n); F_mod_1 (A3is, n); } } return; } l /= 2; F_fft_dit (A, l, stride2, n); F_fft_dit (A + (l << stride2), l, stride2, n); ADDSUB_MOD(A[0], A1s); for (i = 1, iomega = 4*n - omega; i < l; i++, iomega -= omega) { F_mul_sqrt2exp (A1is, A1is, iomega, n); mpz_sub (gt, A0is, A1is); mpz_add (A0is, A0is, A1is); F_mod_gt (A1is, n); F_mod_1 (A0is, n); } } #define A0 A[i] #define A1 A[l+i] #define A2 A[2*l+i] #define A3 A[3*l+i] #define B0 B[i] #define B1 B[l+i] #define B2 B[2*l+i] #define B3 B[3*l+i] #define 
C0 C[i] #define C1 C[l+i] #define C2 C[2*l+i] #define C3 C[3*l+i] #define C4 C[4*l+i] #define C5 C[5*l+i] #define C6 C[6*l+i] #define C7 C[7*l+i] #define t0 t[i] #define t1 t[l+i] #define t2 t[2*l+i] #define t3 t[3*l+i] #define t4 t[4*l+i] #define t5 t[5*l+i] static unsigned int F_toomcook4 (mpz_t *C, mpz_t *A, mpz_t *B, unsigned int len, unsigned int n, mpz_t *t) { unsigned int l, i, r; ASSERT(len % 4 == 0); l = len / 4; if (A == B) /* Squaring. The interpolation could probably be optimized, too */ { for (i = 0; i < l; i++) { /*** Evaluate A(2), A(-2), 8*A(1/2) ***/ mpz_mul_2exp (t0, A0, 1); mpz_add (t0, t0, A1); mpz_mul_2exp (t0, t0, 1); mpz_add (t0, t0, A2); mpz_mul_2exp (t0, t0, 1); mpz_add (t0, t0, A3); /* t[0 .. l-1] = 8*A(1/2) < 15*N */ F_mod_1 (t0, n); mpz_mul_2exp (t2, A3, 2); mpz_add (t2, t2, A1); mpz_mul_2exp (t2, t2, 1); /* t[2l .. 3l-1] = 8*A_3 + 2*A_1 */ mpz_mul_2exp (gt, A2, 2); mpz_add (gt, gt, A0); /* gt = 4*A_2 + A0 */ mpz_sub (t4, gt, t2); /* t[4l .. 5l-1] = A(-2) */ mpz_add (t2, t2, gt); /* t[2l .. 3l-1] = A(2) */ F_mod_1 (t4, n); F_mod_1 (t2, n); /* Evaluate A(1), A(-1) */ mpz_add (C2, A0, A2); /* May overwrite A2 */ mpz_add (gt, A1, A3); mpz_sub (C4, C2, gt); /* C4 = A(-1) */ mpz_add (C2, C2, gt); /* C2 = A(1) < 4*N */ F_mod_1 (C2, n); F_mod_1 (C4, n); } /* A0 A1 A2 A3 */ /* A0 A(1) A3 A(-1) */ /* C0 C1 C2 C3 C4 C5 C6 C7 */ r = F_mul (t, t, t, l, DEFAULT, n, t + 6 * l); /* t0 = (8*A(1/2)) ^ 2 = 64*C(1/2) */ r += F_mul (t + 2 * l, t + 2 * l, t + 2 * l, l, DEFAULT, n, t + 6 * l); /* t2 = A(2) ^ 2 = C(2) */ r += F_mul (t + 4 * l, t + 4 * l, t + 4 * l, l, DEFAULT, n, t + 6 * l); /* t4 = A(-2) ^ 2 = C(-2) */ r += F_mul (C, A, A, l, DEFAULT, n, t + 6 * l); /* C0 = A(0) ^ 2 = C(0) */ r += F_mul (C + 6 * l, A + 3 * l, A + 3 * l, l, DEFAULT, n, t + 6 * l); /* C6 = A(inf) ^ 2 = C(inf) */ r += F_mul (C + 2 * l, C + 2 * l, C + 2 * l, l, DEFAULT, n, t + 6 * l); /* C2 = A(1) ^ 2 = C(1). 
May overwrite A3 */ r += F_mul (C + 4 * l, C + 4 * l, C + 4 * l, l, DEFAULT, n, t + 6 * l); /* C4 = A(-1) ^ 2 = C(-1) */ } else /* Multiply */ { for (i = 0; i < l; i++) { /*** Evaluate A(2), A(-2), 8*A(1/2) ***/ mpz_mul_2exp (t0, A0, 1); mpz_add (t0, t0, A1); mpz_mul_2exp (t0, t0, 1); mpz_add (t0, t0, A2); mpz_mul_2exp (t0, t0, 1); mpz_add (t0, t0, A3); /* t[0 .. l-1] = 8*A(1/2) < 15*N */ F_mod_1 (t0, n); mpz_mul_2exp (t2, A3, 2); mpz_add (t2, t2, A1); mpz_mul_2exp (t2, t2, 1); /* t[2l .. 3l-1] = 8*A_3 + 2*A_1 */ mpz_mul_2exp (gt, A2, 2); mpz_add (gt, gt, A0); /* gt = 4*A_2 + A0 */ mpz_sub (t4, gt, t2); /* t[4l .. 5l-1] = A(-2) */ mpz_add (t2, t2, gt); /* t[2l .. 3l-1] = A(2) */ F_mod_1 (t4, n); F_mod_1 (t2, n); /*** Evaluate B(2), B(-2), 8*B(1/2) ***/ mpz_mul_2exp (t1, B0, 1); mpz_add (t1, t1, B1); mpz_mul_2exp (t1, t1, 1); mpz_add (t1, t1, B2); mpz_mul_2exp (t1, t1, 1); mpz_add (t1, t1, B3); /* t[l .. 2l-1] = 8*B(1/2) */ F_mod_1 (t1, n); mpz_mul_2exp (t3, B3, 2); mpz_add (t3, t3, B1); mpz_mul_2exp (t3, t3, 1); /* t[3l .. 4l-1] = 8*B_3 + 2*B_1 */ mpz_mul_2exp (gt, B2, 2); mpz_add (gt, gt, B0); /* gt = 4*B_2 + B0 */ mpz_sub (t5, gt, t3); /* t[5l .. 6l-1] = B(-2) */ mpz_add (t3, t3, gt); /* t[3l .. 4l-1] = B(2) */ F_mod_1 (t5, n); F_mod_1 (t3, n); /* Evaluate A(1), A(-1) */ mpz_add (C2, A0, A2); /* May overwrite A2 */ #undef A2 mpz_add (gt, A1, A3); mpz_set (C1, B0); /* C1 = B(0) May overwrite A1 */ #undef A1 mpz_sub (C4, C2, gt); /* C4 = A(-1). May overwrite B0 */ #undef B0 mpz_add (C2, C2, gt); /* C2 = A(1) < 4*N */ F_mod_1 (C2, n); F_mod_1 (C4, n); /* Evaluate B(1), B(-1) */ mpz_add (gt, C1, B2); /* B0 is in C1 */ mpz_set (C6, A3); /* C6 = A(inf) May overwrite B2 */ #undef B2 mpz_add (C3, B1, B3); /* May overwrite A3 */ #undef A3 mpz_sub (C5, gt, C3); /* C5 = B(-1). 
May overwrite B1 */ #undef B1 mpz_add (C3, gt, C3); /* C3 = B(1) */ F_mod_1 (C3, n); F_mod_1 (C5, n); } /* A0 A1 A2 A3 B0 B1 B2 B3 */ /* A0 B0 A(1) B(1) A(-1) B(-1) A3 B3 */ /* C0 C1 C2 C3 C4 C5 C6 C7 */ r = F_mul (t, t, t + l, l, DEFAULT, n, t + 6 * l); /* t0 = 8*A(1/2) * 8*B(1/2) = 64*C(1/2) */ r += F_mul (t + 2 * l, t + 2 * l, t + 3 * l, l, DEFAULT, n, t + 6 * l); /* t2 = A(2) * B(2) = C(2) */ r += F_mul (t + 4 * l, t + 4 * l, t + 5 * l, l, DEFAULT, n, t + 6 * l); /* t4 = A(-2) * B(-2) = C(-2) */ r += F_mul (C, A, C + l, l, DEFAULT, n, t + 6 * l); /* C0 = A(0)*B(0) = C(0) */ r += F_mul (C + 2 * l, C + 2 * l, C + 3 * l, l, DEFAULT, n, t + 6 * l); /* C2 = A(1)*B(1) = C(1) */ r += F_mul (C + 4 * l, C + 4 * l, C + 5 * l, l, DEFAULT, n, t + 6 * l); /* C4 = A(-1)*B(-1) = C(-1) */ r += F_mul (C + 6 * l, C + 6 * l, B + 3 * l, l, DEFAULT, n, t + 6 * l); /* C6 = A(inf)*B(inf) = C(inf) */ } /* C(0) C(1) C(-1) C(inf) 64*C(1/2) C(2) C(-2) */ /* C0,C1 C2,C3 C4,C5 C6,C7 t0,t1 t2,t3 t4,t5 */ for (i = 0; i < 2 * l - 1; i++) { mpz_add (t0, t0, t2); /* t0 = 65 34 20 16 20 34 65 */ mpz_sub (gt, C2, C4); /* gt = 2*C_odd(1) = 0 2 0 2 0 2 0 */ mpz_add (C2, C2, C4); /* C2 = 2*C_even(1) = 2 0 2 0 2 0 2 */ F_divby2 (C2, C2, n); /* C2 = C_even(1) */ mpz_add (C4, t2, t4); /* C4 = 2*C_even(2) */ F_divby2 (C4, C4, n); /* C4 = C_even(2) */ mpz_sub (t4, t2, t4); /* t4 = 2*C_odd(2) */ F_divby2 (t4, t4, n); F_divby2 (t4, t4, n); /* t4 = C_odd(2)/2 = C_1 + 4*C_3 + 16*C_5 */ F_divby2 (t2, gt, n); /* t2 = C_odd(1) */ mpz_sub (t0, t0, gt); /* t0 = 65 32 20 14 20 32 65 */ mpz_mul_2exp (gt, gt, 4); mpz_sub (t0, t0, gt); /* t0 = 65 0 20 -18 20 0 65 */ mpz_add (gt, C0, C6); /* gt = C_0 + C_6 */ mpz_sub (C2, C2, gt); /* C2 = C_2 + C_4 */ mpz_sub (t0, t0, gt); /* t0 = 64 0 20 -18 20 0 64 */ mpz_mul_2exp (gt, gt, 5); /* gt = 32*C_0 + 32*C_6 */ F_divby2 (t0, t0, n); /* t0 = 32 0 10 -9 10 0 32 */ mpz_sub (t0, t0, gt); /* t0 = 0 0 10 -9 10 0 0 */ mpz_sub (t0, t0, C2); /* t0 = 0 0 9 -9 9 0 0 */ F_divby3_1 (t0, 
n); F_divby3_1 (t0, n); /* t0 = 0 0 1 -1 1 0 0 */ mpz_sub (t0, C2, t0); /* t0 = C_3 */ mpz_sub (t2, t2, t0); /* t2 = C_1 + C_5 */ mpz_mul_2exp (gt, t0, 2); /* gt = 4*C_3 */ mpz_sub (t4, t4, gt); /* t4 = C_1 + 16*C_5 */ mpz_sub (t4, t4, t2); /* t4 = 15*C_5 */ F_divby3_1 (t4, n); F_divby5_1 (t4, n); /* t4 = C_5 */ mpz_sub (t2, t2, t4); /* t2 = C_1 */ mpz_sub (C4, C4, C0); /* C4 = 4*C_2 + 16*C_4 + 64*C_6 */ F_divby2 (C4, C4, n); F_divby2 (C4, C4, n); /* C4 = C_2 + 4*C_4 + 16*C_6 */ mpz_mul_2exp (gt, C6, 4); mpz_sub (C4, C4, gt); /* C4 = C_2 + 4*C_4 */ mpz_sub (C4, C4, C2); /* C4 = 3*C_4 */ F_divby3_1 (C4, n); /* C4 = C_4 */ mpz_sub (C2, C2, C4); /* C2 = C_2 */ } for (i = 0; i < l - 1; i++) { mpz_add (C1, C1, t2); F_mod_1 (C1, n); } mpz_set (C1, t2); F_mod_1 (C1, n); for (i = l; i < 2 * l - 1; i++) { mpz_add (C1, C1, t2); F_mod_1 (C1, n); } for (i = 0; i < l - 1; i++) { mpz_add (C3, C3, t0); F_mod_1 (C3, n); } mpz_set (C3, t0); F_mod_1 (C3, n); for (i = l; i < 2 * l - 1; i++) { mpz_add (C3, C3, t0); F_mod_1 (C3, n); } for (i = 0; i < l - 1; i++) { mpz_add (C5, C5, t4); F_mod_1 (C5, n); } mpz_set (C5, t4); F_mod_1 (C5, n); for (i = l; i < 2 * l - 1; i++) { mpz_add (C5, C5, t4); F_mod_1 (C5, n); } return r; } /* Karatsuba split. Calls F_mul() to multiply the three pieces. 
*/
/* F_karatsuba(): one level of Karatsuba splitting, used by F_mul().

   len must be even (see the ASSERT) and is halved on entry; below, "len"
   always means the halved value.  Each operand X is treated as a low half
   X0 = X[0 .. len-1] and a high half X1 = X[len .. 2*len-1].  Three
   half-length products are computed by F_mul() in DEFAULT mode and
   recombined in R[0 .. 4*len-1].

   R    : result array.  The code explicitly handles R == A and
          R + 2*len == B (see the three-way branch below).
   A, B : inputs; A == B selects the squaring branch.
   n    : passed through unchanged to F_mul().
   t    : scratch.  t[0 .. 2*len-1] receives the middle product and
          t + 2*len is handed to the nested F_mul() calls as their scratch.

   Returns the sum of the three F_mul() return values.
   NOTE(review): F_mul() is documented to return UINT_MAX on error; such a
   value is summed here like any other count, not propagated -- confirm
   that no caller relies on error detection through this path. */
static unsigned int
F_karatsuba (mpz_t *R, mpz_t *A, mpz_t *B, unsigned int len, unsigned int n,
             mpz_t *t)
{
  unsigned int i, r;

  ASSERT(len % 2 == 0);

  len /= 2;

  if (A == B) /* Squaring */
    {
      /* The product written to R[0 ...] comes last; presumably so that both
         halves of A are still intact for the first two calls when R
         aliases A -- TODO confirm */
      r = F_mul (t, A, A + len, len, DEFAULT, n, t + 2 * len); /* A0 * A1 */
      r += F_mul (R + 2 * len, A + len, A + len, len, DEFAULT, n, t + 2 * len); /* A1^2 */
      r += F_mul (R, A, A, len, DEFAULT, n, t + 2 * len); /* A0^2 */
      /* Add the doubled cross product 2*A0*A1 at offset len */
      for (i = 0; i < 2 * len - 1; i++)
        {
          mpz_mul_2exp (t[i], t[i], 1);
          mpz_add (R[i + len], R[i + len], t[i]); /* i==len could be a mpz_set */
        }
      return r;
    }

  for (i = 0; i < len; i++)
    {
      mpz_add (t[i], A[i], A[i + len]); /* t0 = A0 + A1 */
      mpz_add (t[i + len], B[i], B[i + len]); /* t1 = B0 + B1 */
    }

  r = F_mul (t, t, t + len, len, DEFAULT, n, t + 2 * len);
  /* t[0...2*len-1] = (A0+A1) * (B0+B1) = A0*B0 + A0*B1 + A1*B0 + A1*B1 */

  /* The order of the two remaining products depends on which inputs the
     result array overlaps, so that no input half is overwritten before it
     has been used */
  if (R != A)
    {
      r += F_mul (R, A, B, len, DEFAULT, n, t + 2 * len);
      /* R[0...2*len-1] = A0 * B0 */
      r += F_mul (R + 2 * len, A + len, B + len, len, DEFAULT, n, t + 2 * len);
      /* R[2*len...4*len-1] = A1 * B1, may overwrite B */
    }
  else if (R + 2 * len != B)
    {
      r += F_mul (R + 2 * len, A + len, B + len, len, DEFAULT, n, t + 2 * len);
      /* R[2*len...4*len-1] = A1 * B1 */
      r += F_mul (R, A, B, len, DEFAULT, n, t + 2 * len);
      /* R[0...2*len-1] = A0 * B0, overwrites A */
    }
  else /* R == A && R + 2*len == B */
    {
      for (i = 0; i < len; i++)
        { /* mpz_swap instead? Perhaps undo later? Or interface for F_mul
             to specify separate result arrays for high/low half? */
          mpz_set (gt, A[len + i]); /* Swap A1 and B0 */
          mpz_set (A[len + i], B[i]);
          mpz_set (B[i], gt);
        }
      /* After the swap the layout in R is A0, B0, A1, B1 */
      r += F_mul (R, R, R + len, len, DEFAULT, n, t + 2 * len);
      /* R[0...2*len-1] = A0 * B0, overwrites A */
      r += F_mul (R + 2 * len, R + 2 * len, R + 3 * len, len, DEFAULT, n, t + 2 * len);
      /* R[2*len...4*len-1] = A1 * B1, overwrites B */
    }

  /* R[0...2*len-2] == A0*B0, R[2*len-1] == 0 */
  /* R[2*len...4*len-2] == A1*B1, R[4*len-1] == 0 */
  /* t[0...2*len-2] == (A0+A1)*(B0+B1), t[2*len-1] == 0 */

  /* We're doing indices i and i+len in one loop on the assumption that 6
     residues will probably fit into cache. After all, Karatsuba is only
     called for smallish F_m. This way, the final add R[i+len] += t[i]
     can be done inside the same loop and we need only one pass over main
     memory. */

  for (i = 0; i < len - 1; i++)
    {
      mpz_sub (t[i], t[i], R[i]); /* t = A0*B1 + A1*B0 + A1*B1 */
      mpz_sub (t[i], t[i], R[i + 2 * len]); /* t = A0*B1 + A1*B0 */
      mpz_sub (t[i + len], t[i + len], R[i + len]);
      mpz_sub (t[i + len], t[i + len ], R[i + 3 * len]);

      mpz_add (R[i + len], R[i + len], t[i]);
      mpz_add (R[i + 2 * len], R[i + 2 * len], t[i + len]);
    }
  /* Remaining index i = len - 1: the target R[2*len-1] is assigned rather
     than accumulated, which relies on it being zero as stated in the
     comments above */
  mpz_sub (t[len - 1], t[len - 1], R[len - 1]);
  mpz_sub (R[2 * len - 1], t[len - 1], R[3 * len - 1]);

  return r;
}

/* Multiply two polynomials with coefficients modulo 2^(2^m)+1. */
/* len is length (=degree+1) of polynomials and must be a power of 2. */
/* n=2^m */
/* Return value: number of multiplies performed, or UINT_MAX in case of error */
unsigned int
F_mul (mpz_t *R, mpz_t *A, mpz_t *B, unsigned int len, int parameter,
       unsigned int n, mpz_t *t)
{
  unsigned int i, r=0;
  unsigned int transformlen = (parameter == NOPAD) ? 
len : 2 * len; #ifdef CHECKSUM mpz_t chksum1, chksum_1, chksum0, chksuminf; #endif /* Handle trivial cases */ if (len == 0) return 0; if (!gt_inited) { mpz_init2 (gt, 2 * n); gt_inited = 1; } if (len == 1) { if (parameter == MONIC) { /* (x + a0)(x + b0) = x^2 + (a0 + b0)x + a0*b0 */ mpz_add (gt, A[0], B[0]); F_mod_gt (t[0], n); F_mulmod (R[0], A[0], B[0], n); /* May overwrite A[0] */ mpz_set (R[1], t[0]); /* May overwrite B[0] */ /* We don't store the leading 1 monomial in the result poly */ } else { F_mulmod (R[0], A[0], B[0], n); /* May overwrite A[0] */ mpz_set_ui (R[1], 0); /* May overwrite B[0] */ } return 1; } #ifdef CHECKSUM mpz_init2 (chksum1, n+64); mpz_init2 (chksum_1, n+64); mpz_init2 (chksum0, n+64); mpz_init2 (chksuminf, n+64); mpz_set_ui (gt, 0); for (i = 0; i < len; i++) { /* Compute A(1) and B(1) */ mpz_add (chksum1, chksum1, A[i]); mpz_add (gt, gt, B[i]); /* Compute A(-1) and B(-1) */ if (i % 2 == 0) { mpz_add (chksum_1, chksum_1, A[i]); mpz_add (chksum0, chksum0, B[i]); /* chksum0 used temporarily here */ } else { mpz_sub (chksum_1, chksum_1, A[i]); mpz_sub (chksum0, chksum0, B[i]); } } if (parameter == MONIC) { mpz_add_ui (chksum1, chksum1, 1); mpz_add_ui (gt, gt, 1); mpz_add_ui (chksum_1, chksum_1, 1); mpz_add_ui (chksum0, chksum0, 1); } mpz_mul (gt, gt, chksum1); F_mod_gt (chksum1, n); mpz_mul (gt, chksum0, chksum_1); F_mod_gt (chksum_1, n); /* Compute A(0) * B(0) */ mpz_mul (gt, A[0], B[0]); F_mod_gt (chksum0, n); /* Compute A(inf) * B(inf) */ mpz_mul (gt, A[len - 1], B[len - 1]); F_mod_gt (chksuminf, n); if (parameter == MONIC) { mpz_add (chksuminf, chksuminf, A[len - 2]); mpz_add (chksuminf, chksuminf, B[len - 2]); } r += 4; #endif /* CHECKSUM */ /* Don't do FFT if len =< 4 (Karatsuba or Toom-Cook are faster) unless we do a transform without zero padding, or if transformlen > 4*n (no suitable primitive roots of 1) */ if ((len > 4 || parameter == NOPAD) && transformlen <= 4 * n) { unsigned int len2; /* len2 = log_2(transformlen). 
Assumes transformlen > 0 */ for (i = transformlen, len2 = 0; (i&1) == 0; i >>= 1, len2++); if (i != 1) { outputf (OUTPUT_ERROR, "F_mul: polynomial length must be power of 2, " "but is %d\n", len); return UINT_MAX; } /* Are we performing a squaring or multiplication? */ if (A != B) { /* So it's a multiplication */ /* Put transform of B into t */ for (i = 0; i < len; i++) mpz_set (t[i], B[i]); if (parameter == MONIC) mpz_set_ui (t[i++], 1); for (; i < transformlen; i++) mpz_set_ui (t[i], 0); F_fft_dif (t, transformlen, 0, n); } else t = R; /* Do squaring */ /* Put A into R */ for (i = 0; i < len; i++) mpz_set (R[i], A[i]); if (parameter == MONIC) mpz_set_ui (R[i++], 1); /* May overwrite B[0] */ for (; i < transformlen; i++) mpz_set_ui (R[i], 0); /* May overwrite B[i - len] */ F_fft_dif (R, transformlen, 0, n); for (i = 0; i < transformlen; i++) { F_mulmod (R[i], R[i], t[i], n); /* Do the div-by-length. Transform length was transformlen, len2 = log_2 (transformlen), so divide by 2^(len2) = sqrt(2)^(2*len2) */ F_mul_sqrt2exp (R[i], R[i], - 2 * len2, n); } r += transformlen; F_fft_dit (R, transformlen, 0, n); if (parameter == MONIC) mpz_sub_ui (R[0], R[0], 1); } else { /* Karatsuba or Toom-Cook split */ if (parameter == NOPAD) { outputf (OUTPUT_ERROR, "F_mul: cyclic/short products not supported " "by Karatsuba/Toom-Cook\n"); return UINT_MAX; } if (len / n == 4 || len == 2) r += F_karatsuba (R, A, B, len, n, t); else r += F_toomcook4 (R, A, B, len, n, t); if (parameter == MONIC) /* Handle the leading monomial the hard way */ { /* This only works if A, B and R do not overlap */ if (A == R || B == R + len) { outputf (OUTPUT_ERROR, "F_mul: monic polynomials with Karatsuba/" "Toom-Cook and overlapping input/output not supported\n"); return UINT_MAX; } for (i = 0; i < len; i++) { mpz_add (R[i + len], R[i + len], A[i]); mpz_add (R[i + len], R[i + len], B[i]); F_mod_1 (R[i + len], n); } } } #ifdef DEBUG if (parameter != MONIC && parameter != NOPAD) { F_mod_1 (R[transformlen - 
1], n); if (mpz_sgn (R[transformlen - 1]) != 0) outputf (OUTPUT_ALWAYS, "F_mul, len %d: R[%d] == %Zd != 0\n", len, transformlen - 1, R[transformlen - 1]); } #endif #ifdef CHECKSUM /* Compute R(1) = (A*B)(1) and subtract from chksum1 */ for (i = 0; i < transformlen; i++) mpz_sub (chksum1, chksum1, R[i]); if (parameter == MONIC) mpz_sub_ui (chksum1, chksum1, 1); while (mpz_sizeinbase (chksum1, 2) > n) F_mod_1 (chksum1, n); if (mpz_sgn (chksum1) != 0) outputf (OUTPUT_ALWAYS, "F_mul, len %d: A(1)*B(1) != R(1), difference %Zd\n", len, chksum1); /* Compute R(-1) = (A*B)(-1) and subtract from chksum_1 */ for (i = 0; i < transformlen; i++) if (i % 2 == 0) mpz_sub (chksum_1, chksum_1, R[i]); else mpz_add (chksum_1, chksum_1, R[i]); if (parameter == MONIC) mpz_sub_ui (chksum_1, chksum_1, 1); while (mpz_sizeinbase (chksum_1, 2) > n) F_mod_1 (chksum_1, n); if (mpz_sgn (chksum_1) != 0) outputf (OUTPUT_ALWAYS, "F_mul, len %d: A(-1)*B(-1) != R(-1), difference %Zd\n", len, chksum_1); if (parameter != NOPAD) { mpz_sub (chksum0, chksum0, R[0]); while (mpz_sizeinbase (chksum0, 2) > n) F_mod_1 (chksum0, n); if (mpz_sgn (chksum0) != 0) outputf (OUTPUT_ALWAYS, "F_mul, len %d: A(0)*B(0) != R(0), difference %Zd\n", len, chksum0); mpz_sub (chksuminf, chksuminf, R[transformlen - 2]); while (mpz_sizeinbase (chksuminf, 2) > n) F_mod_1 (chksuminf, n); if (mpz_sgn (chksuminf) != 0) outputf (OUTPUT_ALWAYS, "F_mul, len %d: A(inf)*B(inf) != R(inf), difference %Zd\n", len, chksuminf); } mpz_clear (chksum1); mpz_clear (chksum_1); mpz_clear (chksum0); mpz_clear (chksuminf); #endif /* CHECKSUM */ return r; } /* Transposed multiply of two polynomials with coefficients modulo 2^(2^m)+1. lenB is the length of polynomial B and must be a power of 2, lenA is the length of polynomial A and must be lenB / 2 or lenB / 2 + 1. n=2^m t must have space for 2*lenB coefficients Only the product coefficients [lenA - 1 ... lenA + lenB/2 - 2] will go into R[0 ... 
lenB / 2 - 1] Return value: number of multiplies performed, UINT_MAX in error case. */ unsigned int F_mul_trans (mpz_t *R, mpz_t *A, mpz_t *B, unsigned int lenA, unsigned int lenB, unsigned int n, mpz_t *t) { unsigned int i, r = 0, len2; /* Handle trivial cases */ if (lenB < 2) return 0; ASSERT(lenA == lenB / 2 || lenA == lenB / 2 + 1); if (!gt_inited) { mpz_init2 (gt, 2 * n); gt_inited = 1; } if (lenB == 2) { F_mulmod (R[0], A[0], B[0], n); return 1; } if (lenB <= 4 * n) { /* len2 = log_2(lenB) */ for (i = lenB, len2 = 0; i > 1 && (i&1) == 0; i >>= 1, len2++); if (i != 1) { outputf (OUTPUT_ERROR, "F_mul_trans: polynomial length must be power of 2, " "but is %d\n", lenB); return UINT_MAX; } /* Put transform of B into t */ for (i = 0; i < lenB; i++) mpz_set (t[i], B[i]); F_fft_dif (t, lenB, 0, n); /* Put transform of reversed A into t + lenB */ for (i = 0; i < lenA; i++) mpz_set (t[i + lenB], A[lenA - 1 - i]); for (i = lenA; i < lenB; i++) mpz_set_ui (t[i + lenB], 0); F_fft_dif (t + lenB, lenB, 0, n); for (i = 0; i < lenB; i++) { F_mulmod (t[i], t[i], t[i + lenB], n); /* Do the div-by-length. 
Transform length was len, so divide by 2^len2 = sqrt(2)^(2*len2) */ F_mul_sqrt2exp (t[i], t[i], - 2 * len2, n); } r += lenB; F_fft_dit (t, lenB, 0, n); for (i = 0; i < lenB / 2; i++) mpz_set (R[i], t[i + lenA - 1]); } else { /* Only Karatsuba, no Toom-Cook here */ unsigned int h = lenB / 4; const unsigned int lenA0 = h, lenA1 = lenA - h; outputf (OUTPUT_DEVVERBOSE, "schoen_strass.c: Transposed Karatsuba, " "lenA = %lu, lenB = %lu\n", lenA, lenB); /* A = a1 * x^h + a0 B = b3 * x^3h + b2 * x^2h + b1 * x^h + b0 mul^T(A, B) = mul^T(a0,b3) * x^4h + (mul^T(a1,b3) + mul^T(a0,b2)) * x^3h + (mul^T(a1,b2) + mul^T(a0,b1)) * x^2h + (mul^T(a1,b1) + mul^T(a0,b0)) * x + mul^T(a1,b0) We only want the x^h, x^2h and x^3h coefficients, mul^T(a1,b1) + mul^T(a0,b0) mul^T(a1,b2) + mul^T(a0,b1) mul^T(a1,b3) + mul^T(a0,b2) Specifically, we want R[i] = \sum_{j=0}^{lenA} A[j] * B[j+i], 0 <= i < 2h */ /* T */ for (i = 0; i < h; i++) mpz_add (t[i], A[i], A[i + h]); if (lenA1 == h + 1) mpz_set (t[h], A[2*h]); r = F_mul_trans (t, t, B + h, lenA1, 2 * h, n, t + lenA1); /* Uses t[h ... 5h-1] as temp */ /* U */ for (i = 0; i < 2 * h; i++) mpz_sub (t[i + h], B[i], B[h + i]); r += F_mul_trans (t + h, A, t + h, lenA0, 2 * h, n, t + 3 * h); /* Uses t[3h ... 7h-1] as temp */ for (i = 0; i < h; i++) mpz_add (R[i], t[i], t[i + h]); /* R[0 ... h-1] = t + r */ /* V */ for (i = 0; i < 2 * h; i++) mpz_sub (t[i + h], B[i + 2 * h], B[i + h]); r += F_mul_trans (t + h, A + h, t + h, lenA1, 2 * h, n, t + 3 * h); /* Uses t[3h ... 
7h - 1] as temp */ for (i = 0; i < h; i++) mpz_add (R[i + h], t[i], t[i + h]); } return r; } void F_clear () { if (gt_inited) mpz_clear (gt); gt_inited = 0; } ecm-6.4.4/ecm-params.h.pentium-m0000644023561000001540000000117512106741273013332 00000000000000/* those parameters were obtained on toto.loria.fr with ecm-6.3-rc3 gmp-5.0.1, and gcc 4.0.2 -m32 -O2 -pedantic -fomit-frame-pointer -mtune=pentium3 -march=pentium3 */ #define MPZMOD_THRESHOLD 98 #define REDC_THRESHOLD 398 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 10, 1, 1, 12, 12, 1, 14, 12, 13, 1, 15, 16, 15, 16, 19, 20, 22} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 1024 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 256 #define POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 1024 ecm-6.4.4/candi.c0000644023561000001540000002030112106741273010434 00000000000000/* Encapsulated candidate. This candidate should have been a C++ class, but since we are using straight C for this project, I guess I can deal with it. Copyright 2003, 2004, 2005, 2006 Jim Fougeron, Paul Zimmermann. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include #include #include #include "ecm-ecm.h" #define VALID_MAGIC 0x24837BF5 #define DEAD_MAGIC 0xDEADBEEF #if defined (CANDI_DEBUG) static void Candi_Validate (const char *FunctionStr, const mpcandi_t *n) { int abrt = 0; if (!FunctionStr) { fprintf (stderr, "ERROR, UNKNOWN FUNCTION, can NOT continue checks!\n"); exit(-1); } if (!n) { abrt = fprintf (stderr, "ERROR, %s() *n was NULL, can NOT continue checks!\n", FunctionStr); exit(-1); } if (n->magic != VALID_MAGIC) abrt = fprintf (stderr, "ERROR, %s() VALID_MAGIC not valid\n", FunctionStr); if (n->cpExpr && n->nexprlen != strlen(n->cpExpr)) abrt = fprintf (stderr, "ERROR, %s() Invalid cpExpr length\n", FunctionStr); if (n->ndigits != nb_digits(n->n)) abrt = fprintf (stderr, "ERROR, %s() Invalid n->ndigits length\n", FunctionStr); if (abrt) exit(-1); } #endif void mpcandi_t_init (mpcandi_t *n) { n->cpExpr = NULL; n->nexprlen = 0; n->ndigits = 1; mpz_init_set_ui (n->n, 1); n->isPrp = 0; #if defined (CANDI_DEBUG) n->magic = VALID_MAGIC; Candi_Validate ("mpcandi_t_init", n); #endif } void mpcandi_t_free (mpcandi_t *n) { #if defined (CANDI_DEBUG) Candi_Validate("mpcandi_t_free", n); #endif if (n->cpExpr) free (n->cpExpr); n->cpExpr = NULL; n->nexprlen = 0; n->ndigits = 0; mpz_clear (n->n); n->isPrp = 1; /* "default" to prp, so that if the candidate does not get filled in, it will not be tested */ #if defined (CANDI_DEBUG) n->magic = DEAD_MAGIC; #endif } /* performs a safe "deep" copy */ int mpcandi_t_copy (mpcandi_t *to, mpcandi_t *from) { #if defined (CANDI_DEBUG) Candi_Validate("Pre mpcandi_t_copy", to); Candi_Validate("Pre mpcandi_t_copy", from); #endif if (to == from) return 1; if (to->cpExpr) free(to->cpExpr); to->cpExpr = NULL; if (from->cpExpr) { to->cpExpr = (char *) malloc(from->nexprlen+1); if (to->cpExpr == NULL) { fprintf (stderr, "Error: not enough memory\n"); exit (EXIT_FAILURE); } strcpy(to->cpExpr, from->cpExpr); } to->nexprlen = from->nexprlen; mpz_set(to->n, from->n); to->isPrp = from->isPrp; 
to->ndigits = from->ndigits; #if defined (CANDI_DEBUG) Candi_Validate("Post mpcandi_t_copy", to); Candi_Validate("Post mpcandi_t_copy", from); #endif return 1; } int mpcandi_t_add_candidate (mpcandi_t *n, mpz_t c, const char *cpExpr, int primetest) { #if defined (CANDI_DEBUG) Candi_Validate("Pre mpcandi_t_add_candidate", n); #endif if (n->cpExpr) free (n->cpExpr); n->cpExpr = NULL; if (cpExpr) { n->nexprlen = strlen (cpExpr); n->cpExpr = (char *) malloc (n->nexprlen + 1); if (n->cpExpr == NULL) { fprintf (stderr, "Error: not enough memory\n"); exit (EXIT_FAILURE); } strcpy (n->cpExpr, cpExpr); } mpz_set (n->n, c); if (primetest) n->isPrp = probab_prime_p (c, PROBAB_PRIME_TESTS); else n->isPrp = 0; /* there is a candidate there now, and the user did not tell us to prp it, so assume it is composite */ n->ndigits = nb_digits (c); #if defined (CANDI_DEBUG) Candi_Validate("Post mpcandi_t_add_candidate", n); #endif return 1; } int mpcandi_t_addfoundfactor_d (mpcandi_t *n, double f) { #if defined (CANDI_DEBUG) Candi_Validate("Pre mpcandi_t_addfoundfactor_d", n); #endif int ret; mpz_t t; mpz_init_set_d(t,f); /* do not display a warning if this factor does not divide the remaining cofactor. This function is called repeatedly (until it fails) to remove all traces of the prime factor. It is highly likely that these smaller factors will be non square-free within the candidate when starting. A return of zero is exprected by the calling trial divider, as that tells it that all residue of the factor has been eliminated */ ret = mpcandi_t_addfoundfactor (n, t, 0); mpz_clear (t); #if defined (CANDI_DEBUG) Candi_Validate("Post mpcandi_t_addfoundfactor_d", n); #endif return ret; } int mpcandi_t_addfoundfactor (mpcandi_t *n, mpz_t f, int displaywarning) { #if defined (CANDI_DEBUG) Candi_Validate("Pre mpcandi_t_addfoundfactor_d", n); #endif char *cp, *cp1; if (!mpz_divisible_p (n->n, f)) { /* ERROR was not a factor NOTE however, that this is "valid" for the ui() function to call. 
When trial dividing, it is VERY frequent to be divisible by 2^3, and we try to remove factors UNTIL */ if (displaywarning) gmp_fprintf (stderr, "ECM logic ERROR. Trying to remove a " "non-factor %Zd\n", f); #if defined (CANDI_DEBUG) Candi_Validate("Post (no factor removed) mpcandi_t_addfoundfactor_d", n); #endif return 0; } /* remove f from n->n */ mpz_divexact (n->n, n->n, f); n->ndigits = nb_digits (n->n); n->isPrp = probab_prime_p (n->n, PROBAB_PRIME_TESTS); if (n->cpExpr != NULL) { /* If there is an expression, then lets preserve it */ cp1 = mpz_get_str (NULL, 10, f); cp = (char *) malloc(n->nexprlen+1 + 3 + strlen(cp1)); /* +1 for null, +3 for ()/ */ if (cp == NULL) { fprintf (stderr, "Error: not enough memory\n"); exit (EXIT_FAILURE); } sprintf (cp, "(%s)/%s", n->cpExpr, cp1); free(n->cpExpr); n->cpExpr = cp; n->nexprlen += (3+strlen(cp1)); FREE (cp1, strlen (cp1) + 1); } #if defined (CANDI_DEBUG) Candi_Validate("Post (removed factor) mpcandi_t_addfoundfactor_d", n); #endif return 1; } /********************************************************************** Group order candidate functions. These wrap the logic for the -go command line switch which allows the user to "insert" the proper group order. 
**********************************************************************/ void mpgocandi_t_init (mpgocandi_t *go) { go->cpOrigExpr = NULL; mpcandi_t_init (&(go->Candi)); go->containsN = 0; go->Valid = 0; } void mpgocandi_t_free (mpgocandi_t *go) { if (go->cpOrigExpr) free (go->cpOrigExpr); mpcandi_t_free (&(go->Candi)); go->Valid = 0; } int mpgocandi_fixup_with_N (mpgocandi_t *go, mpcandi_t *n) { int NumNs, len; char *cp, *cpo, *numbuf; if (go->Valid == 0) return 0; if (go->containsN == 0) return 1; /* a valid "normal" expression does not need updating */ cp = strchr (go->cpOrigExpr, 'N'); NumNs = 0; while (cp) { ++NumNs; cp = strchr (&cp[1], 'N'); } /* compute size of string needed, and add some safety buffer to it */ cp = go->cpOrigExpr; len = NumNs * mpz_sizeinbase (n->n, 10) + strlen (cp) + 100; numbuf = (char *) malloc(len); if (numbuf == NULL) { fprintf (stderr, "Error: not enough memory\n"); exit (EXIT_FAILURE); } cpo = numbuf; while (*cp) { if (*cp == 'N') cpo += gmp_sprintf (cpo, "%Zi", n->n); else *cpo++ = *cp; ++cp; } *cpo = 0; /* Null terminate the string correctly. */ if (eval_str (&(go->Candi), numbuf, 0, NULL)) go->Valid = 1; else { static int warned = 0; if (!warned) { warned = 1; fprintf(stderr, "Warning, invalid expression %s for the -go option\n", go->cpOrigExpr); } go->Valid = 0; /* it is not valid, so do not use it */ } free (numbuf); return go->Valid; } ecm-6.4.4/eval.c0000644023561000001540000004401112106741274010312 00000000000000/* Simple expression parser for GMP-ECM. Copyright 2003, 2004, 2005, 2006, 2007, 2008, 2012 Jim Fougeron, Paul Zimmermann and Alexander Kruppa. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include "ecm-ecm.h" #ifdef HAVE_STRINGS_H # include /* for strncasecmp */ #endif #ifdef HAVE_CTYPE_H # include #endif /***************************************************************** * Syntax for this VERY simple recursive expression parser: * * * * ( or [ or { along with ) or ] or } are valid for grouping * * Normal "simple" operators: + - * / (. can be used for *) * * Module: n%m 345%11 * * Unary minus is supported: -n -500 * * Exponentation: n^m 2^500 * * Simple factorial: n! 53! 
== 1*2*3*4...*52*53 * * Multi-factorial: n!m 15!3 == 15.12.9.6.3 * * Simple Primorial: n# 11# == 2*3*5*7*11 * * Reduced Primorial: n#m 17#5 == 5.7.11.13.17 * * * * Adding (working on these at least: * * Phi(x,n) * * * * NOTE Lines ending in a \ character are "joined" * * NOTE Lines starting with #are comments * * NOTE C++ // single line comments (rest of line is a comment) * * * ****************************************************************/ /* value only used by the expression parser */ static mpz_t t, mpOne; static char *expr_str; static void eval_power (mpz_t prior_n, mpz_t n,char op); static void eval_product (mpz_t prior_n, mpz_t n,char op); static void eval_sum (mpz_t prior_n, mpz_t n,char op); static int eval_Phi (mpz_t prior_n, mpz_t n, int ParamCnt); static int eval_2 (int bInFuncParams); #if 0 /* strncasecmp is a required function in configure.in */ #if defined (_MSC_VER) || defined (__MINGW32__) #define strncasecmp strnicmp #endif #endif /**************************************/ /* Main expression evalation function */ /* This is the function that the app */ /* calls to read the expression line */ /**************************************/ int eval (mpcandi_t *n, FILE *fd, int primetest) { int ret; int nMaxSize = 2000, nCurSize = 0; int c; char *expr = (char *) malloc (nMaxSize + 1); if (expr == NULL) { fprintf (stderr, "Error: not enough memory\n"); exit (EXIT_FAILURE); } /* Lines ending in '\\' are "joined" as a single longer line */ JoinLinesLoop:; c = fgetc (fd); if (c == '#') { ChompLine:; do c = fgetc (fd); while (c != EOF && !IS_NEWLINE(c)); if (IS_NEWLINE(c)) goto JoinLinesLoop; } while (c != EOF && !IS_NEWLINE(c) && c != ';') { if (c == '/') { /* This might be a C++ // comment or it might be a / division operator. 
Check it out, and if it is a comment, then "eat it" */ int peek_c = fgetc (fd); if (peek_c == '/') /* Got a C++ single line comment, so Chomp the line */ goto ChompLine; /* Put the char back on the file, then allow the code to add the '/' char to the buffer */ ungetc (peek_c, fd); } /* strip space and tabs out here, and then we DON'T have to mess with them in the rest of the parser */ if (!isspace (c)) expr[nCurSize++] = (char) c; if (nCurSize == nMaxSize) { char *cp; nMaxSize += nMaxSize / 2; cp = (char *) realloc (expr, nMaxSize + 1); if (!cp) { free (expr); fprintf (stderr, "Severe warning!, out of core memory reading number!\n"); exit (EXIT_FAILURE); } expr = cp; } c = fgetc (fd); } expr[nCurSize] = 0; if (!nCurSize) ret = 0; else { if (expr[nCurSize-1] == '\\') { /* remove the '\\' char, and then process the next line */ expr[--nCurSize] = 0; goto JoinLinesLoop; } if (c == ';') ungetc (c, fd); mpz_init (t); expr_str = expr; ret = eval_2 (0); if (ret) { char *s; char *cpTmpExpr = expr; s = mpz_get_str (NULL, 10, t); if (!strcmp(s, cpTmpExpr)) cpTmpExpr = NULL; ret = mpcandi_t_add_candidate (n, t, cpTmpExpr, primetest); FREE (s, strlen (s) + 1); } mpz_clear(t); } free(expr); return ret; } int eval_str (mpcandi_t *n, char *cp, int primetest, char **EndChar) { int ret; int nMaxSize=2000, nCurSize=0; char *c; char *expr = (char *) malloc(nMaxSize+1); if (expr == NULL) { fprintf (stderr, "Error: not enough memory\n"); exit (EXIT_FAILURE); } /* Lines ending in '\\' are "joined" as a single longer line */ c = cp; JoinLinesLoop:; if (*c == '#') { do ++c; while (*c && !IS_NEWLINE(*c)); if (IS_NEWLINE(*c)) goto JoinLinesLoop; } while (*c && !IS_NEWLINE(*c) && *c != ';') { /* strip space and tabs out here, and then we DON'T have to mess with them in the rest of the parser */ if (!isspace((int) *c)) expr[nCurSize++] = *c; if (nCurSize == nMaxSize) { char *cp; nMaxSize += 5000; cp = (char *) realloc (expr, nMaxSize + 1); if (!cp) { free(expr); fprintf(stderr, "Severe 
warning!, out of core memory reading number!\n"); exit (EXIT_FAILURE); } expr = cp; } ++c; } expr[nCurSize] = 0; if (!nCurSize) ret = 0; else { if (expr[nCurSize-1] == '\\') { /* remove the '\\' char, and then process the next line */ expr[--nCurSize] = 0; goto JoinLinesLoop; } if (*c != ';') ++c; mpz_init(t); expr_str = expr; ret = eval_2(0); if (ret) { char *s; char *cpTmpExpr = expr; s = mpz_get_str (NULL, 10, t); if (!strcmp(s, cpTmpExpr)) cpTmpExpr = NULL; ret = mpcandi_t_add_candidate(n, t, cpTmpExpr, primetest); FREE (s, strlen (s) + 1); } mpz_clear(t); } free(expr); if (EndChar && *EndChar) *EndChar = c; return ret; } void eval_power (mpz_t prior_n, mpz_t n,char op) { #if defined (DEBUG_EVALUATOR) if ('#'==op || '^'==op || '!'==op || '@'==op || '$'==op) { fprintf (stderr, "eval_power "); mpz_out_str(stderr, 10, prior_n); fprintf (stderr, "%c", op); mpz_out_str(stderr, 10, n); fprintf (stderr, "\n"); } #endif if ('^'==op) mpz_pow_ui(n,prior_n,mpz_get_ui(n)); else if ('!'==op) /* simple factorial (syntax n! example: 7! == 1*2*3*4*5*6*7) */ mpz_fac_ui(n,mpz_get_ui(n)); else if ('@'==op) /* Multi factorial (syntax n!prior_n. 
example: 15!3 == 15*12*9*6*3) */ { long nCur; unsigned long nDecr; nCur = mpz_get_si(prior_n); nDecr = mpz_get_ui(n); mpz_set_ui(n,1); /*printf ("Multi-factorial %ld!%ld\n", nCur, nDecr);*/ while (nCur > 1) { /* This could be done much more efficiently (bunching mults using smaller "built-ins"), but I am not going to bother for now */ mpz_mul_ui(n,n,nCur); nCur -= nDecr; } } else if ('#'==op) /* simple primorial (syntax n# example: 11# == 2*3*5*7*11 */ { long nMax; double p; nMax = mpz_get_si(n); mpz_set_ui(n,1); getprime_clear (); /* free the prime tables, and reinitialize */ for (p = 2.0; p <= nMax; p = getprime ()) /* This could be done much more efficiently (bunching mults using smaller "built-ins"), but I am not going to bother for now */ mpz_mul_ui(n,n,(unsigned)p); } else if ('$'==op) /* reduced primorial (syntax n#prior_n example: 13#5 == (5*7*11*13) */ { double p; long nMax; unsigned long nStart; nMax = mpz_get_si(prior_n); nStart = mpz_get_ui(n); mpz_set_ui(n,1); getprime_clear (); /* free the prime tables, and reinitialize */ p = getprime (nStart); /*printf ("Reduced-primorial %ld#%ld\n", nMax, nStart);*/ for (; p <= nMax; p = getprime (p)) { /* Unfortunately, the SoE within GMP-ECM does not always start correctly, so we have to skip the low end stuff by hand */ if (p >= nStart) /* This could be done much more efficiently (bunching mults using smaller "built-ins"), but I am not going to bother for now */ mpz_mul_ui(n,n,(unsigned)p); } } } void eval_product (mpz_t prior_n, mpz_t n, char op) { #if defined (DEBUG_EVALUATOR) if ('*'==op || '.'==op || '/'==op || '%'==op) { fprintf (stderr, "eval_product "); mpz_out_str(stderr, 10, prior_n); fprintf (stderr, "%c", op); mpz_out_str(stderr, 10, n); fprintf (stderr, "\n"); } #endif if ('*' == op || '.' 
== op) mpz_mul (n, prior_n, n); else if ('/' == op) { mpz_t r; mpz_init (r); mpz_tdiv_qr (n, r, prior_n, n); if (mpz_cmp_ui (r, 0) != 0) { fprintf (stderr, "Parsing Error: inexact division\n"); exit (EXIT_FAILURE); } mpz_clear (r); } else if ('%' == op) mpz_tdiv_r (n, prior_n, n); } void eval_sum (mpz_t prior_n, mpz_t n,char op) { #if defined (DEBUG_EVALUATOR) if ('+'==op || '-'==op) { fprintf (stderr, "eval_sum "); mpz_out_str(stderr, 10, prior_n); fprintf (stderr, "%c", op); mpz_out_str(stderr, 10, n); fprintf (stderr, "\n"); } #endif if ('+' == op) mpz_add(n,prior_n,n); else if ('-' == op) mpz_sub(n,prior_n,n); } int eval_Phi (mpz_t b, mpz_t n, int ParamCnt) { int factors[200]; unsigned dwFactors=0, dw; int B; double p; mpz_t D, T, org_n; if (ParamCnt == 0) { fprintf (stderr, "\nParsing Error - the Phi function (in ECM) requires 2 parameters\n"); return 0; } if (mpz_cmp_ui(n, 1) == 0) { /* return value is 1 if b is composite, or b if b is prime */ int isPrime = mpz_probab_prime_p (b, PROBAB_PRIME_TESTS); if (isPrime) mpz_set(n, b); else mpz_set(n, mpOne); return 1; } if (mpz_cmp_si(n, -1) == 0) { /* this is actually INVALID, but it is easier to simply */ fprintf (stderr, "\nParsing Error - Invalid parameter passed to the Phi function\n"); return 0; } /* OK parse the Phi out now */ if (mpz_cmp_ui(b, 0) == 0) { /* this is valid, but return that it is NOT */ mpz_set(n, mpOne); return 0; } if (mpz_cmp_ui(b, 1) == 0) { if (mpz_cmp_ui(n, 1) != 0) mpz_sub_ui(n, n, 1); return 1; } /* Ok, do the real h_primative work, since we are not one of the trivial case */ B = mpz_get_si(b); if (mpz_cmp_ui(b, B)) { fprintf (stderr, "\nParsing Error - Invalid parameter passed to the Phi function (first param B too high)\n"); return 0; } /* Obtain the factors of B */ getprime_clear (); /* free the prime tables, and reinitialize */ for (p = 2.0; p <= B; p = getprime ()) { if (B % (int) p == 0) { /* Add the factor one time */ factors[dwFactors++] = (int) p; /* but be sure to totally 
remove it */ do { B /= (int) p; } while (B % (int) p == 0); } } B = mpz_get_si(b); mpz_init_set(org_n, n); mpz_set_ui(n, 1); mpz_init_set_ui(D, 1); mpz_init(T); for(dw=0;(dw<(1U<= n) return (ADD * n); d = n - r; e = 2 * r - n; c = DUP + ADD; /* initial duplicate and final addition */ while (d != e) { if (d < e) { r = d; d = e; e = r; } if (d - e <= e / 4 && ((d + e) % 3) == 0) { /* condition 1 */ d = (2 * d - e) / 3; e = (e - d) / 2; c += 3 * ADD; /* 3 additions */ } else if (d - e <= e / 4 && (d - e) % 6 == 0) { /* condition 2 */ d = (d - e) / 2; c += ADD + DUP; /* one addition, one duplicate */ } else if ((d + 3) / 4 <= e) { /* condition 3 */ d -= e; c += ADD; /* one addition */ } else if ((d + e) % 2 == 0) { /* condition 4 */ d = (d - e) / 2; c += ADD + DUP; /* one addition, one duplicate */ } /* now d+e is odd */ else if (d % 2 == 0) { /* condition 5 */ d /= 2; c += ADD + DUP; /* one addition, one duplicate */ } /* now d is odd and e even */ else if (d % 3 == 0) { /* condition 6 */ d = d / 3 - e; c += 3 * ADD + DUP; /* three additions, one duplicate */ } else if ((d + e) % 3 == 0) { /* condition 7 */ d = (d - 2 * e) / 3; c += 3 * ADD + DUP; /* three additions, one duplicate */ } else if ((d - e) % 3 == 0) { /* condition 8 */ d = (d - e) / 3; c += 3 * ADD + DUP; /* three additions, one duplicate */ } else /* necessarily e is even */ { /* condition 9 */ e /= 2; c += ADD + DUP; /* one addition, one duplicate */ } } return c; } #define NV 4 /* #define SWAP(x,y) { __mpz_struct *tmp = x; x = y; y = tmp; } */ #define SWAP mpres_swap /* computes V_k(P) from P=A and puts the result in P=A. Assumes k>2. Uses auxiliary variables t, B, C, T, T2. 
*/
/* Parameters as used below:
     A          input value and result, overwritten in place
     k          index of the term to compute
     n          modulus, handed to every mpres_* / pp1_* helper
     t          scratch residue handed to pp1_add3
     B, C, T, T2  auxiliary residues, all overwritten

   Outline: pick the multiplier val[i] for which lucas_cost_pp1 reports the
   lowest cost (val[0] is kept when none is cheaper than ADD * k), derive
   the starting pair (d, e) from it, then reduce the pair with the nine
   conditions below until d == e, updating A, B, C alongside.  One final
   pp1_add3 combines A, B and C into the result.  */
void
pp1_mul_prac (mpres_t A, ecm_uint k, mpmod_t n, mpres_t t, mpres_t B,
              mpres_t C, mpres_t T, mpres_t T2)
{
  ecm_uint d, e, r, i = 0;
  static double val[NV] =
    {0.61803398874989485, 0.5801787282954641, 0.6179144065288179 ,
     0.6180796684698958};
  /* 1/GR, 5/(GR+7) (2), 1429/(GR+2311) (8), 3739/(6051-GR) (9) */

  /* chooses the best value of v: d is the table index here, r holds the
     cheapest cost seen so far and i the index that produced it */
  for (d = 0, r = ADD * k; d < NV; d++)
    {
      e = lucas_cost_pp1 (k, val[d]);
      if (e < r)
        {
          r = e;
          i = d;
        }
    }
  d = k;
  r = (ecm_uint) ((double) d * val[i] + 0.5); /* k * val[i], rounded */

  /* first iteration always begins by Condition 3, then a swap */
  d = k - r;
  e = 2 * r - k;
  mpres_set (B, A, n); /* B=A */
  mpres_set (C, A, n); /* C=A */
  pp1_duplicate (A, A, n); /* A = 2*A */
  while (d != e)
    {
      /* keep d >= e; the roles of A and B are exchanged together with
         those of d and e */
      if (d < e)
        {
          r = d;
          d = e;
          e = r;
          mpres_swap (A, B, n);
        }
      /* do the first line of Table 4 whose condition qualifies */
      if (d - e <= e / 4 && ((d + e) % 3) == 0)
        { /* condition 1 */
          d = (2 * d - e) / 3;
          e = (e - d) / 2;
          pp1_add3 (T, A, B, C, n, t); /* T = f(A,B,C) */
          pp1_add3 (T2, T, A, B, n, t); /* T2 = f(T,A,B) */
          pp1_add3 (B, B, T, A, n, t); /* B = f(B,T,A) */
          mpres_swap (A, T2, n); /* swap A and T2 */
        }
      else if (d - e <= e / 4 && (d - e) % 6 == 0)
        { /* condition 2 */
          d = (d - e) / 2;
          pp1_add3 (B, A, B, C, n, t); /* B = f(A,B,C) */
          pp1_duplicate (A, A, n); /* A = 2*A */
        }
      else if ((d + 3) / 4 <= e) /* <==> (d <= 4 * e) */
        { /* condition 3 */
          d -= e;
          pp1_add3 (C, B, A, C, n, t); /* C = f(B,A,C) */
          SWAP (B, C, n);
        }
      else if ((d + e) % 2 == 0)
        { /* condition 4 */
          d = (d - e) / 2;
          pp1_add3 (B, B, A, C, n, t); /* B = f(B,A,C) */
          pp1_duplicate (A, A, n); /* A = 2*A */
        }
      /* d+e is now odd */
      else if (d % 2 == 0)
        { /* condition 5 */
          d /= 2;
          pp1_add3 (C, C, A, B, n, t); /* C = f(C,A,B) */
          pp1_duplicate (A, A, n); /* A = 2*A */
        }
      /* d is odd, e even */
      else if (d % 3 == 0)
        { /* condition 6 */
          d = d / 3 - e;
          pp1_duplicate (T, A, n); /* T = 2*A */
          pp1_add3 (T2, A, B, C, n, t); /* T2 = f(A,B,C) */
          pp1_add3 (A, T, A, A, n, t); /* A = f(T,A,A) */
          pp1_add3 (C, T, T2, C, n, t); /* C = f(T,T2,C) */
          SWAP (B, C, n);
        }
      else if ((d + e) % 3 == 0) /* d+e <= val[i]*k < k < 2^32 */
        { /* condition 7 */
          d = (d - 2 * e) / 3;
          pp1_add3 (T, A, B, C, n, t); /* T1 = f(A,B,C) */
          pp1_add3 (B, T, A, B, n, t); /* B = f(T1,A,B) */
          pp1_duplicate (T, A, n);
          pp1_add3 (A, A, T, A, n, t); /* A = 3*A */
        }
      else if ((d - e) % 3 == 0)
        { /* condition 8: never happens? */
          d = (d - e) / 3;
          pp1_add3 (T, A, B, C, n, t); /* T1 = f(A,B,C) */
          pp1_add3 (C, C, A, B, n, t); /* C = f(A,C,B) */
          SWAP (B, T, n); /* swap B and T */
          pp1_duplicate (T, A, n);
          pp1_add3 (A, A, T, A, n, t); /* A = 3*A */
        }
      else /* necessarily e is even */
        { /* condition 9: never happens? */
          e /= 2;
          pp1_add3 (C, C, B, A, n, t); /* C = f(C,B,A) */
          pp1_duplicate (B, B, n); /* B = 2*B */
        }
    }

  /* final combination of the three running values into the result */
  pp1_add3 (A, A, B, C, n, t);

  /* the loop is expected to finish with d == e == 1 */
  ASSERT(d == 1);
}
ecm-6.4.4/ecm-params.h.alpha-ev60000644023561000001540000000071112106741273013175 00000000000000#define MPZMOD_THRESHOLD 235 #define REDC_THRESHOLD 424 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 11 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 11 #define MUL_NTT_THRESHOLD 2048 #define PREREVERTDIVISION_NTT_THRESHOLD 1024 #define POLYINVERT_NTT_THRESHOLD 2048 #define POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 256 ecm-6.4.4/athlon/0000755023561000001540000000000012113421640010552 500000000000000ecm-6.4.4/athlon/autogen.py0000755023561000001540000001675712106741265012543 00000000000000#!/usr/bin/python import re import sys def offaddr(addr, offset): if offset == 0: return "("+addr+")" else: return str(offset)+"("+addr+")" # Generate asm for addmul1_k # src and dst are pointers (stored in regs) + offsets # multiplier is in a register # rax, rbx, rcx, rdx are free for use.
def addmul1_k(src, off_src, dst, off_dst, mult, k): init = "### addmul1: src[0] is " + offaddr(src, off_src) + "\n" init = init + "### dst[0] is " + offaddr(dst, off_dst) + "\n" init = init + "### mult is " + mult + "\n" init = init + "### k is " + str(k) + "\n" init = init + "### kills %eax, %ebx, %ecx, %edx\n" init = init + "### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx\n" init = init + " movl " + offaddr(src, off_src) + ", %eax\n" init = init + " mull " + mult + "\n" init = init + " movl %eax, %ebx\n" init = init + " movl %edx, %ecx\n" init = init + " movl " + offaddr(src, off_src+4) + ", %eax\n" block = """ mull __mult__ addl __cylo__, __zi__ movl $0, __cylo__ adcl %eax, __cyhi__ movl __xi2__, %eax adcl %edx, __cylo__ """ code = init cylo = "%ebx" cyhi = "%ecx" for i in range(0,k-2): blocki = re.sub('__cylo__', cylo, block) blocki = re.sub('__cyhi__', cyhi, blocki) blocki = re.sub('__xi2__', offaddr(src, off_src+(i+2)*4), blocki) blocki = re.sub('__zi__', offaddr(dst, off_dst+i*4), blocki) blocki = re.sub('__mult__', mult, blocki) code = code + blocki tmp = cylo cylo = cyhi cyhi = tmp final = " mull " + mult + "\n" final = final + " addl " + cylo + ", " + offaddr(dst, off_dst+(k-2)*4) + "\n" final = final + " adcl " + cyhi + ", %eax\n" final = final + " adcl $0, %edx\n" final = final + " addl %eax, " + offaddr(dst, off_dst+4*(k-1)) + "\n" final = final + " adcl $0, %edx\n" final = final + "### carry limb is in %edx\n" code = code + final return code, "%edx" ### Try mmx/sse2 addmul_1, copying the one of GMP for Pentium4 def addmul1_k_var(src, off_src, dst, off_dst, mult, k): init = "### addmul1: src[0] is " + offaddr(src, off_src) + "\n" init = init + "### dst[0] is " + offaddr(dst, off_dst) + "\n" init = init + "### mult is " + mult + "\n" init = init + "### k is " + str(k) + "\n" init = init + "### kills %eax, %edx and mmx regs \n" init = init + "### dst[0,k[ += mult*src[0,k[ plus carry put in ecx\n" init = init + " pxor %mm0, %mm0\n" init = init 
+ " movd " + mult + ", %mm7\n" block = """ movd __xi__, %mm1 movd __zi__, %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, __zi__ psrlq $32, %mm0 """ code = init for i in range(0,k): blocki = re.sub('__xi__', offaddr(src, off_src+i*4), block) blocki = re.sub('__zi__', offaddr(dst, off_dst+i*4), blocki) code = code + blocki final = " movd %mm0, %ecx\n" final = final + "### carry limb is in %ecx\n" code = code + final return code, "%ecx" def mulredc_k_rolled(k): header = """# mp_limb_t mulredc__k(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc__k TYPE(GSYM_PREFIX`'mulredc__k,`function') GSYM_PREFIX`'mulredc__k: """ init = re.sub("__k", str(k), header) INV_M = offaddr("%esp", 4*(2*k+1) + 40) ADDR_M = offaddr("%esp", 4*(2*k+1) + 36) ADDR_Y = offaddr("%esp", 4*(2*k+1) + 32) ADDR_X = offaddr("%esp", 4*(2*k+1) + 28) ADDR_Z = offaddr("%esp", 4*(2*k+1) + 24) init = init + """ pushl %ebp pushl %edi pushl %esi pushl %ebx """ init = init + " subl $" + str(4*(2*k+2)) + ", %esp\n" init = init + " movl %esp, %edi\n" init = init + "### set tmp[0..2k+1[ to 0\n" for i in range(0,2*k+1): init = init + " movl $0, " + offaddr("%edi", 4*i) + "\n" code = init middle_code = "###########################################\n" middle_code = middle_code + " movl $" + str(k) + ", " + offaddr("%esp", 4*(2*k+1)) + "\n" middle_code = middle_code + """ .align 32 Loop: ## compute u and store in %ebp """ middle_code = middle_code + " movl " + ADDR_X + ", %eax\n" middle_code = middle_code + " movl " + ADDR_Y + ", %esi\n" middle_code = middle_code + """ movl (%eax), %eax mull (%esi) addl (%edi), %eax """ middle_code = middle_code + " mull " + INV_M + "\n" 
middle_code = middle_code + " movl %eax, %ebp\n" middle_code = middle_code + " movl " + ADDR_M + ", %esi\n" codeaddmul, carry = addmul1_k("%esi", 0, "%edi", 0, "%ebp", k) middle_code = middle_code + codeaddmul middle_code = middle_code + " addl " + carry + ", " + offaddr("%edi", 4*k) + "\n" middle_code = middle_code + " adcl $0, " + offaddr("%edi", 4*(k+1)) + "\n" middle_code = middle_code + " movl " + ADDR_X + ", %eax\n" middle_code = middle_code + " movl (%eax), %ebp\n" middle_code = middle_code + " movl " + ADDR_Y + ", %esi\n" codeaddmul, carry = addmul1_k("%esi", 0, "%edi", 0, "%ebp", k) middle_code = middle_code + codeaddmul middle_code = middle_code + " addl " + carry + ", " + offaddr("%edi", 4*k) + "\n" middle_code = middle_code + " adcl $0, " + offaddr("%edi", 4*(k+1)) + "\n\n" middle_code = middle_code + " addl $4, " + ADDR_X + "\n addl $4, %edi\n" middle_code = middle_code + " decl " + offaddr("%esp", 4*(2*k+1)) + "\n jnz Loop\n" code = code + middle_code final = "###########################################\n" final = final + "### Copy result in z\n" final = final + " movl " + ADDR_Z + ", %ebx\n" for i in range(0,k): final = final + " movl " + offaddr("%edi", 4*i) + ", %eax\n" final = final + " movl %eax, " + offaddr("%ebx", 4*i) + "\n" final = final + " movl " + offaddr("%edi", 4*k) + ", %eax # carry\n" final = final + " addl $" + str(4*(2*k+2)) + ", %esp\n" final = final + " popl %ebx\n" final = final + " popl %esi\n" final = final + " popl %edi\n" final = final + " popl %ebp\n" # final = final + " emms\n" final = final + " ret\n" code = code + final return code k = int(sys.argv[1]) if k == 1: print """# # mp_limb_t mulredc1(mp_limb_t *z, const mp_limb_t x, const mp_limb_t y, # const mp_limb_t m, mp_limb_t inv_m) # # Compute z := x*y mod m, in Montgomery representation, where x, y < m # and m is n limb wide. 
inv_m is the less significant limb of the # inverse of m modulo 2^(n*GMP_LIMB_BITS) # # The result might be unreduced (larger than m) but becomes reduced # after subtracting m. The calling function should take care of that. # # We use a temporary space for unreduced product on the stack. # Therefore, this can not be used for large integers (anyway, the # algorithm is quadratic). # # WARNING: z is only n limbs but since it might be unreduced, there # could be a carry that does not fit in z. This carry is returned. include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc1 TYPE(GSYM_PREFIX`'mulredc1,`function') GSYM_PREFIX`'mulredc1: # Stack: # inv_m 20(%esp) # m 16 # y 12(%esp) # x 8 # z 4(%esp) movl 12(%esp), %eax mull 8(%esp) movl %edx, 12(%esp) movl %eax, 8(%esp) # store xy in [8(%esp):12(%esp)] mull 20(%esp) # compute u mull 16(%esp) # compute u*m addl 8(%esp), %eax # eax is 0, now (carry is important) adcl 12(%esp), %edx movl 4(%esp), %ecx movl %edx, (%ecx) adcl $0, %eax ret """ else: print mulredc_k_rolled(k) ecm-6.4.4/athlon/mulredc3.asm0000644023561000001540000000507212106741265012730 00000000000000# mp_limb_t mulredc3(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc3 TYPE(GSYM_PREFIX`'mulredc3,`function') GSYM_PREFIX`'mulredc3: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $32, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) ########################################### movl $3, 28(%esp) .align 32 Loop: ## compute u and store in %ebp movl 56(%esp), %eax movl 60(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 68(%esp) movl %eax, %ebp movl 64(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 3 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 8(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 12(%edi) adcl $0, 16(%edi) movl 56(%esp), %eax movl (%eax), %ebp movl 60(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 3 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 8(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 12(%edi) adcl $0, 16(%edi) addl $4, 56(%esp) addl $4, %edi decl 28(%esp) jnz Loop ########################################### ### Copy result in z movl 52(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax # carry addl $32, %esp popl %ebx popl %esi popl %edi 
popl %ebp ret ecm-6.4.4/athlon/mulredc.h0000644023561000001540000000462512106741265012317 00000000000000#ifndef __ASM_REDC_H__ #define __ASM_REDC_H__ #include /* Signals that we have assembly code for variable size redc */ #define HAVE_ASM_REDC3 extern void ecm_redc3(mp_limb_t *, const mp_limb_t *, mp_size_t, mp_limb_t); /* WARNING: the size-1 version doesn't take pointers in input */ extern mp_limb_t mulredc1(mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t); extern mp_limb_t mulredc2(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc3(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc4(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc5(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc6(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc7(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc8(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc9(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc10(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc11(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc12(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc13(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc14(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc15(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern 
mp_limb_t mulredc16(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc17(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc18(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc19(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc20(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); #endif ecm-6.4.4/athlon/mulredc14.asm0000644023561000001540000001323312106741265013010 00000000000000# mp_limb_t mulredc14(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc14 TYPE(GSYM_PREFIX`'mulredc14,`function') GSYM_PREFIX`'mulredc14: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $120, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) ########################################### movl $14, 116(%esp) .align 32 Loop: ## compute u and store in %ebp movl 144(%esp), %eax movl 148(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 156(%esp) movl %eax, %ebp movl 152(%esp), %esi ### addmul1: src[0] 
is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 14 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 52(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 56(%edi) adcl $0, 60(%edi) movl 144(%esp), %eax movl (%eax), %ebp movl 148(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 14 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx 
movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 52(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 56(%edi) adcl $0, 60(%edi) addl $4, 144(%esp) addl $4, %edi decl 116(%esp) jnz Loop ########################################### ### Copy result in z movl 140(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax # carry addl $120, %esp popl %ebx popl %esi popl %edi popl %ebp ret 
ecm-6.4.4/athlon/mulredc5.asm0000644023561000001540000000616612106741265012737 00000000000000# mp_limb_t mulredc5(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc5 TYPE(GSYM_PREFIX`'mulredc5,`function') GSYM_PREFIX`'mulredc5: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $48, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) ########################################### movl $5, 44(%esp) .align 32 Loop: ## compute u and store in %ebp movl 72(%esp), %eax movl 76(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 84(%esp) movl %eax, %ebp movl 80(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 5 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 16(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 20(%edi) adcl $0, 24(%edi) movl 72(%esp), %eax movl (%eax), %ebp movl 76(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 5 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put 
in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 16(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 20(%edi) adcl $0, 24(%edi) addl $4, 72(%esp) addl $4, %edi decl 44(%esp) jnz Loop ########################################### ### Copy result in z movl 68(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax # carry addl $48, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/Makefile.dev0000644023561000001540000000160412106741265012722 00000000000000.PHONY: all all: test_mulredc bench CFLAGS:=-g -O2 -funroll-loops ALLMULRED:= mulredc1.o mulredc2.o mulredc3.o mulredc4.o mulredc5.o\ mulredc6.o mulredc7.o mulredc8.o mulredc9.o mulredc10.o\ mulredc11.o mulredc12.o mulredc13.o mulredc14.o\ mulredc15.o mulredc16.o mulredc17.o mulredc18.o\ mulredc19.o mulredc20.o redc.s: redc.asm m4 redc.asm > redc.s redc.o: redc.s gcc -c $(CFLAGS) redc.s -o redc.o mulredc%.o: mulredc%.asm m4 $< > tmp-mulred.s gcc -c $(CFLAGS) tmp-mulred.s -o $@ rm tmp-mulred.s mulredc%.asm: ./autogen.py ./autogen.py $* > $@ test_mulredc: test_mulredc.c redc.o $(ALLMULRED) gcc -o test_mulredc $(CFLAGS) test_mulredc.c $(ALLMULRED) redc.o -lgmp bench: bench.c redc.o $(ALLMULRED) gcc -o bench $(CFLAGS) bench.c $(ALLMULRED) redc.o -lgmp clean: rm redc.s *.o mulredc[0-9]*.s mulredc[0-9]*.asm test_mulredc ecm-6.4.4/athlon/generate_all0000755023561000001540000000016312106741265013054 00000000000000#!/bin/sh for i in 1 2 3 4 5 6 7 8 
9 10 11 12 13 14 15 16 17 18 19 20; do ./autogen.py $i > mulredc$i.asm done ecm-6.4.4/athlon/mulredc6.asm0000644023561000001540000000662412106741265012737 00000000000000# mp_limb_t mulredc6(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc6 TYPE(GSYM_PREFIX`'mulredc6,`function') GSYM_PREFIX`'mulredc6: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $56, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) ########################################### movl $6, 52(%esp) .align 32 Loop: ## compute u and store in %ebp movl 80(%esp), %eax movl 84(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 92(%esp) movl %eax, %ebp movl 88(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 6 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 20(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 24(%edi) adcl $0, 28(%edi) movl 
80(%esp), %eax movl (%eax), %ebp movl 84(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 6 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 20(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 24(%edi) adcl $0, 28(%edi) addl $4, 80(%esp) addl $4, %edi decl 52(%esp) jnz Loop ########################################### ### Copy result in z movl 76(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax # carry addl $56, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc2.asm0000644023561000001540000000443712106741265012733 00000000000000# mp_limb_t mulredc2(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc2 TYPE(GSYM_PREFIX`'mulredc2,`function') GSYM_PREFIX`'mulredc2: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $24, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) ########################################### movl $2, 20(%esp) .align 32 Loop: ## compute u and store in %ebp movl 48(%esp), %eax movl 52(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 60(%esp) movl %eax, %ebp movl 56(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 2 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 4(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 8(%edi) adcl $0, 12(%edi) movl 48(%esp), %eax movl (%eax), %ebp movl 52(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 2 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 4(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 8(%edi) adcl $0, 12(%edi) addl $4, 48(%esp) addl $4, %edi decl 20(%esp) jnz Loop ########################################### ### Copy result in z movl 44(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax # carry addl $24, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc20.asm0000644023561000001540000001653312106741265013013 00000000000000# mp_limb_t mulredc20(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # 
y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc20 TYPE(GSYM_PREFIX`'mulredc20,`function') GSYM_PREFIX`'mulredc20: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $168, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) movl $0, 132(%edi) movl $0, 136(%edi) movl $0, 140(%edi) movl $0, 144(%edi) movl $0, 148(%edi) movl $0, 152(%edi) movl $0, 156(%edi) movl $0, 160(%edi) ########################################### movl $20, 164(%esp) .align 32 Loop: ## compute u and store in %ebp movl 192(%esp), %eax movl 196(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 204(%esp) movl %eax, %ebp movl 200(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 20 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, 
%ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) movl $0, %ebx adcl %eax, %ecx movl 64(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 60(%edi) movl $0, %ecx adcl %eax, %ebx movl 68(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 64(%edi) movl $0, %ebx adcl %eax, %ecx movl 72(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 68(%edi) movl $0, %ecx adcl %eax, %ebx movl 76(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 72(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 76(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 80(%edi) adcl $0, 84(%edi) movl 192(%esp), %eax movl (%eax), %ebp movl 196(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 20 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl 
%eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) movl $0, %ebx adcl %eax, %ecx movl 64(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 60(%edi) movl $0, %ecx adcl %eax, %ebx movl 68(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 64(%edi) movl $0, %ebx adcl %eax, %ecx movl 72(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 68(%edi) movl $0, %ecx adcl %eax, %ebx movl 76(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 72(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 76(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 80(%edi) adcl $0, 84(%edi) addl $4, 192(%esp) addl $4, %edi decl 164(%esp) jnz Loop ########################################### ### Copy result in z movl 188(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 
12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax movl %eax, 64(%ebx) movl 68(%edi), %eax movl %eax, 68(%ebx) movl 72(%edi), %eax movl %eax, 72(%ebx) movl 76(%edi), %eax movl %eax, 76(%ebx) movl 80(%edi), %eax # carry addl $168, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc17.asm0000644023561000001540000001477312106741265013025 00000000000000# mp_limb_t mulredc17(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc17 TYPE(GSYM_PREFIX`'mulredc17,`function') GSYM_PREFIX`'mulredc17: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $144, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) movl $0, 132(%edi) movl $0, 136(%edi) ########################################### movl $17, 140(%esp) .align 32 Loop: ## compute u and store in %ebp movl 168(%esp), %eax movl 172(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 180(%esp) movl %eax, %ebp movl 176(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 17 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl 
$0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) movl $0, %ebx adcl %eax, %ecx movl 64(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 60(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 64(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 68(%edi) adcl $0, 72(%edi) movl 168(%esp), %eax movl (%eax), %ebp movl 172(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 17 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl 
$0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) movl $0, %ebx adcl %eax, %ecx movl 64(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 60(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 64(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 68(%edi) adcl $0, 72(%edi) addl $4, 168(%esp) addl $4, %edi decl 140(%esp) jnz Loop ########################################### ### Copy result in z movl 164(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax movl %eax, 64(%ebx) movl 68(%edi), %eax # carry addl $144, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/redc.asm0000644023561000001540000001600512106741265012125 00000000000000dnl Copyright 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc. 
dnl dnl This file is a modified part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or dnl modify it under the terms of the GNU Lesser General Public License as dnl published by the Free Software Foundation; either version 2.1 of the dnl License, or (at your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dnl Lesser General Public License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. include(`config.m4') TEXT GLOBL GSYM_PREFIX`'ecm_redc3 TYPE(GSYM_PREFIX`'ecm_redc3,`function') GSYM_PREFIX`'ecm_redc3: push %ebp # Push registers push %edi push %esi push %ebx subl $16, %esp # SF: 2 Cpt + Jump +1 movl 44(%esp), %ecx # Read size movl 36(%esp), %edi # Read Dest Ptr movl %ecx, (%esp) # Save counter cmpl $5, %ecx jae Unroll Loop: movl 48(%esp), %ebp # Read invm movl 40(%esp), %esi # Read Source Ptr imull (%edi), %ebp # Dest[0] * invm movl %edi, 36(%esp) # Save new Dest movl 44(%esp), %ecx # Read Size (2) xorl %ebx, %ebx # Initial Carry InnerLoop: # esi: Source # edi: Dest # ebp: Multiplier # ecx: Counter movl (%esi), %eax # U1 addl $4, %edi # V1 mull %ebp # U2 addl $4, %esi # V2 addl %ebx, %eax # U3 adcl $0, %edx # U4 addl %eax, -4(%edi) # V4 adcl $0, %edx # U5 decl %ecx # V5 movl %edx, %ebx # U6 jnz InnerLoop # V6 movl 36(%esp), %edi movl %ebx, (%edi) # Save final carry decl (%esp) lea 4(%edi), %edi # Advance Dest jnz Loop # Loop End: addl $16, %esp pop %ebx pop %esi pop %edi pop %ebp ret Unroll: # %ecx Read size // %edi Dest Ptr # Precalcul du saut movl %ecx, %edx decl %ecx subl $2, %edx negl %ecx shrl $4, 
%edx andl $15, %ecx movl %edx, 8(%esp) # Org Cpt of 4(%esp) movl %ecx, %edx shll $4, %edx negl %ecx leal UnrollEntry (%edx, %ecx,1), %edx movl %ecx, 44(%esp) # (-size)%16 movl %edx, 12(%esp) # Org PC inside UnrollLoop: movl 48(%esp), %ebp # Read invm movl 40(%esp), %esi # Read Source Ptr imull (%edi), %ebp # Dest[0] * invm movl %edi, 36(%esp) # Save new Dest movl 44(%esp), %ecx # Read Size %16 movl 8(%esp), %edx # Read InnerLoop Cpt movl %edx, 4(%esp) # Set InnerLoop Cpt # First mull and set initial carry movl (%esi), %eax leal 4(%esi,%ecx,4), %esi mull %ebp leal (%edi,%ecx,4), %edi movl %edx, %ebx # Do the Jump inside the unrolling loop # And set up the registers differently if odd movl 12(%esp), %edx testl $1, %ecx movl %eax, %ecx cmovnz %ebx, %ecx cmovnz %eax, %ebx jmp *%edx # eax scratch # ebx carry hi # ecx carry lo # edx scratch # esi src # edi dst # ebp multiplier .align 32, 0x90 UnrollInnerLoop: addl $64, %edi UnrollEntry: # movl 0(%esi), %eax # Can't use this instruction .byte 0x8b,0x46,0x00 mull %ebp # addl %ecx, 0(%edi) # Can't use this instruction .byte 0x01,0x4f,0x00 adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, 4(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 8(%esi), %eax mull %ebp addl %ecx, 8(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 12(%esi), %eax mull %ebp addl %ebx, 12(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 16(%esi), %eax mull %ebp addl %ecx, 16(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 20(%esi), %eax mull %ebp addl %ebx, 20(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 24(%esi), %eax mull %ebp addl %ecx, 24(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 28(%esi), %eax mull %ebp addl %ebx, 28(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 32(%esi), %eax mull %ebp addl %ecx, 32(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 36(%esi), %eax mull %ebp addl %ebx, 36(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, 
%ebx movl 40(%esi), %eax mull %ebp addl %ecx, 40(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 44(%esi), %eax mull %ebp addl %ebx, 44(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 48(%esi), %eax mull %ebp addl %ecx, 48(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 52(%esi), %eax mull %ebp addl %ebx, 52(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 56(%esi), %eax mull %ebp addl %ecx, 56(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 60(%esi), %eax mull %ebp addl %ebx, 60(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx decl 4(%esp) leal 64(%esi), %esi jns UnrollInnerLoop addl %ecx, 64(%edi) movl 36(%esp), %edi adcl $0, %ebx movl %ebx, (%edi) # Save final carry decl (%esp) lea 4(%edi), %edi # Advance Dest jnz UnrollLoop # Loop End2: addl $16, %esp pop %ebx pop %esi pop %edi pop %ebp ret ecm-6.4.4/athlon/mulredc18.asm0000644023561000001540000001543312106741265013020 00000000000000# mp_limb_t mulredc18(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc18 TYPE(GSYM_PREFIX`'mulredc18,`function') GSYM_PREFIX`'mulredc18: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $152, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) movl $0, 132(%edi) movl $0, 136(%edi) movl $0, 140(%edi) movl $0, 144(%edi) ########################################### movl $18, 148(%esp) .align 32 Loop: ## compute u and store in %ebp movl 176(%esp), %eax movl 180(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 188(%esp) movl %eax, %ebp movl 184(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 18 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, 
%ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) movl $0, %ebx adcl %eax, %ecx movl 64(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 60(%edi) movl $0, %ecx adcl %eax, %ebx movl 68(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 64(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 68(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 72(%edi) adcl $0, 76(%edi) movl 176(%esp), %eax movl (%eax), %ebp movl 180(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 18 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, 
%ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) movl $0, %ebx adcl %eax, %ecx movl 64(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 60(%edi) movl $0, %ecx adcl %eax, %ebx movl 68(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 64(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 68(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 72(%edi) adcl $0, 76(%edi) addl $4, 176(%esp) addl $4, %edi decl 148(%esp) jnz Loop ########################################### ### Copy result in z movl 172(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax movl %eax, 64(%ebx) movl 68(%edi), %eax movl %eax, 68(%ebx) movl 72(%edi), %eax # 
carry addl $152, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc19.asm0000644023561000001540000001607312106741265013022 00000000000000# mp_limb_t mulredc19(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc19 TYPE(GSYM_PREFIX`'mulredc19,`function') GSYM_PREFIX`'mulredc19: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $160, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) movl $0, 132(%edi) movl $0, 136(%edi) movl $0, 140(%edi) movl $0, 144(%edi) movl $0, 148(%edi) movl $0, 152(%edi) ########################################### movl $19, 156(%esp) .align 32 Loop: ## compute u and store in %ebp movl 184(%esp), %eax movl 188(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 196(%esp) movl %eax, %ebp movl 192(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 19 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx 
movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) movl $0, %ebx adcl %eax, %ecx movl 64(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 60(%edi) movl $0, %ecx adcl %eax, %ebx movl 68(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 64(%edi) movl $0, %ebx adcl %eax, %ecx movl 72(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 68(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 72(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 76(%edi) adcl $0, 80(%edi) movl 184(%esp), %eax movl (%eax), %ebp movl 188(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 19 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, 
%ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) movl $0, %ebx adcl %eax, %ecx movl 64(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 60(%edi) movl $0, %ecx adcl %eax, %ebx movl 68(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 64(%edi) movl $0, %ebx adcl %eax, %ecx movl 72(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 68(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 72(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 76(%edi) adcl $0, 80(%edi) addl $4, 184(%esp) addl $4, %edi decl 156(%esp) jnz Loop ########################################### ### Copy result in z movl 180(%esp), %ebx movl (%edi), %eax movl %eax, 
(%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax movl %eax, 64(%ebx) movl 68(%edi), %eax movl %eax, 68(%ebx) movl 72(%edi), %eax movl %eax, 72(%ebx) movl 76(%edi), %eax # carry addl $160, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc9.asm0000644023561000001540000001036612106741265012740 00000000000000# mp_limb_t mulredc9(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc9 TYPE(GSYM_PREFIX`'mulredc9,`function') GSYM_PREFIX`'mulredc9: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $80, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) ########################################### movl $9, 76(%esp) .align 32 Loop: ## compute u and store in %ebp movl 104(%esp), %eax movl 108(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 116(%esp) movl %eax, %ebp movl 112(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 9 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 32(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 36(%edi) adcl $0, 40(%edi) movl 104(%esp), %eax movl (%eax), %ebp movl 108(%esp), %esi ### addmul1: src[0] is 
(%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 9 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 32(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 36(%edi) adcl $0, 40(%edi) addl $4, 104(%esp) addl $4, %edi decl 76(%esp) jnz Loop ########################################### ### Copy result in z movl 100(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax # carry addl $80, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc13.asm0000644023561000001540000001257312106741265013015 00000000000000# mp_limb_t mulredc13(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... 
## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc13 TYPE(GSYM_PREFIX`'mulredc13,`function') GSYM_PREFIX`'mulredc13: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $112, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) ########################################### movl $13, 108(%esp) .align 32 Loop: ## compute u and store in %ebp movl 136(%esp), %eax movl 140(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 148(%esp) movl %eax, %ebp movl 144(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 13 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx 
adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 48(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 52(%edi) adcl $0, 56(%edi) movl 136(%esp), %eax movl (%eax), %ebp movl 140(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 13 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 48(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 52(%edi) 
adcl $0, 56(%edi) addl $4, 136(%esp) addl $4, %edi decl 108(%esp) jnz Loop ########################################### ### Copy result in z movl 132(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax # carry addl $112, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc12.asm0000644023561000001540000001213312106741265013004 00000000000000# mp_limb_t mulredc12(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc12 TYPE(GSYM_PREFIX`'mulredc12,`function') GSYM_PREFIX`'mulredc12: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $104, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) ########################################### movl $12, 100(%esp) .align 32 Loop: ## compute u and store in %ebp movl 128(%esp), %eax movl 132(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 140(%esp) movl %eax, %ebp movl 136(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 12 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl 
$0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 44(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 48(%edi) adcl $0, 52(%edi) movl 128(%esp), %eax movl (%eax), %ebp movl 132(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 12 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 44(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 48(%edi) adcl $0, 52(%edi) addl $4, 128(%esp) addl $4, %edi decl 100(%esp) jnz Loop ########################################### ### Copy result in z movl 124(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 
12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax # carry addl $104, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/Makefile.in0000644023561000001540000003524412113353767012565 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = athlon DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = LTLIBRARIES = $(noinst_LTLIBRARIES) libmulredc_la_LIBADD = am__objects_1 = mulredc1.lo mulredc2.lo mulredc3.lo mulredc4.lo \ mulredc5.lo mulredc6.lo mulredc7.lo mulredc8.lo mulredc9.lo \ mulredc10.lo mulredc11.lo mulredc12.lo mulredc13.lo \ mulredc14.lo mulredc15.lo mulredc16.lo mulredc17.lo \ mulredc18.lo mulredc19.lo mulredc20.lo am_libmulredc_la_OBJECTS = $(am__objects_1) redc.lo libmulredc_la_OBJECTS = $(am_libmulredc_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS) LTCCASCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS) COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(libmulredc_la_SOURCES) DIST_SOURCES = $(libmulredc_la_SOURCES) HEADERS = $(noinst_HEADERS) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = LIBOBJS = @LIBOBJS@ # The asm code does not depend on any libraries except libc for abort() # if assertions are enabled LIBS = LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = 
@SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ MULREDC = mulredc1.asm mulredc2.asm mulredc3.asm mulredc4.asm mulredc5.asm \ mulredc6.asm mulredc7.asm mulredc8.asm mulredc9.asm mulredc10.asm \ mulredc11.asm mulredc12.asm mulredc13.asm mulredc14.asm \ mulredc15.asm mulredc16.asm mulredc17.asm mulredc18.asm \ mulredc19.asm mulredc20.asm EXTRA_DIST = Makefile.dev README autogen.py generate_all noinst_LTLIBRARIES = libmulredc.la # This library definition also causes the mulredc[n].asm and redc.asm files # to go in the distribution - no need for having them in EXTRA_DIST libmulredc_la_SOURCES = $(MULREDC) redc.asm 
noinst_HEADERS = mulredc.h # It's actually the .s files that depend on config.m4, but automake # knows them only as intermediate files, not as targets. Adding the # dependency to libmulredc.la should work so long as no stale .s # files exist. libmulredc_la_DEPENDENCIES = $(top_builddir)/config.m4 all: all-am .SUFFIXES: .SUFFIXES: .S .asm .lo .o .obj .s $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu athlon/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu athlon/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-noinstLTLIBRARIES: -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ test "$$dir" != "$$p" || dir=.; \ echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done libmulredc.la: $(libmulredc_la_OBJECTS) $(libmulredc_la_DEPENDENCIES) $(EXTRA_libmulredc_la_DEPENDENCIES) $(LINK) $(libmulredc_la_OBJECTS) $(libmulredc_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f 
*.$(OBJEXT) distclean-compile: -rm -f *.tab.c .s.o: $(CCASCOMPILE) -c -o $@ $< .s.obj: $(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .s.lo: $(LTCCASCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ 
list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ clean-libtool clean-noinstLTLIBRARIES ctags distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags uninstall uninstall-am .asm.s: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.s .asm.S: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.S # Tell versions [3.59,3.63) of GNU 
make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: ecm-6.4.4/athlon/mulredc16.asm0000644023561000001540000001433312106741265013014 00000000000000# mp_limb_t mulredc16(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc16 TYPE(GSYM_PREFIX`'mulredc16,`function') GSYM_PREFIX`'mulredc16: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $136, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) ########################################### movl $16, 132(%esp) .align 32 Loop: ## compute u and store in %ebp movl 160(%esp), %eax movl 164(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 172(%esp) movl %eax, %ebp movl 168(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 16 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, 
%ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 60(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 64(%edi) adcl $0, 68(%edi) movl 160(%esp), %eax movl (%eax), %ebp movl 164(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 16 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, 
%ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) movl $0, %ecx adcl %eax, %ebx movl 60(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 56(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 60(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 64(%edi) adcl $0, 68(%edi) addl $4, 160(%esp) addl $4, %edi decl 132(%esp) jnz Loop ########################################### ### Copy result in z movl 156(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax # carry addl $136, %esp popl 
%ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc1.asm0000644023561000001540000000243612106741265012727 00000000000000# # mp_limb_t mulredc1(mp_limb_t *z, const mp_limb_t x, const mp_limb_t y, # const mp_limb_t m, mp_limb_t inv_m) # # Compute z := x*y mod m, in Montgomery representation, where x, y < m # and m is n limbs wide. inv_m is the least significant limb of the # inverse of m modulo 2^(n*GMP_LIMB_BITS) # # The result might be unreduced (larger than m) but becomes reduced # after subtracting m. The calling function should take care of that. # # We use a temporary space for the unreduced product on the stack. # Therefore, this cannot be used for large integers (anyway, the # algorithm is quadratic). # # WARNING: z is only n limbs but since it might be unreduced, there # could be a carry that does not fit in z. This carry is returned. include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc1 TYPE(GSYM_PREFIX`'mulredc1,`function') GSYM_PREFIX`'mulredc1: # Stack: # inv_m 20(%esp) # m 16 # y 12(%esp) # x 8 # z 4(%esp) movl 12(%esp), %eax mull 8(%esp) movl %edx, 12(%esp) movl %eax, 8(%esp) # store xy in [8(%esp):12(%esp)] mull 20(%esp) # compute u mull 16(%esp) # compute u*m addl 8(%esp), %eax # eax is 0, now (carry is important) adcl 12(%esp), %edx movl 4(%esp), %ecx movl %edx, (%ecx) adcl $0, %eax ret ecm-6.4.4/athlon/mulredc15.asm0000644023561000001540000001367312106741265013021 00000000000000# mp_limb_t mulredc15(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # return address (1 limb) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ...
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc15 TYPE(GSYM_PREFIX`'mulredc15,`function') GSYM_PREFIX`'mulredc15: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $128, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) ########################################### movl $15, 124(%esp) .align 32 Loop: ## compute u and store in %ebp movl 152(%esp), %eax movl 156(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 164(%esp) movl %eax, %ebp movl 160(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 15 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl 
%ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 56(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 60(%edi) adcl $0, 64(%edi) movl 152(%esp), %eax movl (%eax), %ebp movl 156(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 15 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) movl $0, %ecx adcl %eax, %ebx movl 44(%esi), %eax adcl %edx, %ecx mull %ebp addl 
%ebx, 40(%edi) movl $0, %ebx adcl %eax, %ecx movl 48(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 44(%edi) movl $0, %ecx adcl %eax, %ebx movl 52(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 48(%edi) movl $0, %ebx adcl %eax, %ecx movl 56(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 52(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 56(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 60(%edi) adcl $0, 64(%edi) addl $4, 152(%esp) addl $4, %edi decl 124(%esp) jnz Loop ########################################### ### Copy result in z movl 148(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax # carry addl $128, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc10.asm0000644023561000001540000001103312106741265013000 00000000000000# mp_limb_t mulredc10(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc10 TYPE(GSYM_PREFIX`'mulredc10,`function') GSYM_PREFIX`'mulredc10: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $88, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) ########################################### movl $10, 84(%esp) .align 32 Loop: ## compute u and store in %ebp movl 112(%esp), %eax movl 116(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 124(%esp) movl %eax, %ebp movl 120(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 10 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 36(%edi) adcl $0, %edx ### carry limb 
is in %edx addl %edx, 40(%edi) adcl $0, 44(%edi) movl 112(%esp), %eax movl (%eax), %ebp movl 116(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 10 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 36(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 40(%edi) adcl $0, 44(%edi) addl $4, 112(%esp) addl $4, %edi decl 84(%esp) jnz Loop ########################################### ### Copy result in z movl 108(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax # carry addl $88, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/Makefile.am0000644023561000001540000000224512106741265012543 
00000000000000MULREDC = mulredc1.asm mulredc2.asm mulredc3.asm mulredc4.asm mulredc5.asm \ mulredc6.asm mulredc7.asm mulredc8.asm mulredc9.asm mulredc10.asm \ mulredc11.asm mulredc12.asm mulredc13.asm mulredc14.asm \ mulredc15.asm mulredc16.asm mulredc17.asm mulredc18.asm \ mulredc19.asm mulredc20.asm EXTRA_DIST = Makefile.dev README autogen.py generate_all noinst_LTLIBRARIES = libmulredc.la # This library definition also causes the mulredc[n].asm and redc.asm files # to go in the distribution - no need for having them in EXTRA_DIST libmulredc_la_SOURCES = $(MULREDC) redc.asm noinst_HEADERS = mulredc.h # The asm code does not depend on any libraries except libc for abort() # if assertions are enabled LIBS = LDFLAGS = # It's actually the .s files that depend on config.m4, but automake # knows them only as intermediate files, not as targets. Adding the # dependency to libmulredc.la should work so long as no stale .s # files exist. libmulredc_la_DEPENDENCIES = $(top_builddir)/config.m4 .asm.s: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.s .asm.S: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.S ecm-6.4.4/athlon/mulredc4.asm0000644023561000001540000000553012106741265012730 00000000000000# mp_limb_t mulredc4(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc4 TYPE(GSYM_PREFIX`'mulredc4,`function') GSYM_PREFIX`'mulredc4: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $40, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) ########################################### movl $4, 36(%esp) .align 32 Loop: ## compute u and store in %ebp movl 64(%esp), %eax movl 68(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 76(%esp) movl %eax, %ebp movl 72(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 4 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 12(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 16(%edi) adcl $0, 20(%edi) movl 64(%esp), %eax movl (%eax), %ebp movl 68(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 4 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 12(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 16(%edi) adcl $0, 20(%edi) addl $4, 64(%esp) addl $4, %edi decl 36(%esp) jnz Loop ########################################### 
### Copy result in z movl 60(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax # carry addl $40, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc8.asm0000644023561000001540000000772412106741265012743 00000000000000# mp_limb_t mulredc8(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc8 TYPE(GSYM_PREFIX`'mulredc8,`function') GSYM_PREFIX`'mulredc8: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $72, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) ########################################### movl $8, 68(%esp) .align 32 Loop: ## compute u and store in %ebp movl 96(%esp), %eax movl 100(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 108(%esp) movl %eax, %ebp movl 104(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 8 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), 
%eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 28(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 32(%edi) adcl $0, 36(%edi) movl 96(%esp), %eax movl (%eax), %ebp movl 100(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 8 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) adcl %ecx, %eax adcl $0, %edx addl %eax, 28(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 32(%edi) adcl $0, 36(%edi) addl $4, 96(%esp) addl $4, %edi decl 68(%esp) jnz Loop ########################################### ### Copy result in z movl 92(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax # carry addl $72, %esp 
popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/README0000644023561000001540000000121012106741265011356 00000000000000mulredc[1..20].asm are size-specific asm code for mulredc. These are generated by the Python script autogen.py. To avoid making the package depend on Python, this generation is not done automatically by the autoconf/automake machinery. If you need to regenerate them, the syntax is ./autogen.py 3 > mulredc3.asm And you can generate all of them with the shell script ./generate_all This asm code uses no MMX/SSE2 instructions and should work on any x86 computer. redc.asm is a version of redc separated from the multiplication, since there are cases where it is needed. test_mulredc.c, bench.c and the Makefile are for development. ecm-6.4.4/athlon/mulredc11.asm0000644023561000001540000001147112106741265013007 00000000000000# mp_limb_t mulredc11(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # return address (1 limb) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ...
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc11 TYPE(GSYM_PREFIX`'mulredc11,`function') GSYM_PREFIX`'mulredc11: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $96, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) ########################################### movl $11, 92(%esp) .align 32 Loop: ## compute u and store in %ebp movl 120(%esp), %eax movl 124(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 132(%esp) movl %eax, %ebp movl 128(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 11 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 
40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 40(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 44(%edi) adcl $0, 48(%edi) movl 120(%esp), %eax movl (%eax), %ebp movl 124(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 11 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) movl $0, %ecx adcl %eax, %ebx movl 28(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 24(%edi) movl $0, %ebx adcl %eax, %ecx movl 32(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 28(%edi) movl $0, %ecx adcl %eax, %ebx movl 36(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 32(%edi) movl $0, %ebx adcl %eax, %ecx movl 40(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 36(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 40(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 44(%edi) adcl $0, 48(%edi) addl $4, 120(%esp) addl $4, %edi decl 92(%esp) jnz Loop ########################################### ### Copy result in z movl 116(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl 
%eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax # carry addl $96, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/athlon/mulredc7.asm0000644023561000001540000000726312106741265012740 00000000000000# mp_limb_t mulredc7(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc7 TYPE(GSYM_PREFIX`'mulredc7,`function') GSYM_PREFIX`'mulredc7: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $64, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) ########################################### movl $7, 60(%esp) .align 32 Loop: ## compute u and store in %ebp movl 88(%esp), %eax movl 92(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 100(%esp) movl %eax, %ebp movl 96(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 7 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 
16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 24(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 28(%edi) adcl $0, 32(%edi) movl 88(%esp), %eax movl (%eax), %ebp movl 92(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 7 ### kills %eax, %ebx, %ecx, %edx ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx movl (%esi), %eax mull %ebp movl %eax, %ebx movl %edx, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, (%edi) movl $0, %ebx adcl %eax, %ecx movl 8(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 4(%edi) movl $0, %ecx adcl %eax, %ebx movl 12(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 8(%edi) movl $0, %ebx adcl %eax, %ecx movl 16(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 12(%edi) movl $0, %ecx adcl %eax, %ebx movl 20(%esi), %eax adcl %edx, %ecx mull %ebp addl %ebx, 16(%edi) movl $0, %ebx adcl %eax, %ecx movl 24(%esi), %eax adcl %edx, %ebx mull %ebp addl %ecx, 20(%edi) adcl %ebx, %eax adcl $0, %edx addl %eax, 24(%edi) adcl $0, %edx ### carry limb is in %edx addl %edx, 28(%edi) adcl $0, 32(%edi) addl $4, 88(%esp) addl $4, %edi decl 60(%esp) jnz Loop ########################################### ### Copy result in z movl 84(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax # carry addl $64, %esp popl %ebx popl %esi popl %edi popl %ebp ret ecm-6.4.4/mul_fft-params.h.pentium30000644023561000001540000001073112106741273014051 00000000000000#define MUL_FFT_MODF_THRESHOLD 480 #define SQR_FFT_MODF_THRESHOLD 480 #define MUL_FFT_TABLE2 {{1, 4 /*66*/}, {305, 5 /*95*/}, {321, 4 /*97*/}, {337, 5 /*95*/}, {353, 4 /*97*/}, {369, 5 /*96*/}, {801, 6 /*96*/}, {1281, 7 
/*91*/}, {1409, 6 /*97*/}, {1601, 7 /*92*/}, {1921, 6 /*98*/}, {1985, 7 /*94*/}, {2689, 8 /*91*/}, {2817, 7 /*95*/}, {3201, 8 /*92*/}, {3329, 7 /*96*/}, {3457, 8 /*87*/}, {3841, 7 /*96*/}, {3969, 8 /*88*/}, {4865, 7 /*97*/}, {4993, 8 /*90*/}, {6913, 9 /*87*/}, {7681, 8 /*96*/}, {8961, 9 /*90*/}, {9729, 8 /*97*/}, {9985, 9 /*83*/}, {11777, 8 /*97*/}, {12033, 9 /*85*/}, {13825, 10 /*87*/}, {15361, 9 /*96*/}, {15873, 8 /*98*/}, {16129, 9 /*88*/}, {19969, 10 /*83*/}, {23553, 9 /*97*/}, {26113, 10 /*81*/}, {31745, 9 /*98*/}, {34305, 10 /*85*/}, {39937, 9 /*98*/}, {40449, 10 /*83*/}, {48129, 11 /*75*/}, {63489, 10 /*98*/}, {80897, 11 /*83*/}, {96257, 12 /*75*/}, {126977, 11 /*98*/}, {129025, 9 /*98*/}, {130561, 11 /*80*/}, {194561, 12 /*75*/}, {258049, 10 /*98*/}, {261121, 9 /*99*/}, {261633, 10 /*94*/}, {277505, 9 /*99*/}, {278017, 10 /*94*/}, {293889, 9 /*99*/}, {294401, 7 /*99*/}, {294529, 8 /*99*/}, {294657, 10 /*94*/}, {310273, 9 /*99*/}, {310785, 10 /*95*/}, {326657, 12 /*83*/}, {389121, 13 /*75*/}, {516097, 11 /*98*/}, {522241, 10 /*99*/}, {523265, 11 /*94*/}, {587777, 10 /*99*/}, {588801, 11 /*94*/}, {620545, 10 /*99*/}, {621569, 9 /*99*/}, {622081, 11 /*95*/}, {653313, 10 /*99*/}, {662529, 11 /*96*/}, {686081, 10 /*99*/}, {687105, 9 /*99*/}, {687617, 11 /*95*/}, {718849, 10 /*99*/}, {752641, 9 /*99*/}, {753153, 11 /*95*/}, {784385, 10 /*99*/}, {818177, 9 /*99*/}, {818689, 11 /*96*/}, {849921, 10 /*99*/}, {850945, 11 /*96*/}, {882689, 10 /*99*/}, {883713, 9 /*99*/}, {884225, 11 /*96*/}, {980993, 10 /*99*/}, {982017, 12 /*93*/}, {LONG_MAX, 0}} #define MUL_FFTM_TABLE2 {{1, 4 /*66*/}, {273, 5 /*94*/}, {289, 4 /*97*/}, {305, 5 /*95*/}, {609, 6 /*95*/}, {641, 5 /*97*/}, {673, 6 /*95*/}, {705, 5 /*97*/}, {737, 6 /*96*/}, {1473, 7 /*96*/}, {1537, 6 /*98*/}, {1601, 7 /*96*/}, {1665, 6 /*98*/}, {1729, 7 /*96*/}, {2689, 8 /*91*/}, {2817, 7 /*97*/}, {2945, 8 /*92*/}, {3329, 7 /*98*/}, {3457, 8 /*93*/}, {5377, 9 /*91*/}, {5633, 8 /*95*/}, {6401, 9 /*92*/}, {6657, 8 /*96*/}, 
{6913, 9 /*87*/}, {7681, 8 /*96*/}, {7937, 9 /*88*/}, {8705, 8 /*97*/}, {8961, 9 /*90*/}, {13825, 10 /*87*/}, {15361, 9 /*96*/}, {17921, 10 /*90*/}, {19457, 9 /*97*/}, {19969, 10 /*83*/}, {23553, 9 /*97*/}, {24065, 10 /*85*/}, {27649, 11 /*87*/}, {30721, 10 /*96*/}, {31745, 9 /*98*/}, {32257, 10 /*88*/}, {39937, 11 /*83*/}, {47105, 10 /*97*/}, {48129, 12 /*75*/}, {61441, 11 /*96*/}, {63489, 10 /*98*/}, {68609, 11 /*85*/}, {79873, 10 /*98*/}, {80897, 11 /*83*/}, {96257, 12 /*75*/}, {126977, 11 /*98*/}, {161793, 12 /*83*/}, {192513, 13 /*75*/}, {253953, 12 /*98*/}, {258049, 10 /*98*/}, {261121, 9 /*99*/}, {261633, 10 /*94*/}, {277505, 12 /*85*/}, {323585, 10 /*99*/}, {326657, 9 /*99*/}, {327169, 10 /*95*/}, {330753, 12 /*84*/}, {389121, 10 /*99*/}, {392193, 9 /*99*/}, {392705, 10 /*96*/}, {408577, 9 /*99*/}, {409089, 8 /*99*/}, {409345, 10 /*96*/}, {412673, 12 /*90*/}, {454657, 13 /*87*/}, {516097, 11 /*98*/}, {522241, 10 /*99*/}, {523265, 11 /*94*/}, {555009, 10 /*99*/}, {556033, 9 /*99*/}, {556545, 11 /*94*/}, {587777, 10 /*99*/}, {588801, 11 /*94*/}, {620545, 10 /*99*/}, {621569, 9 /*99*/}, {622081, 11 /*95*/}, {653313, 10 /*99*/}, {654337, 11 /*95*/}, {686081, 13 /*87*/}, {778241, 11 /*99*/}, {817153, 10 /*99*/}, {818177, 9 /*99*/}, {818689, 11 /*96*/}, {849921, 10 /*99*/}, {850945, 11 /*96*/}, {882689, 10 /*99*/}, {883713, 9 /*99*/}, {884225, 11 /*96*/}, {915457, 12 /*93*/}, {978945, 14 /*93*/}, {LONG_MAX, 0}} #define MUL_FFT_FULL_TABLE2 {{100, 2}, {216, 1}, {256, 2}, {264, 1}, {304, 2}, {312, 1}, {544, 4}, {560, 1}, {704, 2}, {720, 1}, {896, 2}, {960, 7}, {40960, 2}, {47616, 1}, {49152, 6}, {53760, 4}, {56320, 1}, {64512, 4}, {71680, 5}, {86016, 2}, {96768, 4}, {99840, 1}, {131072, 6}, {136192, 7}, {147456, 6}, {150528, 4}, {161280, 1}, {161792, 3}, {172032, 2}, {193536, 1}, {259072, 6}, {286720, 7}, {294912, 6}, {301056, 4}, {322560, 3}, {344064, 2}, {387072, 1}, {393216, 4}, {404480, 3}, {409600, 1}, {417792, 3}, {425984, 1}, {524288, 6}, {530432, 7}, 
{557056, 6}, {566272, 5}, {577536, 4}, {593920, 6}, {602112, 5}, {614400, 4}, {645120, 3}, {647168, 4}, {652800, 1}, {654336, 6}, {673792, 3}, {688128, 2}, {724992, 4}, {727040, 1}, {753664, 2}, {783360, 4}, {816640, 6}, {831488, 1}, {851968, 2}, {860160, 3}, {868352, 2}, {881664, 7}, {884736, 1}, {921600, 7}, {950272, 1}, {LONG_MAX, 1}} ecm-6.4.4/ecm-params.h.corei50000644023561000001540000000314412106741273012603 00000000000000/* tuned on confit.loria.fr (Intel(R) Core(TM) i5-2500 CPU) */ #ifndef HAVE_MPIR /* tuning parameters for GMP, tuned for GMP 5.0.4 */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1} #define MPZMOD_THRESHOLD 21 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 13, 13, 13, 14, 14, 15, 16, 16, 17, 20, 22} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 8 #define POLYINVERT_NTT_THRESHOLD 128 #define POLYEVALT_NTT_THRESHOLD 128 #define MPZSPV_NORMALISE_STRIDE 512 #else /* tuning parameters for MPIR, tuned for MPIR 2.5.1 */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,2,2} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,2,2,1,2,2,2,2,2,2,2} #define MPZMOD_THRESHOLD 21 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 11, 12, 13, 14, 15, 14, 16, 18, 18, 20, 18, 20} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 128 #define 
POLYEVALT_NTT_THRESHOLD 256 #define MPZSPV_NORMALISE_STRIDE 256 #endif ecm-6.4.4/pp1.c0000644023561000001540000007361412106741273010075 00000000000000/* The 'P+1' algorithm. Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Paul Zimmermann and Alexander Kruppa. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ /* References: A p+1 Method of Factoring, H. C. Williams, Mathematics of Computation, volume 39, number 159, pages 225-234, 1982. Evaluating recurrences of form X_{m+n} = f(X_m, X_n, X_{m-n}) via Lucas chains, Peter L. Montgomery, December 1983, revised January 1992. */ #include #include #include "ecm-impl.h" #ifdef HAVE_LIMITS_H # include #else # ifndef ULONG_MAX # define ULONG_MAX __GMP_ULONG_MAX # endif #endif /****************************************************************************** * * * Stage 1 * * * ******************************************************************************/ /* prime powers are accumulated up to about n^L1 */ #define L1 1 /* P1 <- V_e(P0), using P, Q as auxiliary variables, where V_{2k}(P0) = V_k(P0)^2 - 2 V_{2k-1}(P0) = V_k(P0)*V_{k-1}(P0) - P0. (More generally V_{m+n} = V_m * V_n - V_{m-n}.) Warning: P1 and P0 may be equal. 
*/
static void
pp1_mul (mpres_t P1, mpres_t P0, mpz_t e, mpmod_t n, mpres_t P, mpres_t Q)
{
  /* Sign of the exponent on entry; e is temporarily replaced by |e| (and
     later by |e|-1) and put back to its original value before returning. */
  const int e_sign = mpz_sgn (e);
  unsigned long bit;

  mpz_abs (e, e);

  if (e_sign == 0)
    {
      /* V_0(P0) = 2 */
      mpres_set_ui (P1, 2, n);
    }
  else if (mpz_cmp_ui (e, 1) == 0)
    {
      /* V_1(P0) = P0 */
      mpres_set (P1, P0, n);
    }
  else
    {
      /* Here |e| >= 2.  The chain below is driven by the bits of |e|-1. */
      mpz_sub_ui (e, e, 1);
      mpres_sqr (P, P0, n);
      mpres_sub_ui (P, P, 2, n); /* P = V_2(P0) = P0^2-2 */
      mpres_set (Q, P0, n);      /* Q = V_1(P0) = P0 */

      /* Loop invariant: (P, Q) = (V_{k+1}(P0), V_k(P0)), starting with k=1.
         The top bit of e is already accounted for by that starting state,
         the remaining bits are consumed from high to low. */
      bit = mpz_sizeinbase (e, 2) - 1;
      while (bit > 0)
        {
          bit--;
          if (mpz_tstbit (e, bit))
            {
              /* k -> 2k+1 */
              if (bit != 0) /* Q is not needed after the last iteration */
                {
                  mpres_mul (Q, P, Q, n);
                  mpres_sub (Q, Q, P0, n);
                }
              mpres_sqr (P, P, n);
              mpres_sub_ui (P, P, 2, n);
            }
          else
            {
              /* k -> 2k */
              mpres_mul (P, P, Q, n);
              mpres_sub (P, P, P0, n);
              if (bit != 0) /* Q is not needed after the last iteration */
                {
                  mpres_sqr (Q, Q, n);
                  mpres_sub_ui (Q, Q, 2, n);
                }
            }
        }

      /* P1 is written only now, so P1 and P0 may be the same variable. */
      mpres_set (P1, P, n);
      mpz_add_ui (e, e, 1); /* undo the decrement above */
    }

  /* undo the mpz_abs above */
  if (e_sign == -1)
    mpz_neg (e, e);
}

/* Input:  P0 is the initial point (sigma)
           n is the number to factor
           B1 is the stage 1 bound
           B1done: stage 1 was already done up to that limit
           go: if <> 1, group order to preload
   Output: f is the factor found; P0 is the value at end of stage 1
           B1done is set to B1 if stage 1 completed normally, or to the
             largest prime processed if interrupted, but never to a smaller
             value than B1done was upon function entry.
   Return value: non-zero iff a factor was found.
*/ static int pp1_stage1 (mpz_t f, mpres_t P0, mpmod_t n, double B1, double *B1done, mpz_t go, int (*stop_asap)(void), char *chkfilename) { double B0, p, q, r, last_chkpnt_p; mpz_t g; mpres_t P, Q; mpres_t R, S, T; int youpi = ECM_NO_FACTOR_FOUND; unsigned int max_size, size_n; long last_chkpnt_time; mpz_init (g); mpres_init (P, n); mpres_init (Q, n); mpres_init (R, n); mpres_init (S, n); mpres_init (T, n); B0 = ceil (sqrt (B1)); size_n = mpz_sizeinbase (n->orig_modulus, 2); max_size = L1 * size_n; if (mpz_cmp_ui (go, 1) > 0) pp1_mul (P0, P0, go, n, P, Q); /* suggestion from Peter Montgomery: start with exponent n^2-1, as factors of Lucas and Fibonacci number are either +/-1 (mod index), and so is n. Therefore, index will appear as a factor of n^2-1 and be included in stage 1. Do this only when n is composite, otherwise all tests with prime n factor of a Cunningham number will succeed in stage 1. As in P-1, for small overhead, use that trick only when lg(n) <= sqrt(B1). */ if ((double) size_n <= B0 && mpz_probab_prime_p (n->orig_modulus, PROBAB_PRIME_TESTS) == 0) { mpz_mul (g, n->orig_modulus, n->orig_modulus); mpz_sub_ui (g, g, 1); pp1_mul (P0, P0, g, n, P, Q); } mpz_set_ui (g, 1); last_chkpnt_p = 2.; last_chkpnt_time = cputime (); /* first loop through small primes <= sqrt(B1) */ for (p = 2.0; p <= B0; p = getprime ()) { for (q = 1, r = p; r <= B1; r *= p) if (r > *B1done) q *= p; mpz_mul_d (g, g, q, Q); if (mpz_sizeinbase (g, 2) >= max_size) { pp1_mul (P0, P0, g, n, P, Q); mpz_set_ui (g, 1); if (stop_asap != NULL && (*stop_asap) ()) { outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p); if (p > *B1done) *B1done = p; goto clear_and_exit; } } } pp1_mul (P0, P0, g, n, P, Q); #if 1 /* All primes sqrt(B1) < p <= B1 appear in exponent 1. 
All primes <= B1done are already included in exponent of at least 1, so it's save to skip ahead to B1done+1 */ if (*B1done > p) { getprime_seek ((*B1done) + 1.); p = getprime (); } #endif /* then all primes > sqrt(B1) and taken with exponent 1 */ for (; p <= B1; p = getprime ()) { pp1_mul_prac (P0, (ecm_uint) p, n, P, Q, R, S, T); if (stop_asap != NULL && (*stop_asap) ()) { outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p); if (p > *B1done) *B1done = p; goto clear_and_exit; } if (chkfilename != NULL && p > last_chkpnt_p + 10000. && elltime (last_chkpnt_time, cputime ()) > CHKPNT_PERIOD) { writechkfile (chkfilename, ECM_PP1, p, n, NULL, P0, NULL); last_chkpnt_p = p; last_chkpnt_time = cputime (); } } /* If stage 1 finished normally, p is the smallest prime >B1 here. In that case, set to B1 */ if (p > B1) p = B1; if (p > *B1done) *B1done = p; mpres_sub_ui (P, P0, 2, n); mpres_gcd (f, P, n); youpi = mpz_cmp_ui (f, 1); clear_and_exit: if (chkfilename != NULL) writechkfile (chkfilename, ECM_PP1, p, n, NULL, P0, NULL); getprime_clear (); /* free the prime tables, and reinitialize */ mpres_clear (Q, n); mpres_clear (R, n); mpres_clear (S, n); mpres_clear (T, n); mpz_clear (g); mpres_clear (P, n); return youpi; } /* checks if the factor p was found by P+1 or P-1 (when prime). a is the initial seed. */ static void pp1_check_factor (mpz_t a, mpz_t p) { if (mpz_probab_prime_p (p, PROBAB_PRIME_TESTS)) { mpz_mul (a, a, a); mpz_sub_ui (a, a, 4); if (mpz_jacobi (a, p) == 1) outputf (OUTPUT_NORMAL, "[factor found by P-1]\n"); } } /****************************************************************************** * * * Stage 2 * * * ******************************************************************************/ /* let alpha, beta be the roots of x^2-Px+1=0 set a, b such that alpha^e = a*alpha+b (idem for beta), i.e. a*x+b = rem(x^e, x^2-Px+1). Since (x-alpha)*(x-beta) = x^2-Px+1, we have alpha*beta = 1 and alpha+beta = P, i.e. 1/alpha = beta = -alpha + P. 
It seems that if x^e % (x^2-Px+1) = a*x+b, then x^{-e+1} % (x^2-Px+1) = b*x+a. Proof? */ static void pp1_mul2 (mpres_t a, mpres_t b, mpres_t P, mpz_t e, mpmod_t n) { unsigned long l; mpres_t t; mpz_t abs_e; const int positive_e = (mpz_sgn (e) > 0); if (mpz_cmp_ui (e, 0UL) == 0) /* x^0 = 1 */ { mpres_set_ui (a, 0, n); mpres_set_ui (b, 1, n); return; } mpres_init (t, n); mpz_init (abs_e); mpz_abs (abs_e, e); if (positive_e) { mpres_set_ui (a, 1, n); mpres_set_ui (b, 0, n); } else { /* Set to -x+P */ mpres_set_ui (a, 1, n); mpres_neg (a, a, n); mpres_set (b, P, n); } l = mpz_sizeinbase (abs_e, 2) - 1; /* number of bits of e (minus 1) */ while (l--) { /* square: (ax+b)^2 = (a^2P+2ab) x + (b^2-a^2) */ mpres_sqr (t, a, n); /* a^2 */ mpres_mul (a, a, b, n); mpres_add (a, a, a, n); /* 2ab */ mpres_sqr (b, b, n); /* b^2 */ mpres_sub (b, b, t, n); /* b^2-a^2 */ mpres_mul (t, t, P, n); /* a^2P */ mpres_add (a, t, a, n); /* a^2P+2ab */ if (mpz_tstbit (abs_e, l)) { if (positive_e) { /* multiply: (ax+b)*x = (aP+b) x - a */ mpres_mul (t, a, P, n); mpres_add (t, t, b, n); mpres_neg (b, a, n); mpres_set (a, t, n); } else { /* multiply: (ax+b)*(-x+P) = -ax^2+(aP-b)x+b*P == -bx + (bP + a) (mod x^2-P*x+1) */ mpres_mul (t, b, P, n); mpres_add (t, t, a, n); mpres_neg (a, b, n); mpres_set (b, t, n); } } } mpz_clear (abs_e); mpres_clear (t, n); } /* Performs the following: for (i=0;id1, root_params->d2, root_params->S, dF); mpres_init (u, modulus); mpres_init (v, modulus); if (ABS(root_params->S) == 1) /* special code with d1/6 muls */ { mpres_init (fd[0], modulus); mpres_init (fd[1], modulus); mpres_init (fd[2], modulus); mpz_set_ui (*t, root_params->d2); pp1_mul (fd[2], *x, *t, modulus, u, v); mpres_get_z (F[0], fd[2], modulus); mpz_set_ui (*t, 7UL); pp1_mul (fd[0], fd[2], *t, modulus, u, v); mpz_set_ui (*t, 6UL); pp1_mul (fd[1], fd[2], *t, modulus, u, v); /* fd[0] = V_{7*d2}(P), fd[1] = V_{6*d2}(P), fd[2] = V_{d2}(P) */ outputf (OUTPUT_VERBOSE, "Initializing table of differences for F 
took %ldms\n", elltime (st1, cputime ())); i = 1; j = 7; while (i < dF) { if (gcd (j, root_params->d1) == 1) /* (d2,d1) == 1 ==> (j*d2,d1) == (j,d1) */ mpres_get_z (F[i++], fd[0], modulus); /* V_{m+n} = V_m * V_n - V_{m-n} */ /* fd[0] = V_m, fd[1] = V_n, fd[2] = V_{m-n} */ mpres_swap (fd[0], fd[2], modulus); /* fd[0] = V_{m-n}, fd[1] = V_n, fd[2] = V_m */ mpres_mul (u, fd[2], fd[1], modulus); /* u = V_n * V_m */ mpres_sub (fd[0], u, fd[0], modulus); /* fd[0] = V_n * V_m - V_{m-n} = V_{m+n}, hence */ /* fd[0] = V_{m+n}, fd[1] = V_n, fd[2] = V_m */ j += 6; muls ++; } mpres_clear (fd[0], modulus); mpres_clear (fd[1], modulus); mpres_clear (fd[2], modulus); } else /* case |S| <> 1: this code works also for S=1, but is more expensive, since it can use up to 4*(d1/6) muls */ { init_roots_params (params, root_params->S, root_params->d1, root_params->d2, 1.0); mpz_set_ui (*t, 0UL); coeffs = init_progression_coeffs (*t, params->dsieve, root_params->d2, 1, 6, params->S, params->dickson_a); if (coeffs == NULL) return ECM_ERROR; state.fd = (point *) malloc (params->size_fd * sizeof (point)); if (state.fd == NULL) { clear_list (coeffs, params->size_fd); return ECM_ERROR; } for (i = 0; i < params->size_fd; i++) { mpres_init (state.fd[i].x, modulus); mpres_init (state.fd[i].y, modulus); /* if i = k*(S+1) + S for k>=1, we can copy x and y from i - (S+1) */ if (i > params->S && (i % (params->S + 1) == params->S)) { mpres_set (state.fd[i].x, state.fd[params->S].x, modulus); mpres_set (state.fd[i].y, state.fd[params->S].y, modulus); } else pp1_mul2 (state.fd[i].x, state.fd[i].y, x[0], coeffs[i], modulus); } clear_list (coeffs, params->size_fd); outputf (OUTPUT_VERBOSE, "Initializing table of differences for F took %ldms\n", elltime (st1, cputime ())); /* Now for the actual calculation of the roots. */ for (i = 0; i < dF && !youpi;) { /* Is this a rsieve value where we computed Dickson(j * d2) * X? 
*/ if (gcd (params->rsieve, params->dsieve) == 1) { /* Did we use every progression since the last update? */ if (params->next == params->nr) { /* Yes, time to update again */ addWnm (state.fd, x[0], modulus, params->nr, params->S, &muls); params->next = 0; } /* Is this a j value where we want Dickson(j*d2)*X as a root? */ if (gcd (params->rsieve, root_params->d1) == 1) { /* we have alpha^k = x * alpha + y thus alpha^k + beta^k = x * P + 2 * y. FIXME: can we avoid returning to the Lucas form? */ mpres_mul (u, state.fd[params->next * (params->S + 1)].x, x[0], modulus); mpres_add (v, state.fd[params->next * (params->S + 1)].y, state.fd[params->next * (params->S + 1)].y, modulus); mpres_add (u, u, v, modulus); mpres_get_z (F[i++], u, modulus); } params->next ++; } params->rsieve += 6; } for (i = 0; i < params->size_fd; i++) { mpres_clear (state.fd[i].x, modulus); mpres_clear (state.fd[i].y, modulus); } free (state.fd); } mpres_clear (u, modulus); mpres_clear (v, modulus); outputf (OUTPUT_VERBOSE, "Computing roots of F took %ldms", elltime (st, cputime ())); outputf (OUTPUT_DEVVERBOSE, " and %d muls", muls); outputf (OUTPUT_VERBOSE, "\n"); return youpi; } /* return NULL if an error occurred */ pp1_roots_state_t * pp1_rootsG_init (mpres_t *x, root_params_t *root_params, mpmod_t modulus) { mpres_t P; pp1_roots_state_t *state; progression_params_t *params; /* for less typing */ unsigned long i; ASSERT (gcd (root_params->d1, root_params->d2) == 1); state = (pp1_roots_state_t *) malloc (sizeof (pp1_roots_state_t)); if (state == NULL) return NULL; params = &(state->params); /* we don't need the sign anymore after pp1_rootsG_init */ params->S = ABS(root_params->S); if (params->S == 1) { mpz_t t; mpz_init (t); mpres_init (P, modulus); for (i = 0; i < 4; i++) mpres_init (state->tmp[i], modulus); params->dsieve = root_params->d2; /* needed in pp1_rootsG */ /* We want to skip values where gcd((i0 + i) * d1, d2) != 1. 
We can test for gcd(i0 + i, d2) instead and let pp1_rootsG() advance params->rsieve in steps of 1 */ /* params->rsieve = i0 % d2 */ params->rsieve = mpz_fdiv_ui (root_params->i0, root_params->d2); outputf (OUTPUT_DEVVERBOSE, "pp1_rootsG_init: i0 = %Zd, state: " "dsieve = %d, rsieve = %d, S = %d\n", root_params->i0, params->dsieve, params->rsieve, params->S); mpz_set_ui (t, root_params->d1); pp1_mul (state->tmp[1], *x, t, modulus, state->tmp[3], P); pp1_mul (state->tmp[0], state->tmp[1], root_params->i0, modulus, state->tmp[3], P); mpz_sub_ui (t, root_params->i0, 1); mpz_abs (t, t); pp1_mul (state->tmp[2], state->tmp[1], t, modulus, state->tmp[3], P); /* for P+1, tmp[0] = V_s(P), tmp[1] = V_d1(P), tmp[2] = V_{|s-d1|}(P) */ mpres_clear (P, modulus); mpz_clear (t); } else { listz_t coeffs; params->dickson_a = (root_params->S < 0) ? -1 : 0; params->nr = (root_params->d2 > 1) ? root_params->d2 - 1 : 1; params->size_fd = params->nr * (params->S + 1); params->next = 0; params->dsieve = 1; params->rsieve = 1; state->fd = (point *) malloc (params->size_fd * sizeof (point)); if (state->fd == NULL) { free (state); return NULL; } coeffs = init_progression_coeffs (root_params->i0, root_params->d2, root_params->d1, 1, 1, params->S, params->dickson_a); if (coeffs == NULL) { free (state->fd); free (state); return NULL; } for (i = 0; i < params->size_fd; i++) { mpres_init (state->fd[i].x, modulus); mpres_init (state->fd[i].y, modulus); /* The S-th coeff of all progressions is identical */ if (i > params->S && i % (params->S + 1) == params->S) { /* Simply copy from the first progression */ mpres_set (state->fd[i].x, state->fd[params->S].x, modulus); mpres_set (state->fd[i].y, state->fd[params->S].y, modulus); } else pp1_mul2 (state->fd[i].x, state->fd[i].y, x[0], coeffs[i], modulus); } clear_list (coeffs, params->size_fd); } return state; } void pp1_rootsG_clear (pp1_roots_state_t *state, ATTRIBUTE_UNUSED mpmod_t modulus) { unsigned long i; if (state->params.S == 1) { for (i = 0; i 
< 4; i++) mpres_clear (state->tmp[i], modulus); } else { for (i = 0; i < state->params.size_fd; i++) { mpres_clear (state->fd[i].x, modulus); mpres_clear (state->fd[i].y, modulus); } free (state->fd); } free (state); } int pp1_rootsG (listz_t G, unsigned long dF, pp1_roots_state_t *state, mpmod_t modulus, mpres_t *x) { unsigned long i; unsigned long muls = 0; long st; progression_params_t *params = &(state->params); /* for less typing */ st = cputime (); /* params->S is positive: we don't need the sign anymore, since the polynomial is defined by the table of differences */ if (params->S == 1) { for (i = 0; i < dF;) { if (gcd (params->rsieve, params->dsieve) == 1) { outputf (OUTPUT_TRACE, "pp1_rootsG: Taking root G[%d], rsieve = %d\n", i, params->rsieve); mpres_get_z (G[i++], state->tmp[0], modulus); } else { outputf (OUTPUT_TRACE, "pp1_rootsG: NOT taking root, rsieve = %d, gcd = %d\n", params->rsieve, gcd (params->rsieve, params->dsieve)); } mpres_swap (state->tmp[0], state->tmp[2], modulus); mpres_mul (state->tmp[3], state->tmp[2], state->tmp[1], modulus); mpres_sub (state->tmp[0], state->tmp[3], state->tmp[0], modulus); params->rsieve++; } } else { mpres_t u, v; mpres_init (u, modulus); mpres_init (v, modulus); for (i = 0; i < dF;) { /* Did we use every progression since the last update? */ if (params->next == params->nr) { /* Yes, time to update again */ addWnm (state->fd, x[0], modulus, params->nr, params->S, &muls); params->next = 0; } /* Is this a root we should skip? 
(Take only if gcd == 1) */ if (gcd (params->rsieve, params->dsieve) == 1) { mpres_mul (u, state->fd[params->next * (params->S + 1)].x, x[0], modulus); mpres_add (v, state->fd[params->next * (params->S + 1)].y, state->fd[params->next * (params->S + 1)].y, modulus); mpres_add (u, u, v, modulus); mpres_get_z (G[i++], u, modulus); } params->next ++; params->rsieve ++; } mpres_clear (u, modulus); mpres_clear (v, modulus); } outputf (OUTPUT_VERBOSE, "Computing roots of G took %ldms", elltime (st, cputime ())); outputf (OUTPUT_DEVVERBOSE, ", %lu muls", dF); outputf (OUTPUT_VERBOSE, "\n"); return ECM_NO_FACTOR_FOUND; } /****************************************************************************** * * * Williams P+1 * * * ******************************************************************************/ /* Input: p is the initial generator (sigma), if 0 generate it at random. n is the number to factor B1 is the stage 1 bound B2 is the stage 2 bound k is the number of blocks for stage 2 verbose is the verbosity level Output: p is the factor found Return value: non-zero iff a factor is found (1 for stage 1, 2 for stage 2) */ int pp1 (mpz_t f, mpz_t p, mpz_t n, mpz_t go, double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm, double B2scale, unsigned long k, const int S, int verbose, int repr, int use_ntt, FILE *os, FILE *es, char *chkfilename, char *TreeFilename, double maxmem, gmp_randstate_t rng, int (*stop_asap)(void)) { int youpi = ECM_NO_FACTOR_FOUND; int po2 = 0; /* Whether we should use power-of-2 poly degree */ long st; mpres_t a; mpmod_t modulus; mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */ unsigned long dF; root_params_t root_params; faststage2_param_t faststage2_params; const int stage2_variant = (S == 1 || S == ECM_DEFAULT_S); int twopass = 0; set_verbose (verbose); ECM_STDOUT = (os == NULL) ? stdout : os; ECM_STDERR = (es == NULL) ? 
stdout : es; /* if n is even, return 2 */ if (mpz_divisible_2exp_p (n, 1)) { mpz_set_ui (f, 2); return ECM_FACTOR_FOUND_STEP1; } st = cputime (); if (mpz_cmp_ui (p, 0) == 0) pm1_random_seed (p, n, rng); mpz_init_set (B2min, B2min_parm); mpz_init_set (B2, B2_parm); /* Set default B2. See ecm.c for comments */ if (ECM_IS_DEFAULT_B2(B2)) { if (stage2_variant == 0) mpz_set_d (B2, B2scale * pow (B1 * PP1_COST, DEFAULT_B2_EXPONENT)); else mpz_set_d (B2, B2scale * pow (B1 * PP1FS2_COST, PM1FS2_DEFAULT_B2_EXPONENT)); } /* set B2min */ if (mpz_sgn (B2min) < 0) mpz_set_d (B2min, B1); mpmod_init (modulus, n, repr); if (use_ntt) po2 = 1; if (stage2_variant != 0) { long P; const unsigned long lmax = 1UL<<28; /* An upper bound */ unsigned long lmax_NTT, lmax_noNTT; mpz_init (faststage2_params.m_1); faststage2_params.l = 0; /* Find out what the longest transform length is we can do at all. If no maxmem is given, the non-NTT can theoretically do any length. */ lmax_NTT = 0; if (use_ntt) { unsigned long t, t2 = 0; /* See what transform length that the NTT can handle (due to limited primes and limited memory) */ t = mpzspm_max_len (n); lmax_NTT = MIN (lmax, t); if (maxmem != 0.) { t = pp1fs2_maxlen (double_to_size (maxmem), n, use_ntt, 0); t = MIN (t, lmax_NTT); /* Maybe the two pass variant lets us use a longer transform */ t2 = pp1fs2_maxlen (double_to_size (maxmem), n, use_ntt, 1); t2 = MIN (t2, lmax_NTT); if (t2 > t) { t = t2; twopass = 1; } lmax_NTT = t; } outputf (OUTPUT_DEVVERBOSE, "NTT can handle lmax <= %lu\n", lmax_NTT); } /* See what transform length that the non-NTT code can handle */ lmax_noNTT = lmax; if (maxmem != 0.) 
{ unsigned long t; t = pp1fs2_maxlen (double_to_size (maxmem), n, 0, 0); lmax_noNTT = MIN (lmax_noNTT, t); outputf (OUTPUT_DEVVERBOSE, "non-NTT can handle lmax <= %lu\n", lmax_noNTT); } P = choose_P (B2min, B2, MAX(lmax_noNTT, lmax_NTT), k, &faststage2_params, B2min, B2, use_ntt, ECM_PP1); if (P == ECM_ERROR) { outputf (OUTPUT_ERROR, "Error: cannot choose suitable P value for your stage 2 " "parameters.\nTry a shorter B2min,B2 interval.\n"); mpz_clear (faststage2_params.m_1); return ECM_ERROR; } /* See if the selected parameters let us use NTT or not */ if (faststage2_params.l > lmax_NTT) use_ntt = 0; if (maxmem != 0.) { unsigned long MB; char *s; if (!use_ntt) s = "out"; else if (twopass) s = " two pass"; else s = " one pass"; MB = pp1fs2_memory_use (faststage2_params.l, n, use_ntt, twopass) / 1048576; outputf (OUTPUT_VERBOSE, "Using lmax = %lu with%s NTT which takes " "about %luMB of memory\n", faststage2_params.l, s, MB); } } else { mpz_init (root_params.i0); root_params.d2 = 0; /* Enable automatic choice of d2 */ if (bestD (&root_params, &k, &dF, B2min, B2, po2, use_ntt, maxmem, (TreeFilename != NULL), modulus) == ECM_ERROR) { youpi = ECM_ERROR; goto clear_and_exit; } /* Set default degree for Brent-Suyama extension */ root_params.S = S; if (root_params.S == ECM_DEFAULT_S) { if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0) { /* For Fermat numbers, default is 1 (no Brent-Suyama) */ root_params.S = 1; } else { mpz_t t; mpz_init (t); mpz_sub (t, B2, B2min); root_params.S = choose_S (t); mpz_clear (t); } } } /* Print B1, B2, polynomial and x0 */ print_B1_B2_poly (OUTPUT_NORMAL, ECM_PP1, B1, *B1done, B2min_parm, B2min, B2, (stage2_variant == 0) ? 
root_params.S : 1, p, 0, NULL); /* If we do a stage 2, print its parameters */ if (mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) outputf (OUTPUT_VERBOSE, "P = %lu, l = %lu, s_1 = %lu, k = s_2 = %lu, " "m_1 = %Zd\n", faststage2_params.P, faststage2_params.l, faststage2_params.s_1,faststage2_params.s_2, faststage2_params.m_1); else outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", dF, k, root_params.d1, root_params.d2, S == 1 ? faststage2_params.m_1 : root_params.i0); } mpres_init (a, modulus); mpres_set_z (a, p, modulus); /* since pp1_mul_prac takes an ecm_uint, we have to check that B1 <= ECM_UINT_MAX */ if (B1 > (double) ECM_UINT_MAX) { outputf (OUTPUT_ERROR, "Error, maximal step1 bound for P+1 is %lu\n", ECM_UINT_MAX); youpi = ECM_ERROR; goto clear_and_exit; } if (B1 > *B1done) youpi = pp1_stage1 (f, a, modulus, B1, B1done, go, stop_asap, chkfilename); outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", elltime (st, cputime ())); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t t; mpz_init (t); mpres_get_z (t, a, modulus); outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", t); mpz_clear (t); } mpres_get_z (p, a, modulus); if (stop_asap != NULL && (*stop_asap) ()) goto clear_and_exit; if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) { if (stage2_variant != 0) { if (use_ntt) youpi = pp1fs2_ntt (f, a, modulus, &faststage2_params, twopass); else youpi = pp1fs2 (f, a, modulus, &faststage2_params); } else youpi = stage2 (f, &a, modulus, dF, k, &root_params, ECM_PP1, use_ntt, TreeFilename, stop_asap); } if (youpi > 0 && test_verbose (OUTPUT_NORMAL)) pp1_check_factor (p, f); /* tell user if factor was found by P-1 */ clear_and_exit: mpres_clear (a, modulus); mpmod_clear (modulus); if (stage2_variant != 0) mpz_clear (faststage2_params.m_1); else mpz_clear (root_params.i0); mpz_clear (B2); mpz_clear (B2min); return youpi; } ecm-6.4.4/README.lib0000644023561000001540000001066512106741273010653 00000000000000This is the README file for the ecm library. 
To use the library, you need to add the following line in your source file: #include "ecm.h" and link with -lecm. The public interface is defined in the "ecm.h" file. It contains the following functions: int ecm_factor (mpz_t f, mpz_t n, double B1, ecm_params p) where n is the number to factor, f is the factor found (if any), B1 is the stage 1 bound, and p contains auxiliary parameters (see below). When p is NULL, default values for those parameters are chosen. The ecm_factor() function returns: * a positive value if a factor was found (1 for step 1, 2 for step 2), * zero when no factor was found, * a negative value when an error occurred. void ecm_init (ecm_params p) Initialize the parameters to default values. void ecm_clear (ecm_params p) Clear the parameters. Detailed description of parameters (ecm_params): * p->method is the factorization method (ECM_ECM for ECM, ECM_PM1 for P-1, ECM_PP1 for P+1). Default is ECM_ECM. * p->x (if non zero) is the starting point (ECM, P-1, P+1). For ECM, we take as starting point (x0 : y0) where x0=x, y0=1; for P-1, we take x0; for P+1, we take x0 as starting point of the Lucas sequence. When ecm_factor() returns, p->x is the point obtained after stage 1. * p->sigma (ECM only) is the "sigma" parameter. The elliptic curve chosen is b*y^2 = x^3 + a*x^2 + x where a = (v-u)^3*(3*u+v)/(4*u^3*v)-2, u = sigma^2-5, v = 4*sigma (Suyama's parametrization). The initial point (if p->x is zero) is taken as x0=u^3/v^3, y0=1 (thus b is taken as x0^3 + a*x0^2 + x0). * p->sigma_is_A (ECM only) indicates that p->sigma is the 'a' parameter from the elliptic curve. * p->go is the initial group order to preload (default is 1). * p->B1done tells that step 1 was already done up to B1done. This means that all prime powers <= B1done were dealt with. If for example B1done=100 and B1=200, prime 2 was dealt with up to power 6, thus it remains to "multiply" once by 2 to go up to power 7. 
Of course, all primes p such that B1done < p <= B1 will be considered with power 1. * p->B2min is the lower bound for stage 2, which will treat all primes p such that B2min <= p <= B2. If negative, B2min will be set to B1. * p->B2 is the upper bound for stage 2 (default is automatically computed from B1, to optimize the efficiency of the method). * p->k is the number of blocks used in stage 2 (default is ECM_DEFAULT_K). * p->S defines the polynomial used for Brent-Suyama's extension in stage 2. If positive, the polynomial used is x^S; if negative, it is Dickson's polynomial of degree |S| with parameter a=-1, where D_{1,a}(x) = x, D_{2,a}(x) = x^2-2*a, and D_{k+2,a}(x) = x*D_{k+1,a}(x) - a*D_{k,a}(x), or equivalently D_{k,a}(2*sqrt(a)*cos(t)) = 2*a^(k/2)*cos(k*t). If zero, choice is automatic (and should be close to optimal). Default is ECM_DEFAULT_S. * p->repr defines the representation used for modular arithmetic: 1 means the 'mpz' class from GMP, 2 means 'modmuln' (Montgomery's multiplication, quadratic implementation), 3 means 'redc' (Montgomery's multiplication, subquadratic implementation), -1 indicates not to use a special base-2 representation (when the input number is a factor of 2^n +/- 1). Other values (including 0) mean the representation will be chosen automatically (hopefully in some optimal way). * p->verbose is the verbosity level: 0 for no output, 1 for normal output (like default for GMP-ECM), 2 for diagnostic output without intermediate residues (like -v in GMP-ECM), 3 for diagnostic output with residues (like -v -v), 4 for high diagnostic output (-v -v -v), and 5 for trace output (-v -v -v -v). * p->os is the output stream used for verbose output. Default is stdout. * p->es is the output stream used for errors. Default is stderr. * p->TreeFilename if non NULL, is the file name to store the product tree of F (option -treefile f). * p->maxmem is the maximum amount of memory in bytes that should be used in stage 2.
Setting this value too low (< 10MB, say) will cause stage 2 to perform very poorly, or return with an error code. * p->stage1time is the time already spent in stage 1 (useful to get a correct estimation of the expected time to find factors). * p->rng is a random number generator state. * p->use_ntt if equal to 1, use NTT in stage 2. * p->(*stop_asap) pointer to function: if the function returns zero, continue normally, otherwise exit as soon as possible. May be NULL. ecm-6.4.4/getprime.c0000644023561000001540000002171312106741273011202 00000000000000/* Dynamic Eratosthenes sieve. Copyright 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009, 2012 Paul Zimmermann, Alexander Kruppa, Dave Newman. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #ifdef OUTSIDE_LIBECM # include "ecm-ecm.h" #else # include "ecm-impl.h" #endif /* This function returns successive odd primes, starting with 3. To perform a loop over all primes <= B1, do the following (compile this file with -DMAIN to count primes): for (p = 2.0; p <= B1; p = getprime ()) { ... } It is slightly less efficient (1.5 to 2 times) than Dan Bernstein's primegen library (http://cr.yp.to/primegen.html), however it is fast enough for our usage here. 
*/ /* We allow primes up to 2^53. This means len, the primes in primes[] etc. will stay well below 2^32. */ static double offset = 0.0; /* offset for current primes, must be 0 or odd */ static int current = -1; /* index of previous prime */ static unsigned int *primes = NULL; /* table of small primes up to sqrt(p) */ static unsigned int nprimes = 0; /* length of primes[] */ static unsigned char *sieve = NULL; /* sieving table */ static int len = 0; /* length of sieving table, WITHOUT sentinel */ static unsigned int *moduli = NULL; /* offset for small primes, moduli[i] = offset mod primes[i] */ /* sieve[i] == 1 if offset+2*i is a prime, otherwise sieve[i] == 0. sieve has len + 1 bytes allocated, the last byte is always 1 (a sentinel). This allows us avoid testing for the array end in the loop that looks for the next prime in sieve[]. */ /* The last prime returned by getprime is offset + 2*current */ /* primes[] contains small primes to needed to sieve out composites in sieve, i.e. all primes <= sqrt(offset + 2 * (len - 1)). moduli[i] contains the smallest k so that offset+2*(len+k) is divisible by primes[i], i.e. after advancing the sieve array by len, sieve[moduli[i]] is divisible by primes[i]. */ void getprime_clear () { offset = 0.0; current = -1; free (primes); primes = NULL; nprimes = 0; free (sieve); sieve = NULL; len = 0; free (moduli); moduli = NULL; } /* For p > 1, return 1 if p is prime and 0 if p is not prime. Requires that all primes <= sqrt(p) are in *primes */ static int isprime_ui (unsigned int p, unsigned int *primes) { int i; for (i = 0; primes[i] * primes[i] <= p; i++) if (p % primes[i] == 0) return 0; return 1; } double getprime () { /* the following complex block is equivalent to: while ((++current < len) && (sieve[current] == 0)); but is faster. 
*/ if (len > 0L) { unsigned char *ptr = sieve + current; while (*(++ptr) == 0); current = ptr - sieve; } else current = len; if (current < len) /* most calls will end here */ return offset + 2.0 * (double) current; /* otherwise we have to advance the sieve */ offset += 2.0 * (double) len; /* first enlarge sieving table if too small */ if ((double) len * (double) len < offset && len > 0) { free (sieve); len *= 2; sieve = (unsigned char *) malloc ((len + 1) * sizeof (unsigned char)); /* assume this "small" malloc will not fail in normal usage */ if (sieve == NULL) { fprintf (stderr, "Cannot allocate memory in getprime\n"); exit (1); } } /* now enlarge small prime table if too small */ if ((nprimes == 0) || (primes[nprimes-1] < sqrt(offset + 2*len))) { if (nprimes == 0) /* initialization */ { nprimes = 1; primes = (unsigned int *) malloc (nprimes * sizeof(unsigned int)); /* assume this "small" malloc will not fail in normal usage */ ASSERT(primes != NULL); moduli = (unsigned int *) malloc (nprimes * sizeof(unsigned int)); /* assume this "small" malloc will not fail in normal usage */ ASSERT(moduli != NULL); len = 1; sieve = (unsigned char *) malloc((len + 1) * sizeof(unsigned char)); /* len=1 here */ /* assume this "small" malloc will not fail in normal usage */ ASSERT(sieve != NULL); offset = 5.0; sieve[0] = 1; /* corresponding to 5 */ sieve[1] = 1; /* place the sentinel */ primes[0] = 3; moduli[0] = 1; /* After we advance sieve[], sieve[0] will correspond to 7 and sieve[1] to 9, which is the smallest odd multiple of 3 */ current = -1; return 3.0; } else { /* extend the existing table of small primes */ unsigned int i, j; i = nprimes; nprimes *= 2; primes = (unsigned int *) realloc (primes, nprimes * sizeof(unsigned int)); moduli = (unsigned int *) realloc (moduli, nprimes * sizeof(unsigned int)); /* assume those "small" realloc's will not fail in normal usage */ ASSERT_ALWAYS(primes != NULL && moduli != NULL); for (; i < nprimes; i++) { unsigned int p; /* find next 
(odd) prime */ for (p = primes[i - 1] + 2; !isprime_ui (p, primes); p += 2); primes[i] = p; /* moduli[i] is the smallest m such that offset + 2*m = k*p */ j = (unsigned long) fmod (offset, (double) p); j = (j == 0) ? j : p - j; /* -offset mod p */ if ((j % 2) != 0) j += p; /* ensure j is even */ moduli[i] = j / 2; } } } /* now sieve for new primes */ { int i, p; unsigned int j; /* Set sieve (including sentinel at the end) to 1 */ for (i = 0; i < len + 1; i++) sieve[i] = 1; for (j = 0; j < nprimes; j++) { p = primes[j]; for (i = moduli[j]; i < len; i += p) sieve[i] = 0; moduli[j] = i - len; /* for next sieving array */ } } current = -1; while (sieve[++current] == 0); ASSERT(current < len); /* otherwise we found a prime gap >= sqrt(x) around x */ return offset + 2.0 * (double) current; } /* Skips forward or backward in the sieve so that the next call to getprime returns the smallest prime >= pp */ void getprime_seek (double pp) { int i, p; unsigned int j; if (pp <= 3.) { getprime_clear (); return; } offset = floor (pp / 2.) * 2. 
+ 1.; /* make sure offset is odd */ /* Choose a large enough sieve array length */ for (i = 2; (double) i * (double) i < offset; i *= 2); /* Now allocate sieving table */ if (len > 0) free (sieve); len = i; sieve = (unsigned char *) malloc ((len + 1) * sizeof (unsigned char)); /* assume this "small" malloc will not fail in normal usage */ ASSERT_ALWAYS(sieve != NULL); j = 1; /* Find out how many small odd primes we'll need */ for (p = 5; (double)p*(double)p <= offset + (double)(2*len); p += 2) { for (i = 3; i*i <= p && p % i != 0; i += 2); if (i*i <= p) continue; if ((double)p*(double)p < offset + (double)len) j++; } /* Allocate memory for small primes */ if (nprimes != 0) { free (primes); free (moduli); } nprimes = j; primes = (unsigned int *) malloc (nprimes * sizeof(unsigned int)); moduli = (unsigned int *) malloc (nprimes * sizeof(unsigned int)); ASSERT_ALWAYS(primes != NULL && moduli != NULL); /* Fill small primes and moduli arrays */ for (p = 3, j = 0; j < nprimes; p += 2) { for (i = 3; i*i <= p && p % i != 0; i += 2); if (i*i <= p) continue; primes[j] = p; i = (unsigned int) fmod (offset, (double)p); i = (i == 0) ? i : p - i; /* -offset mod p */ if (i % 2 != 0) i += p; /* ensure i is even */ moduli[j] = i / 2; j++; } /* now sieve for new primes */ for (i = 0; i < len + 1; i++) sieve[i] = 1; for (j = 0; j < nprimes; j++) { p = primes[j]; for (i = moduli[j]; i < len; i += p) sieve[i] = 0; moduli[j] = i - len; /* for next sieving array */ } current = -1; } #ifdef MAIN int main (int argc, char *argv[]) { double p, B1, B2; unsigned long pi = 0; if (argc != 3) { fprintf (stderr, "Usage: getprime \n"); exit (EXIT_FAILURE); } B1 = atof (argv[1]); B2 = atof (argv[2]); if (B1 > 0.) 
getprime_seek (B1); p = 0; if (B1 <= 2) { printf("2\n"); pi++; } for (p = getprime (); p <= B2; p = getprime (), pi++) printf("%1.0f\n", p); /* printf ("pi(%1.0f) - pi(%1.0f - 1) = %lu\n", B2, B1, pi); */ getprime_clear (); return 0; } #endif ecm-6.4.4/bench_mulredc.c0000644023561000001540000004306012106741273012157 00000000000000#include "config.h" #include #include #include /* for LONG_MAX */ #include #include #include #if TIME_WITH_SYS_TIME # include # include #else # if HAVE_SYS_TIME_H # include # else # include # endif #endif #define LOOPCOUNT 10000000UL #define MAXSIZE 20 int tune_mul[MAXSIZE+1], tune_sqr[MAXSIZE+1]; #include #ifdef USE_ASM_REDC #include "mulredc.h" #endif #include "mpmod.h" #ifdef HAVE___GMPN_REDC_1 #ifndef __gmpn_redc_1 void __gmpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); #endif #endif #ifdef HAVE___GMPN_REDC_2 #ifndef __gmpn_redc_2 void __gmpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr); #endif #endif #ifdef HAVE___GMPN_REDC_N #ifndef __gmpn_redc_N void __gmpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr); #endif #endif /* cputime () gives the elapsed time in milliseconds */ #if defined (_WIN32) /* First case - GetProcessTimes () is the only known way of getting process * time (as opposed to calendar time) under mingw32 */ #include long cputime () { FILETIME lpCreationTime, lpExitTime, lpKernelTime, lpUserTime; ULARGE_INTEGER n; HANDLE hProcess = GetCurrentProcess(); GetProcessTimes (hProcess, &lpCreationTime, &lpExitTime, &lpKernelTime, &lpUserTime); /* copy FILETIME to a ULARGE_INTEGER as recommended by MSDN docs */ n.u.LowPart = lpUserTime.dwLowDateTime; n.u.HighPart = lpUserTime.dwHighDateTime; /* lpUserTime is in units of 100 ns. Return time in milliseconds */ return (long) (n.QuadPart / 10000); } #elif defined (HAVE_GETRUSAGE) /* Next case: getrusage () has higher resolution than clock () and so is preferred. 
*/ #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_RESOURCE_H # include #endif long cputime () { struct rusage rus; getrusage (RUSAGE_SELF, &rus); /* This overflows a 32 bit signed int after 2147483s = 24.85 days */ return rus.ru_utime.tv_sec * 1000L + rus.ru_utime.tv_usec / 1000L; } #else /* Resort to clock (), which on some systems may return calendar time. */ long cputime () { /* Return time in milliseconds */ return (long) (clock () * (1000. / (double) CLOCKS_PER_SEC)); } #endif /* defining cputime () */ void mp_print(mp_limb_t *x, int N) { int i; for (i = 0; i < N-1; ++i) gmp_printf("%Nd + W*(", x + i, 1); gmp_printf("%Nd", x + (N-1), 1); for (i = 0; i < N-1; ++i) printf(")"); printf("\n"); } static void ecm_redc_1_svoboda (mp_ptr rp, mp_ptr tmp, mp_srcptr np, mp_size_t nn, mp_limb_t invm, mp_srcptr sp) { mp_size_t j; mp_limb_t t0, cy; /* instead of adding {np, nn} * (invm * tmp[0] mod B), we add {sp, nn} * tmp[0], where {np, nn} * invm = B * {sp, nn} - 1 */ for (j = 0; j < nn - 1; j++, tmp++) rp[j + 1] = mpn_addmul_1 (tmp + 1, sp, nn, tmp[0]); /* for the last step, we reduce with {np, nn} */ t0 = mpn_addmul_1 (tmp, np, nn, tmp[0] * invm); tmp ++; rp[0] = tmp[0]; cy = mpn_add_n (rp + 1, rp + 1, tmp + 1, nn - 1); rp[nn-1] += t0; cy += rp[nn-1] < t0; if (cy != 0) mpn_sub_n (rp, rp, np, nn); /* a borrow should always occur here */ } void bench(mp_size_t N) { mp_limb_t *x, *y, *z, *m, *invm, *tmp, *svoboda1; unsigned long i; unsigned long iter; long tmul, tsqr, tredc_1, t_mulredc_1, tsvoboda1 = 0, t_sqrredc_1; long tmul_best = LONG_MAX, tsqr_best = LONG_MAX, tredc_best = LONG_MAX; mpz_t M, B; #ifdef USE_ASM_REDC long t2; #endif #ifdef HAVE_NATIVE_MULREDC1_N long t3 = 0; #endif #ifdef HAVE___GMPN_REDC_2 long tredc_2, t_mulredc_2, t_sqrredc_2; #endif #ifdef HAVE___GMPN_REDC_N long tredc_n, t_mulredc_n, t_sqrredc_n; #endif x = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); y = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); z = (mp_limb_t *) 
malloc((2*N)*sizeof(mp_limb_t)); m = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); tmp = (mp_limb_t *) malloc((2*N+2)*sizeof(mp_limb_t)); invm = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); svoboda1 = (mp_limb_t *) malloc(N*sizeof(mp_limb_t)); mpn_random(m, N); m[0] |= 1UL; if (m[N-1] == 0) m[N-1] = 1UL; mpz_init (M); mpz_init (B); mpz_set_ui (M, m[1]); mpz_mul_2exp (M, M, GMP_NUMB_BITS); mpz_add_ui (M, M, m[0]); mpz_set_ui (B, 1); mpz_mul_2exp (B, B, 2 * GMP_NUMB_BITS); mpz_invert (M, M, B); mpz_sub (M, B, M); for (i = 0; i < (unsigned) N; i++) invm[i] = mpz_getlimbn(M, i); tmp[N] = mpn_mul_1 (tmp, m, N, invm[0]); /* {tmp,N+1} should be = -1 mod B */ mpn_add_1 (tmp, tmp, N + 1, 1); /* now = 0 mod B */ mpn_copyi (svoboda1, tmp + 1, N); mpz_clear (M); mpz_clear (B); mpn_random(x, N); mpn_random(y, N); /* we set 'iter' to get about 100ms for each test */ tmul = cputime(); i = 0; iter = 1; do { iter = 2 * iter; for (; i < iter; i++) mpn_mul_n (tmp, x, y, N); } while (cputime() - tmul < 100); iter = (long) (((double) iter * 100.0) / (double) (cputime() - tmul)); tmul = cputime(); for (i = 0; i < iter; ++i) mpn_mul_n(tmp, x, y, N); tmul = cputime()-tmul; tsqr = cputime(); for (i = 0; i < iter; ++i) mpn_sqr (tmp, x, N); tsqr = cputime()-tsqr; #ifdef HAVE___GMPN_REDC_1 mpn_mul_n(tmp, x, y, N); tredc_1 = cputime(); for (i = 0; i < iter; ++i) __gmpn_redc_1 (z, tmp, m, N, invm[0]); tredc_1 = cputime()-tredc_1; if (tredc_1 < tredc_best) tredc_best = tredc_1; #endif if (N > 1) /* Svoboda only works for N > 1 */ { mpn_mul_n(tmp, x, y, N); tsvoboda1 = cputime(); for (i = 0; i < iter; ++i) ecm_redc_1_svoboda (z, tmp, m, N, invm[0], svoboda1); tsvoboda1 = cputime()-tsvoboda1; if (tsvoboda1 < tredc_best) tredc_best = tsvoboda1; } #ifdef HAVE___GMPN_REDC_2 mpn_mul_n(tmp, x, y, N); tredc_2 = cputime(); for (i = 0; i < iter; ++i) __gmpn_redc_2 (z, tmp, m, N, invm); tredc_2 = cputime()-tredc_2; if (tredc_2 < tredc_best) tredc_best = tredc_2; #endif #ifdef HAVE___GMPN_REDC_N mpn_mul_n(tmp, 
x, y, N); tredc_n = cputime(); for (i = 0; i < iter; ++i) __gmpn_redc_n (z, tmp, m, N, invm); tredc_n = cputime()-tredc_n; if (tredc_n < tredc_best) tredc_best = tredc_n; #endif #ifdef USE_ASM_REDC /* Mixed mul and redc */ t2 = cputime(); switch (N) { case 1: for (i=0; i < iter; ++i) { mulredc1(z, x[0], y[0], m[0], invm[0]); x[0] += tmp[0]; } break; case 2: for (i=0; i < iter; ++i) { mulredc2(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 3: for (i=0; i < iter; ++i) { mulredc3(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 4: for (i=0; i < iter; ++i) { mulredc4(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 5: for (i=0; i < iter; ++i) { mulredc5(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 6: for (i=0; i < iter; ++i) { mulredc6(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 7: for (i=0; i < iter; ++i) { mulredc7(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 8: for (i=0; i < iter; ++i) { mulredc8(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 9: for (i=0; i < iter; ++i) { mulredc9(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 10: for (i=0; i < iter; ++i) { mulredc10(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 11: for (i=0; i < iter; ++i) { mulredc11(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 12: for (i=0; i < iter; ++i) { mulredc12(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 13: for (i=0; i < iter; ++i) { mulredc13(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 14: for (i=0; i < iter; ++i) { mulredc14(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 15: for (i=0; i < iter; ++i) { mulredc15(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 16: for (i=0; i < iter; ++i) { mulredc16(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 17: for (i=0; i < iter; ++i) { mulredc17(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 18: for (i=0; i < iter; ++i) { mulredc18(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; case 19: for (i=0; i < iter; ++i) { mulredc19(z, x, y, m, invm[0]); x[0] += 
tmp[0]; } break; case 20: for (i=0; i < iter; ++i) { mulredc20(z, x, y, m, invm[0]); x[0] += tmp[0]; } break; default: for (i=0; i < iter; ++i) { mulredc20(z, x, y, m, invm[0]); x[0] += tmp[0]; } } t2 = cputime()-t2; if (t2 < tmul_best) { tmul_best = t2; tune_mul[N] = MPMOD_MULREDC; } if (t2 < tsqr_best) { tsqr_best = t2; tune_sqr[N] = MPMOD_MULREDC; } #endif /* Mul followed by mpn_redc_1 */ #ifdef HAVE___GMPN_REDC_1 t_mulredc_1 = cputime(); for (i = 0; i < iter; ++i) { mpn_mul_n(tmp, x, y, N); __gmpn_redc_1 (z, tmp, m, N, invm[0]); x[0] += tmp[0]; } t_mulredc_1 = cputime()-t_mulredc_1; if (t_mulredc_1 < tmul_best) { tune_mul[N] = MPMOD_MUL_REDC1; tmul_best = t_mulredc_1; } #endif /* Mul followed by mpn_redc_2 */ #ifdef HAVE___GMPN_REDC_2 t_mulredc_2 = cputime(); for (i = 0; i < iter; ++i) { mpn_mul_n(tmp, x, y, N); __gmpn_redc_2 (z, tmp, m, N, invm); x[0] += tmp[0]; } t_mulredc_2 = cputime()-t_mulredc_2; if (t_mulredc_2 < tmul_best) { tune_mul[N] = MPMOD_MUL_REDC2; tmul_best = t_mulredc_2; } #endif /* Mul followed by mpn_redc_n */ #ifdef HAVE___GMPN_REDC_N t_mulredc_n = cputime(); for (i = 0; i < iter; ++i) { mpn_mul_n (tmp, x, y, N); __gmpn_redc_n (z, tmp, m, N, invm); } t_mulredc_n = cputime()-t_mulredc_n; if (t_mulredc_n < tmul_best) { tune_mul[N] = MPMOD_MUL_REDCN; tmul_best = t_mulredc_n; } #endif /* Sqr followed by mpn_redc_1 */ #ifdef HAVE___GMPN_REDC_1 t_sqrredc_1 = cputime(); for (i = 0; i < iter; ++i) { mpn_sqr(tmp, x, N); __gmpn_redc_1 (z, tmp, m, N, invm[0]); x[0] += tmp[0]; } t_sqrredc_1 = cputime()-t_sqrredc_1; if (t_sqrredc_1 < tsqr_best) { tune_sqr[N] = MPMOD_MUL_REDC1; tsqr_best = t_sqrredc_1; } #endif /* Sqr followed by mpn_redc_2 */ #ifdef HAVE___GMPN_REDC_2 t_sqrredc_2 = cputime(); for (i = 0; i < iter; ++i) { mpn_sqr(tmp, x, N); __gmpn_redc_2 (z, tmp, m, N, invm); x[0] += tmp[0]; } t_sqrredc_2 = cputime()-t_sqrredc_2; if (t_sqrredc_2 < tsqr_best) { tune_sqr[N] = MPMOD_MUL_REDC2; tsqr_best = t_sqrredc_2; } #endif /* Sqr followed by mpn_redc_n 
*/ #ifdef HAVE___GMPN_REDC_N t_sqrredc_n = cputime(); for (i = 0; i < iter; ++i) { mpn_sqr (tmp, x, N); __gmpn_redc_n (z, tmp, m, N, invm); } t_sqrredc_n = cputime()-t_sqrredc_n; if (t_sqrredc_n < tsqr_best) { tune_sqr[N] = MPMOD_MUL_REDCN; tsqr_best = t_sqrredc_n; } #endif #ifdef HAVE_NATIVE_MULREDC1_N /* mulredc1 */ t3 = cputime(); switch (N) { case 1: for (i=0; i 1) { fprintf (stderr, "svoboda1 = %.3f", (double) tsvoboda1 * 1e3 / (double) iter); if (tsvoboda1 == tredc_best) fprintf (stderr, " *"); fprintf (stderr, "\n"); } #ifdef HAVE___GMPN_REDC_2 fprintf (stderr, "mpn_redc_2 = %.3f", (double) tredc_2 * 1e3 / (double) iter); if (tredc_2 == tredc_best) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif #ifdef HAVE___GMPN_REDC_N fprintf (stderr, "mpn_redc_n = %.3f", (double) tredc_n * 1e3 / (double) iter); if (tredc_n == tredc_best) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif fprintf (stderr, "\n"); /* modular multiplication */ #ifdef USE_ASM_REDC fprintf (stderr, "mulredc = %.3f", (double) t2 * 1e3 / (double) iter); if (tmul_best == t2) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif #ifdef HAVE___GMPN_REDC_1 fprintf (stderr, "mul+redc_1 = %.3f", (double) t_mulredc_1 * 1e3 / (double) iter); if (tmul_best == t_mulredc_1) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif #ifdef HAVE___GMPN_REDC_2 fprintf (stderr, "mul+redc_2 = %.3f", (double) t_mulredc_2 * 1e3 / (double) iter); if (tmul_best == t_mulredc_2) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif #ifdef HAVE___GMPN_REDC_N fprintf (stderr, "mul+redc_n = %.3f", (double) t_mulredc_n * 1e3 / (double) iter); if (tmul_best == t_mulredc_n) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif fprintf (stderr, "\n"); /* modular squaring */ #ifdef USE_ASM_REDC fprintf (stderr, "mulredc = %.3f", (double) t2 * 1e3 / (double) iter); if (tsqr_best == t2) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif #ifdef HAVE___GMPN_REDC_1 fprintf (stderr, "sqr+redc_1 = %.3f", (double) 
t_sqrredc_1 * 1e3 / (double) iter); if (tsqr_best == t_sqrredc_1) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif #ifdef HAVE___GMPN_REDC_2 fprintf (stderr, "sqr+redc_2 = %.3f", (double) t_sqrredc_2 * 1e3 / (double) iter); if (tsqr_best == t_sqrredc_2) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif #ifdef HAVE___GMPN_REDC_N fprintf (stderr, "sqr+redc_n = %.3f", (double) t_sqrredc_n * 1e3 / (double) iter); if (tsqr_best == t_sqrredc_n) fprintf (stderr, " *"); fprintf (stderr, "\n"); #endif #ifdef HAVE_NATIVE_MULREDC1_N /* multiplication of n limbs by one limb */ fprintf (stderr, "mulredc1 = %.3f\n", (double) t3 * 1e3 / (double) LOOPCOUNT); #endif fflush (stderr); free (tmp); free (x); free (y); free (z); free (m); free (invm); free (svoboda1); } int main(int argc, char** argv) { int i; int minsize = 1, maxsize = MAXSIZE; if (argc > 1) minsize = atoi (argv[1]); if (argc > 2) maxsize = atoi (argv[2]); for (i = minsize; i <= maxsize; ++i) bench(i); printf ("/* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */\n"); printf ("#define TUNE_MULREDC_TABLE {0"); for (i = 1; i <= maxsize; i++) printf (",%d", tune_mul[i]); printf ("}\n"); printf ("/* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */\n"); printf ("#define TUNE_SQRREDC_TABLE {0"); for (i = 1; i <= maxsize; i++) printf (",%d", tune_sqr[i]); printf ("}\n"); fflush (stdout); return 0; } ecm-6.4.4/ellparam_batch.c0000644023561000001540000001536012106741273012325 00000000000000/* ellparam_batch.c - Parametrization for batch mode 2 Copyright 2012 Cyril Bouvier. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "ecm-gmp.h" #include "ecm-impl.h" #if 0 /* this function is useful in debug mode to print residues */ static void mpres_print (mpres_t x, char* name, mpmod_t n) { mp_size_t m, xn; mpres_t t; mpres_init(t, n); mpz_set_ui(t, 1); mpres_mul (t, x, t, n); xn = SIZ(t); m = ABSIZ(t); MPN_NORMALIZE(PTR(t), m); SIZ(t) = xn >= 0 ? m : -m; gmp_printf ("%s=%Zd\n", name, t); SIZ(t) = xn; mpres_clear (t, n); } #endif static void dbl_param (mpres_t x, mpres_t y, mpres_t z, mpres_t t, mpres_t u, mpres_t v, mpmod_t n) { mpres_mul (z, y, z, n); /* Y1*Z1 */ mpres_mul_ui (z, z, 2, n); /* Z3 = 2*Y1*Z1 */ mpres_sqr (u, x, n); /* A = X1*X1 */ mpres_sqr (t, y, n); /* B = Y1*Y1 */ mpres_sqr (y, t, n); /* C = B^2 */ mpres_add (v, x, t, n); /* X1+B */ mpres_sqr (v, v, n); /* (X1+B)^2 */ mpres_sub (v, v, u, n); /* (X1+B)^2-A */ mpres_sub (v, v, y, n); /* (X1+B)^2-A-C */ mpres_mul_ui (v, v, 2, n); /* D = 2*((X1+B)^2-A-C) */ mpres_mul_ui (u, u, 3, n); /* E = 3*A */ mpres_sqr (t, u, n); /* F = E^2 */ mpres_mul_ui (x, v, 2, n); /* 2*D */ mpres_sub (x, t, x, n); /* X3 = F-2*D */ mpres_sub (v, v, x, n); /* D-X3 */ mpres_mul_ui (y, y, 8, n); /* 8*C */ mpres_mul (t, u, v, n); /* E*(D-X3) */ mpres_sub (y, t, y, n); /* Y3 = E*(D-X3)-8*C */ } /*Add sgn*P=(-3:sgn*3:1) to Q=(x:y:z) */ static void add_param (mpres_t x, mpres_t y, mpres_t z, int sgn, mpres_t t, mpres_t u, mpres_t v, mpres_t w, mpmod_t n) { mpres_sqr (t, z, n); /* Z1Z1 = Z1^2 */ mpres_mul_ui (u, t, 3, n); mpres_neg (u, u, 
n); /* U2 = X2*Z1Z1 with X2=-3 */ mpres_mul (v, z, t, n); /* Z1*Z1Z1 */ mpres_mul_ui (v, v, 3, n); /* S2 = Y2*Z1*Z1Z1 with Y2=sgn*3 */ if (sgn == -1) mpres_neg (v, v, n); /* S2 = Y2*Z1*Z1Z1 with Y2=sgn*3 */ mpres_sub (u, u, x, n); /* H = U2-X1 */ mpres_sqr (w, u, n); /* HH = H^2 */ mpres_add (z, z, u, n); /* Z1+H */ mpres_sqr (z, z, n); /* (Z1+H)^2 */ mpres_sub (z, z, t, n); /* (Z1+H)^2-Z1Z1 */ mpres_sub (z, z, w, n); /* Z3 = (Z1+H)^2-Z1Z1-HH */ mpres_mul_ui (t, w, 4, n); /* I = 4*HH */ mpres_mul (u, u, t, n); /* J = H*I */ mpres_sub (v, v, y, n); /* S2-Y1 */ mpres_mul_ui (v, v, 2, n); /* r = 2*(S2-Y1) */ mpres_mul (t, x, t, n); /* V = X1*I */ mpres_sqr (x, v, n); /* r^2 */ mpres_mul_ui (w, t, 2, n); /* 2*V */ mpres_sub (x, x, u, n); /* r^2-J */ mpres_sub (x, x, w, n); /* X3 = r^2-J-2*V */ mpres_sub (w, t, x, n); /* V-X3 */ mpres_mul (y, y, u, n); /* Y1*J */ mpres_mul_ui (y, y, 2, n); /* 2*Y1*J */ mpres_mul (w, v, w, n); /* r*(V-X3) */ mpres_sub (y, w, y, n); /* Y3=r*(V-X3)-2*Y1*J */ } static void addchain_param (mpres_t x, mpres_t y, mpres_t z, unsigned int s, mpres_t t, mpres_t u, mpres_t v, mpres_t w, mpmod_t n) { if (s == 1) { mpres_set_si (x, -3, n); mpres_set_ui (y, 3, n); mpres_set_ui (z, 1, n); } else if (s == 3) { addchain_param(x, y, z, s-1, t, u, v, w, n); add_param (x, y, z, +1, t, u, v, w, n); } else if (s % 2 == 0) { addchain_param(x, y, z, s/2, t, u, v, w, n); dbl_param (x, y, z, t, u, v, n); } else if (s % 4 == 1) { addchain_param(x, y, z, s-1, t, u, v, w, n); add_param (x, y, z, +1, t, u, v, w, n); } else /* (s % 4 == 3) and s != 3 */ { addchain_param(x, y, z, s+1, t, u, v, w, n); add_param (x, y, z, -1, t, u, v, w, n); } } /*Parametrization for BATCHMODE 2: generate curves with a point of order 3 and starting point (2:1) Compute k*P on y^2=x^3+36 with P=(-3,3); need k>1 x3 = (3*x+y+6)/(2*(y-3)) and A=-(3*x3^4+6*x3^2-1)/(4*x3^3)*/ int get_curve_from_ell_parametrization (mpz_t f, mpres_t A, mpz_t k, mpmod_t n) { mpres_t t, u, v, w, x, y, z; unsigned 
int s; MEMORY_TAG; mpres_init (t, n); MEMORY_TAG; mpres_init (u, n); MEMORY_TAG; mpres_init (v, n); MEMORY_TAG; mpres_init (w, n); MEMORY_TAG; mpres_init (x, n); MEMORY_TAG; mpres_init (y, n); MEMORY_TAG; mpres_init (z, n); MEMORY_UNTAG; s = mpz_get_ui (k); addchain_param (x, y, z, s, t, u, v, w, n); /* Now (x:y:z) = k*P */ if (!mpres_invert(u, z, n)) { mpres_gcd (f, z, n); mpres_clear (t, n); mpres_clear (u, n); mpres_clear (v, n); mpres_clear (w, n); mpres_clear (x, n); mpres_clear (y, n); mpres_clear (z, n); return ECM_FACTOR_FOUND_STEP1; } mpres_sqr (v, u, n); mpres_mul (u, v, u, n); mpres_mul (x, x, v, n); mpres_mul (y, y, u, n); mpres_sub_ui (t, y, 3, n); mpres_mul_ui (t, t, 2, n); if (!mpres_invert(u, t, n)) { mpres_gcd (f, t, n); mpres_clear (t, n); mpres_clear (u, n); mpres_clear (v, n); mpres_clear (w, n); mpres_clear (x, n); mpres_clear (y, n); mpres_clear (z, n); return ECM_FACTOR_FOUND_STEP1; } mpres_mul_ui (w, x, 3, n); mpres_add (w, w, y, n); mpres_add_ui (w, w, 6, n); mpres_mul (x, w, u, n); /* Now x contains x_3 */ /* A=-(3*x3^4+6*x3^2-1)/(4*x3^3) */ mpres_sqr (u, x, n); mpres_mul (v, u, x, n); mpres_sqr (w, u, n); mpres_mul_ui (u, u, 6, n); mpres_neg (u, u, n); mpres_mul_ui (v, v, 4, n); mpres_mul_ui (w, w, 3, n); mpres_neg (w, w, n); if (!mpres_invert(t, v, n)) { mpres_gcd (f, v, n); mpres_clear (t, n); mpres_clear (u, n); mpres_clear (v, n); mpres_clear (w, n); mpres_clear (x, n); mpres_clear (y, n); mpres_clear (z, n); return ECM_FACTOR_FOUND_STEP1; } mpres_add (w, w, u, n); mpres_add_ui (w, w, 1, n); mpres_mul (A, w, t, n); mpz_mod (A, A, n->orig_modulus); mpres_clear (t, n); mpres_clear (u, n); mpres_clear (v, n); mpres_clear (w, n); mpres_clear (x, n); mpres_clear (y, n); mpres_clear (z, n); return ECM_NO_FACTOR_FOUND; } ecm-6.4.4/config.sub0000755023561000001540000010517612106744312011210 00000000000000#! /bin/sh # Configuration validation subroutine script. 
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # 2011, 2012 Free Software Foundation, Inc. timestamp='2012-02-10' # This file is (in principle) common to ALL GNU software. # The presence of a machine in this file suggests that SOME GNU software # can handle that machine. It does not imply ALL GNU software can. # # This file is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Please send patches to . Submit a context # diff and a properly formatted GNU ChangeLog entry. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. # If it is invalid, we print an error message on stderr and exit with code 1. # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases # that are meaningful with *any* GNU software. 
# Each package is responsible for reporting which valid configurations # it does not support. The user should be able to distinguish # a failure to support a valid configuration from a meaningless # configuration. # The goal of this file is to map all the various variations of a given # machine specification into a single specification in the form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM # or in some cases, the newer four-part form: # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] CPU-MFR-OPSYS $0 [OPTION] ALIAS Canonicalize a configuration name. Operation modes: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit Report bugs and patches to ." version="\ GNU config.sub ($timestamp) Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" Try \`$me --help' for more information." # Parse command line while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) echo "$timestamp" ; exit ;; --version | -v ) echo "$version" ; exit ;; --help | --h* | -h ) echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. break ;; -* ) echo "$me: invalid option $1$help" exit 1 ;; *local*) # First pass through any local machine types. echo $1 exit ;; * ) break ;; esac done case $# in 0) echo "$me: missing argument$help" >&2 exit 1;; 1) ;; *) echo "$me: too many arguments$help" >&2 exit 1;; esac # Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). # Here we must recognize all the valid KERNEL-OS combinations. 
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ knetbsd*-gnu* | netbsd*-gnu* | \ kopensolaris*-gnu* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; android-linux) os=-linux-android basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown ;; *) basic_machine=`echo $1 | sed 's/-[^-]*$//'` if [ $basic_machine != $1 ] then os=`echo $1 | sed 's/.*-/-/'` else os=; fi ;; esac ### Let's recognize common machines as not being operating systems so ### that things like config.sub decstation-3100 work. We also ### recognize some manufacturers as not being operating systems, so we ### can provide default operating systems below. case $os in -sun*os*) # Prevent following clause from handling this invalid input. ;; -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ -apple | -axis | -knuth | -cray | -microblaze) os= basic_machine=$1 ;; -bluegene*) os=-cnk ;; -sim | -cisco | -oki | -wec | -winbond) os= basic_machine=$1 ;; -scout) ;; -wrs) os=-vxworks basic_machine=$1 ;; -chorusos*) os=-chorusos basic_machine=$1 ;; -chorusrdb) os=-chorusrdb basic_machine=$1 ;; -hiux*) os=-hiuxwe2 ;; -sco6) os=-sco5v6 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco5) os=-sco3.2v5 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco4) os=-sco3.2v4 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco3.2.[4-9]*) os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` basic_machine=`echo $1 | 
sed -e 's/86-.*/86-pc/'` ;; -sco3.2v[4-9]*) # Don't forget version if it is 3.2v4 or newer. basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco5v6*) # Don't forget version if it is 3.2v4 or newer. basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco*) os=-sco3.2v2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -udk*) basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -isc) os=-isc2.2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -clix*) basic_machine=clipper-intergraph ;; -isc*) basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -lynx*) os=-lynxos ;; -ptx*) basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` ;; -windowsnt*) os=`echo $os | sed -e 's/windowsnt/winnt/'` ;; -psos*) os=-psos ;; -mint | -mint[0-9]*) basic_machine=m68k-atari os=-mint ;; esac # Decode aliases for certain CPU-COMPANY combinations. case $basic_machine in # Recognize the basic CPU types without company name. # Some are omitted here because they have special meanings below. 1750a | 580 \ | a29k \ | aarch64 | aarch64_be \ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ | be32 | be64 \ | bfin \ | c4x | clipper \ | d10v | d30v | dlx | dsp16xx \ | epiphany \ | fido | fr30 | frv \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ | le32 | le64 \ | lm32 \ | m32c | m32r | m32rle | m68000 | m68k | m88k \ | maxq | mb | microblaze | mcore | mep | metag \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ | mips64octeon | mips64octeonel \ | mips64orion | mips64orionel \ | mips64r5900 | mips64r5900el \ | mips64vr | mips64vrel \ | mips64vr4100 | mips64vr4100el \ | mips64vr4300 | mips64vr4300el \ | mips64vr5000 | mips64vr5000el \ | mips64vr5900 | mips64vr5900el \ | mipsisa32 | 
mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ | mipsisa64 | mipsisa64el \ | mipsisa64r2 | mipsisa64r2el \ | mipsisa64sb1 | mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ | moxie \ | mt \ | msp430 \ | nds32 | nds32le | nds32be \ | nios | nios2 \ | ns16k | ns32k \ | open8 \ | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ | pyramid \ | rl78 | rx \ | score \ | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ | spu \ | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) basic_machine=$basic_machine-unknown ;; c54x) basic_machine=tic54x-unknown ;; c55x) basic_machine=tic55x-unknown ;; c6x) basic_machine=tic6x-unknown ;; m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip) basic_machine=$basic_machine-unknown os=-none ;; m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) ;; ms1) basic_machine=mt-unknown ;; strongarm | thumb | xscale) basic_machine=arm-unknown ;; xgate) basic_machine=$basic_machine-unknown os=-none ;; xscaleeb) basic_machine=armeb-unknown ;; xscaleel) basic_machine=armel-unknown ;; # We use `pc' rather than `unknown' # because (1) that's what they normally are, and # (2) the word "unknown" tends to confuse beginning users. i*86 | x86_64) basic_machine=$basic_machine-pc ;; # Object if more than one company name word. *-*-*) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 exit 1 ;; # Recognize the basic CPU types with company name. 
580-* \ | a29k-* \ | aarch64-* | aarch64_be-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | hexagon-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ | le32-* | le64-* \ | lm32-* \ | m32c-* | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ | mips64octeon-* | mips64octeonel-* \ | mips64orion-* | mips64orionel-* \ | mips64r5900-* | mips64r5900el-* \ | mips64vr-* | mips64vrel-* \ | mips64vr4100-* | mips64vr4100el-* \ | mips64vr4300-* | mips64vr4300el-* \ | mips64vr5000-* | mips64vr5000el-* \ | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ | mipsisa64-* | mipsisa64el-* \ | mipsisa64r2-* | mipsisa64r2el-* \ | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipstx39-* | mipstx39el-* \ | mmix-* \ | mt-* \ | msp430-* \ | nds32-* | nds32le-* | nds32be-* \ | nios-* | nios2-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | open8-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ | pyramid-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | 
sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ | tron-* \ | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ | ymp-* \ | z8k-* | z80-*) ;; # Recognize the basic CPU types without company name, with glob match. xtensa*) basic_machine=$basic_machine-unknown ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 386bsd) basic_machine=i386-unknown os=-bsd ;; 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) basic_machine=m68000-att ;; 3b*) basic_machine=we32k-att ;; a29khif) basic_machine=a29k-amd os=-udi ;; abacus) basic_machine=abacus-unknown ;; adobe68k) basic_machine=m68010-adobe os=-scout ;; alliant | fx80) basic_machine=fx80-alliant ;; altos | altos3068) basic_machine=m68k-altos ;; am29k) basic_machine=a29k-none os=-bsd ;; amd64) basic_machine=x86_64-pc ;; amd64-*) basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; amdahl) basic_machine=580-amdahl os=-sysv ;; amiga | amiga-*) basic_machine=m68k-unknown ;; amigaos | amigados) basic_machine=m68k-unknown os=-amigaos ;; amigaunix | amix) basic_machine=m68k-unknown os=-sysv4 ;; apollo68) basic_machine=m68k-apollo os=-sysv ;; apollo68bsd) basic_machine=m68k-apollo os=-bsd ;; aros) basic_machine=i386-pc os=-aros ;; aux) basic_machine=m68k-apple os=-aux ;; balance) basic_machine=ns32k-sequent os=-dynix ;; blackfin) basic_machine=bfin-unknown os=-linux ;; blackfin-*) basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` os=-linux ;; bluegene*) basic_machine=powerpc-ibm os=-cnk ;; c54x-*) basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` ;; c55x-*) basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` ;; c6x-*) 
basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` ;; c90) basic_machine=c90-cray os=-unicos ;; cegcc) basic_machine=arm-unknown os=-cegcc ;; convex-c1) basic_machine=c1-convex os=-bsd ;; convex-c2) basic_machine=c2-convex os=-bsd ;; convex-c32) basic_machine=c32-convex os=-bsd ;; convex-c34) basic_machine=c34-convex os=-bsd ;; convex-c38) basic_machine=c38-convex os=-bsd ;; cray | j90) basic_machine=j90-cray os=-unicos ;; craynv) basic_machine=craynv-cray os=-unicosmp ;; cr16 | cr16-*) basic_machine=cr16-unknown os=-elf ;; crds | unos) basic_machine=m68k-crds ;; crisv32 | crisv32-* | etraxfs*) basic_machine=crisv32-axis ;; cris | cris-* | etrax*) basic_machine=cris-axis ;; crx) basic_machine=crx-unknown os=-elf ;; da30 | da30-*) basic_machine=m68k-da30 ;; decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) basic_machine=mips-dec ;; decsystem10* | dec10*) basic_machine=pdp10-dec os=-tops10 ;; decsystem20* | dec20*) basic_machine=pdp10-dec os=-tops20 ;; delta | 3300 | motorola-3300 | motorola-delta \ | 3300-motorola | delta-motorola) basic_machine=m68k-motorola ;; delta88) basic_machine=m88k-motorola os=-sysv3 ;; dicos) basic_machine=i686-pc os=-dicos ;; djgpp) basic_machine=i586-pc os=-msdosdjgpp ;; dpx20 | dpx20-*) basic_machine=rs6000-bull os=-bosx ;; dpx2* | dpx2*-bull) basic_machine=m68k-bull os=-sysv3 ;; ebmon29k) basic_machine=a29k-amd os=-ebmon ;; elxsi) basic_machine=elxsi-elxsi os=-bsd ;; encore | umax | mmax) basic_machine=ns32k-encore ;; es1800 | OSE68k | ose68k | ose | OSE) basic_machine=m68k-ericsson os=-ose ;; fx2800) basic_machine=i860-alliant ;; genix) basic_machine=ns32k-ns ;; gmicro) basic_machine=tron-gmicro os=-sysv ;; go32) basic_machine=i386-pc os=-go32 ;; h3050r* | hiux*) basic_machine=hppa1.1-hitachi os=-hiuxwe2 ;; h8300hms) basic_machine=h8300-hitachi os=-hms ;; h8300xray) basic_machine=h8300-hitachi os=-xray ;; h8500hms) basic_machine=h8500-hitachi os=-hms ;; harris) basic_machine=m88k-harris os=-sysv3 ;; 
hp300-*) basic_machine=m68k-hp ;; hp300bsd) basic_machine=m68k-hp os=-bsd ;; hp300hpux) basic_machine=m68k-hp os=-hpux ;; hp3k9[0-9][0-9] | hp9[0-9][0-9]) basic_machine=hppa1.0-hp ;; hp9k2[0-9][0-9] | hp9k31[0-9]) basic_machine=m68000-hp ;; hp9k3[2-9][0-9]) basic_machine=m68k-hp ;; hp9k6[0-9][0-9] | hp6[0-9][0-9]) basic_machine=hppa1.0-hp ;; hp9k7[0-79][0-9] | hp7[0-79][0-9]) basic_machine=hppa1.1-hp ;; hp9k78[0-9] | hp78[0-9]) # FIXME: really hppa2.0-hp basic_machine=hppa1.1-hp ;; hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) # FIXME: really hppa2.0-hp basic_machine=hppa1.1-hp ;; hp9k8[0-9][13679] | hp8[0-9][13679]) basic_machine=hppa1.1-hp ;; hp9k8[0-9][0-9] | hp8[0-9][0-9]) basic_machine=hppa1.0-hp ;; hppa-next) os=-nextstep3 ;; hppaosf) basic_machine=hppa1.1-hp os=-osf ;; hppro) basic_machine=hppa1.1-hp os=-proelf ;; i370-ibm* | ibm*) basic_machine=i370-ibm ;; i*86v32) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv32 ;; i*86v4*) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv4 ;; i*86v) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv ;; i*86sol2) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-solaris2 ;; i386mach) basic_machine=i386-mach os=-mach ;; i386-vsta | vsta) basic_machine=i386-unknown os=-vsta ;; iris | iris4d) basic_machine=mips-sgi case $os in -irix*) ;; *) os=-irix4 ;; esac ;; isi68 | isi) basic_machine=m68k-isi os=-sysv ;; m68knommu) basic_machine=m68k-unknown os=-linux ;; m68knommu-*) basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` os=-linux ;; m88k-omron*) basic_machine=m88k-omron ;; magnum | m3230) basic_machine=mips-mips os=-sysv ;; merlin) basic_machine=ns32k-utek os=-sysv ;; microblaze) basic_machine=microblaze-xilinx ;; mingw32) basic_machine=i386-pc os=-mingw32 ;; mingw32ce) basic_machine=arm-unknown os=-mingw32ce ;; miniframe) basic_machine=m68000-convergent ;; *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) basic_machine=m68k-atari os=-mint ;; 
mips3*-*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` ;; mips3*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown ;; monitor) basic_machine=m68k-rom68k os=-coff ;; morphos) basic_machine=powerpc-unknown os=-morphos ;; msdos) basic_machine=i386-pc os=-msdos ;; ms1-*) basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` ;; msys) basic_machine=i386-pc os=-msys ;; mvs) basic_machine=i370-ibm os=-mvs ;; nacl) basic_machine=le32-unknown os=-nacl ;; ncr3000) basic_machine=i486-ncr os=-sysv4 ;; netbsd386) basic_machine=i386-unknown os=-netbsd ;; netwinder) basic_machine=armv4l-rebel os=-linux ;; news | news700 | news800 | news900) basic_machine=m68k-sony os=-newsos ;; news1000) basic_machine=m68030-sony os=-newsos ;; news-3600 | risc-news) basic_machine=mips-sony os=-newsos ;; necv70) basic_machine=v70-nec os=-sysv ;; next | m*-next ) basic_machine=m68k-next case $os in -nextstep* ) ;; -ns2*) os=-nextstep2 ;; *) os=-nextstep3 ;; esac ;; nh3000) basic_machine=m68k-harris os=-cxux ;; nh[45]000) basic_machine=m88k-harris os=-cxux ;; nindy960) basic_machine=i960-intel os=-nindy ;; mon960) basic_machine=i960-intel os=-mon960 ;; nonstopux) basic_machine=mips-compaq os=-nonstopux ;; np1) basic_machine=np1-gould ;; neo-tandem) basic_machine=neo-tandem ;; nse-tandem) basic_machine=nse-tandem ;; nsr-tandem) basic_machine=nsr-tandem ;; op50n-* | op60c-*) basic_machine=hppa1.1-oki os=-proelf ;; openrisc | openrisc-*) basic_machine=or32-unknown ;; os400) basic_machine=powerpc-ibm os=-os400 ;; OSE68000 | ose68000) basic_machine=m68000-ericsson os=-ose ;; os68k) basic_machine=m68k-none os=-os68k ;; pa-hitachi) basic_machine=hppa1.1-hitachi os=-hiuxwe2 ;; paragon) basic_machine=i860-intel os=-osf ;; parisc) basic_machine=hppa-unknown os=-linux ;; parisc-*) basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` os=-linux ;; pbd) basic_machine=sparc-tti ;; pbb) basic_machine=m68k-tti ;; pc532 | pc532-*) basic_machine=ns32k-pc532 ;; pc98) 
basic_machine=i386-pc ;; pc98-*) basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentium | p5 | k5 | k6 | nexgen | viac3) basic_machine=i586-pc ;; pentiumpro | p6 | 6x86 | athlon | athlon_*) basic_machine=i686-pc ;; pentiumii | pentium2 | pentiumiii | pentium3) basic_machine=i686-pc ;; pentium4) basic_machine=i786-pc ;; pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentiumpro-* | p6-* | 6x86-* | athlon-*) basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentium4-*) basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pn) basic_machine=pn-gould ;; power) basic_machine=power-ibm ;; ppc | ppcbe) basic_machine=powerpc-unknown ;; ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppc64) basic_machine=powerpc64-unknown ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppc64le | powerpc64little | ppc64-le | powerpc64-little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ps2) basic_machine=i386-ibm ;; pw32) basic_machine=i586-unknown os=-pw32 ;; rdos) basic_machine=i386-pc os=-rdos ;; rom68k) basic_machine=m68k-rom68k os=-coff ;; rm[46]00) basic_machine=mips-siemens ;; rtpc | rtpc-*) basic_machine=romp-ibm ;; s390 | s390-*) basic_machine=s390-ibm ;; s390x | s390x-*) basic_machine=s390x-ibm ;; sa29200) basic_machine=a29k-amd os=-udi ;; sb1) basic_machine=mipsisa64sb1-unknown ;; sb1el) basic_machine=mipsisa64sb1el-unknown ;; sde) basic_machine=mipsisa32-sde os=-elf ;; sei) basic_machine=mips-sei os=-seiux ;; sequent) 
basic_machine=i386-sequent ;; sh) basic_machine=sh-hitachi os=-hms ;; sh5el) basic_machine=sh5le-unknown ;; sh64) basic_machine=sh64-unknown ;; sparclite-wrs | simso-wrs) basic_machine=sparclite-wrs os=-vxworks ;; sps7) basic_machine=m68k-bull os=-sysv2 ;; spur) basic_machine=spur-unknown ;; st2000) basic_machine=m68k-tandem ;; stratus) basic_machine=i860-stratus os=-sysv4 ;; strongarm-* | thumb-*) basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` ;; sun2) basic_machine=m68000-sun ;; sun2os3) basic_machine=m68000-sun os=-sunos3 ;; sun2os4) basic_machine=m68000-sun os=-sunos4 ;; sun3os3) basic_machine=m68k-sun os=-sunos3 ;; sun3os4) basic_machine=m68k-sun os=-sunos4 ;; sun4os3) basic_machine=sparc-sun os=-sunos3 ;; sun4os4) basic_machine=sparc-sun os=-sunos4 ;; sun4sol2) basic_machine=sparc-sun os=-solaris2 ;; sun3 | sun3-*) basic_machine=m68k-sun ;; sun4) basic_machine=sparc-sun ;; sun386 | sun386i | roadrunner) basic_machine=i386-sun ;; sv1) basic_machine=sv1-cray os=-unicos ;; symmetry) basic_machine=i386-sequent os=-dynix ;; t3e) basic_machine=alphaev5-cray os=-unicos ;; t90) basic_machine=t90-cray os=-unicos ;; tile*) basic_machine=$basic_machine-unknown os=-linux-gnu ;; tx39) basic_machine=mipstx39-unknown ;; tx39el) basic_machine=mipstx39el-unknown ;; toad1) basic_machine=pdp10-xkl os=-tops20 ;; tower | tower-32) basic_machine=m68k-ncr ;; tpf) basic_machine=s390x-ibm os=-tpf ;; udi29k) basic_machine=a29k-amd os=-udi ;; ultra3) basic_machine=a29k-nyu os=-sym1 ;; v810 | necv810) basic_machine=v810-nec os=-none ;; vaxv) basic_machine=vax-dec os=-sysv ;; vms) basic_machine=vax-dec os=-vms ;; vpp*|vx|vx-*) basic_machine=f301-fujitsu ;; vxworks960) basic_machine=i960-wrs os=-vxworks ;; vxworks68) basic_machine=m68k-wrs os=-vxworks ;; vxworks29k) basic_machine=a29k-wrs os=-vxworks ;; w65*) basic_machine=w65-wdc os=-none ;; w89k-*) basic_machine=hppa1.1-winbond os=-proelf ;; xbox) basic_machine=i686-pc os=-mingw32 ;; xps | xps100) 
basic_machine=xps100-honeywell ;; xscale-* | xscalee[bl]-*) basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` ;; ymp) basic_machine=ymp-cray os=-unicos ;; z8k-*-coff) basic_machine=z8k-unknown os=-sim ;; z80-*-coff) basic_machine=z80-unknown os=-sim ;; none) basic_machine=none-none os=-none ;; # Here we handle the default manufacturer of certain CPU types. It is in # some cases the only manufacturer, in others, it is the most popular. w89k) basic_machine=hppa1.1-winbond ;; op50n) basic_machine=hppa1.1-oki ;; op60c) basic_machine=hppa1.1-oki ;; romp) basic_machine=romp-ibm ;; mmix) basic_machine=mmix-knuth ;; rs6000) basic_machine=rs6000-ibm ;; vax) basic_machine=vax-dec ;; pdp10) # there are many clones, so DEC is not a safe bet basic_machine=pdp10-unknown ;; pdp11) basic_machine=pdp11-dec ;; we32k) basic_machine=we32k-att ;; sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) basic_machine=sh-unknown ;; sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) basic_machine=sparc-sun ;; cydra) basic_machine=cydra-cydrome ;; orion) basic_machine=orion-highlevel ;; orion105) basic_machine=clipper-highlevel ;; mac | mpw | mac-mpw) basic_machine=m68k-apple ;; pmac | pmac-mpw) basic_machine=powerpc-apple ;; *-unknown) # Make sure to match an already-canonicalized machine name. ;; *) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 exit 1 ;; esac # Here we canonicalize certain aliases for manufacturers. case $basic_machine in *-digital*) basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` ;; *-commodore*) basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` ;; *) ;; esac # Decode manufacturer-specific aliases for certain operating systems. if [ x"$os" != x"" ] then case $os in # First match some system type aliases # that might get confused with valid system types. # -solaris* is a basic system type, with this one exception. 
-auroraux) os=-auroraux ;; -solaris1 | -solaris1.*) os=`echo $os | sed -e 's|solaris1|sunos4|'` ;; -solaris) os=-solaris2 ;; -svr4*) os=-sysv4 ;; -unixware*) os=-sysv4.2uw ;; -gnu/linux*) os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` ;; # First accept the basic system types. # The portable systems comes first. # Each alternative MUST END IN A *, to match a version number. # -sysv* is not here because it comes later, after sysvr4. -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ | -sym* | -kopensolaris* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ | -aos* | -aros* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ | -mingw32* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-uclibc* \ | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*) # Remember, each alternative MUST END IN *, to match a version number. 
;; -qnx*) case $basic_machine in x86-* | i*86-*) ;; *) os=-nto$os ;; esac ;; -nto-qnx*) ;; -nto*) os=`echo $os | sed -e 's|nto|nto-qnx|'` ;; -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) ;; -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; -linux-dietlibc) os=-linux-dietlibc ;; -linux*) os=`echo $os | sed -e 's|linux|linux-gnu|'` ;; -sunos5*) os=`echo $os | sed -e 's|sunos5|solaris2|'` ;; -sunos6*) os=`echo $os | sed -e 's|sunos6|solaris3|'` ;; -opened*) os=-openedition ;; -os400*) os=-os400 ;; -wince*) os=-wince ;; -osfrose*) os=-osfrose ;; -osf*) os=-osf ;; -utek*) os=-bsd ;; -dynix*) os=-bsd ;; -acis*) os=-aos ;; -atheos*) os=-atheos ;; -syllable*) os=-syllable ;; -386bsd) os=-bsd ;; -ctix* | -uts*) os=-sysv ;; -nova*) os=-rtmk-nova ;; -ns2 ) os=-nextstep2 ;; -nsk*) os=-nsk ;; # Preserve the version number of sinix5. -sinix5.*) os=`echo $os | sed -e 's|sinix|sysv|'` ;; -sinix*) os=-sysv4 ;; -tpf*) os=-tpf ;; -triton*) os=-sysv3 ;; -oss*) os=-sysv3 ;; -svr4) os=-sysv4 ;; -svr3) os=-sysv3 ;; -sysvr4) os=-sysv4 ;; # This must come after -sysvr4. -sysv*) ;; -ose*) os=-ose ;; -es1800*) os=-ose ;; -xenix) os=-xenix ;; -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) os=-mint ;; -aros*) os=-aros ;; -kaos*) os=-kaos ;; -zvmoe) os=-zvmoe ;; -dicos*) os=-dicos ;; -nacl*) ;; -none) ;; *) # Get rid of the `-' at the beginning of $os. os=`echo $os | sed 's/[^-]*-//'` echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 exit 1 ;; esac else # Here we handle the default operating systems that come with various machines. # The value should be what the vendor currently ships out the door with their # machine or put another way, the most popular os provided with the machine. 
# Note that if you're going to try to match "-MANUFACTURER" here (say, # "-sun"), then you have to tell the case statement up towards the top # that MANUFACTURER isn't an operating system. Otherwise, code above # will signal an error saying that MANUFACTURER isn't an operating # system, and we'll never get to this point. case $basic_machine in score-*) os=-elf ;; spu-*) os=-elf ;; *-acorn) os=-riscix1.2 ;; arm*-rebel) os=-linux ;; arm*-semi) os=-aout ;; c4x-* | tic4x-*) os=-coff ;; tic54x-*) os=-coff ;; tic55x-*) os=-coff ;; tic6x-*) os=-coff ;; # This must come before the *-dec entry. pdp10-*) os=-tops20 ;; pdp11-*) os=-none ;; *-dec | vax-*) os=-ultrix4.2 ;; m68*-apollo) os=-domain ;; i386-sun) os=-sunos4.0.2 ;; m68000-sun) os=-sunos3 ;; m68*-cisco) os=-aout ;; mep-*) os=-elf ;; mips*-cisco) os=-elf ;; mips*-*) os=-elf ;; or32-*) os=-coff ;; *-tti) # must be before sparc entry or we get the wrong os. os=-sysv3 ;; sparc-* | *-sun) os=-sunos4.1.1 ;; *-be) os=-beos ;; *-haiku) os=-haiku ;; *-ibm) os=-aix ;; *-knuth) os=-mmixware ;; *-wec) os=-proelf ;; *-winbond) os=-proelf ;; *-oki) os=-proelf ;; *-hp) os=-hpux ;; *-hitachi) os=-hiux ;; i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) os=-sysv ;; *-cbm) os=-amigaos ;; *-dg) os=-dgux ;; *-dolphin) os=-sysv3 ;; m68k-ccur) os=-rtu ;; m88k-omron*) os=-luna ;; *-next ) os=-nextstep ;; *-sequent) os=-ptx ;; *-crds) os=-unos ;; *-ns) os=-genix ;; i370-*) os=-mvs ;; *-next) os=-nextstep3 ;; *-gould) os=-sysv ;; *-highlevel) os=-bsd ;; *-encore) os=-bsd ;; *-sgi) os=-irix ;; *-siemens) os=-sysv4 ;; *-masscomp) os=-rtu ;; f30[01]-fujitsu | f700-fujitsu) os=-uxpv ;; *-rom68k) os=-coff ;; *-*bug) os=-coff ;; *-apple) os=-macos ;; *-atari*) os=-mint ;; *) os=-none ;; esac fi # Here we handle the case where we know the os, and the CPU type, but not the # manufacturer. We pick the logical manufacturer. 
vendor=unknown case $basic_machine in *-unknown) case $os in -riscix*) vendor=acorn ;; -sunos*) vendor=sun ;; -cnk*|-aix*) vendor=ibm ;; -beos*) vendor=be ;; -hpux*) vendor=hp ;; -mpeix*) vendor=hp ;; -hiux*) vendor=hitachi ;; -unos*) vendor=crds ;; -dgux*) vendor=dg ;; -luna*) vendor=omron ;; -genix*) vendor=ns ;; -mvs* | -opened*) vendor=ibm ;; -os400*) vendor=ibm ;; -ptx*) vendor=sequent ;; -tpf*) vendor=ibm ;; -vxsim* | -vxworks* | -windiss*) vendor=wrs ;; -aux*) vendor=apple ;; -hms*) vendor=hitachi ;; -mpw* | -macos*) vendor=apple ;; -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) vendor=atari ;; -vos*) vendor=stratus ;; esac basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` ;; esac echo $basic_machine$os exit # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" # End: ecm-6.4.4/ltmain.sh0000644023561000001540000105204012106744307011041 00000000000000 # libtool (GNU libtool) 2.4.2 # Written by Gordon Matzigkeit , 1996 # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, # 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. # This is free software; see the source for copying conditions. There is NO # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # GNU Libtool is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # As a special exception to the GNU General Public License, # if you distribute this file as part of a program or library that # is built using GNU Libtool, you may include this file under the # same distribution terms that you use for the rest of that program. 
# # GNU Libtool is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with GNU Libtool; see the file COPYING. If not, a copy # can be downloaded from http://www.gnu.org/licenses/gpl.html, # or obtained by writing to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # Usage: $progname [OPTION]... [MODE-ARG]... # # Provide generalized library-building support services. # # --config show all configuration variables # --debug enable verbose shell tracing # -n, --dry-run display commands without modifying any files # --features display basic configuration information and exit # --mode=MODE use operation mode MODE # --preserve-dup-deps don't remove duplicate dependency libraries # --quiet, --silent don't print informational messages # --no-quiet, --no-silent # print informational messages (default) # --no-warn don't display warning messages # --tag=TAG use configuration variables from tag TAG # -v, --verbose print more informational messages than default # --no-verbose don't print the extra informational messages # --version print version information # -h, --help, --help-all print short, long, or detailed help message # # MODE must be one of the following: # # clean remove files from the build directory # compile compile a source file into a libtool object # execute automatically set library path, then run a program # finish complete the installation of libtool libraries # install install libraries or executables # link create a library or an executable # uninstall remove libraries from an installed directory # # MODE-ARGS vary depending on the MODE. When passed as first option, # `--mode=MODE' may be abbreviated as `MODE' or a unique abbreviation of that. 
# Try `$progname --help --mode=MODE' for a more detailed description of MODE.
#
# When reporting a bug, please describe a test case to reproduce it and
# include the following information:
#
#         host-triplet:   $host
#         shell:          $SHELL
#         compiler:       $LTCC
#         compiler flags: $LTCFLAGS
#         linker:         $LD (gnu? $with_gnu_ld)
#         $progname:      (GNU libtool) 2.4.2 Debian-2.4.2-1ubuntu1
#         automake:       $automake_version
#         autoconf:       $autoconf_version
#
# Report bugs to <bug-libtool@gnu.org>.
# GNU libtool home page: <http://www.gnu.org/software/libtool/>.
# General help using GNU software: <http://www.gnu.org/gethelp/>.

# NOTE: the comment header above is runtime data: func_usage and func_help
# extract it from "$progpath" with sed, so it must stay one item per line.

PROGRAM=libtool
PACKAGE=libtool
VERSION="2.4.2 Debian-2.4.2-1ubuntu1"
TIMESTAMP=""
package_revision=1.3337

# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
  emulate sh
  NULLCMD=:
  # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
  # is contrary to our usage.  Disable this feature.
  alias -g '${1+"$@"}'='"$@"'
  setopt NO_GLOB_SUBST
else
  case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
fi
BIN_SH=xpg4; export BIN_SH # for Tru64
DUALCASE=1; export DUALCASE # for MKS sh

# A function that is used when there is no print builtin or printf.
# The here-document body and its terminator must each sit on their own line.
func_fallback_echo ()
{
  eval 'cat <<_LTECHO_EOF
$1
_LTECHO_EOF'
}

# NLS nuisances: We save the old values to restore during execute mode.
lt_user_locale=
lt_safe_locale=
for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
do
  eval "if test \"\${$lt_var+set}\" = set; then
          save_$lt_var=\$$lt_var
          $lt_var=C
          export $lt_var
          lt_user_locale=\"$lt_var=\\\$save_\$lt_var; \$lt_user_locale\"
          lt_safe_locale=\"$lt_var=C; \$lt_safe_locale\"
        fi"
done
LC_ALL=C
LANGUAGE=C
export LANGUAGE LC_ALL

$lt_unset CDPATH


# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh
# is ksh but when the shell is invoked as "sh" and the current value of
# the _XPG environment variable is not equal to 1 (one), the special
# positional parameter $0, within a function call, is the name of the
# function.
progpath="$0"

: ${CP="cp -f"}
test "${ECHO+set}" = set || ECHO=${as_echo-'printf %s\n'}
: ${MAKE="make"}
: ${MKDIR="mkdir"}
: ${MV="mv -f"}
: ${RM="rm -f"}
: ${SHELL="${CONFIG_SHELL-/bin/sh}"}
: ${Xsed="$SED -e 1s/^X//"}

# Global variables:
EXIT_SUCCESS=0
EXIT_FAILURE=1
EXIT_MISMATCH=63  # $? = 63 is used to indicate version mismatch to missing.
EXIT_SKIP=77      # $? = 77 is used to indicate a skipped test to automake.

exit_status=$EXIT_SUCCESS

# Make sure IFS has a sensible default: space, TAB and newline.
# lt_nl must hold a literal newline, hence the quoted line break.
lt_nl='
'
IFS=" 	$lt_nl"

dirname="s,/[^/]*$,,"
basename="s,^.*/,,"

# func_dirname file append nondir_replacement
# Compute the dirname of FILE.  If nonempty, add APPEND to the result,
# otherwise set result to NONDIR_REPLACEMENT.
# value returned in "$func_dirname_result"
func_dirname ()
{
    func_dirname_result=`$ECHO "${1}" | $SED "$dirname"`
    if test "X$func_dirname_result" = "X${1}"; then
      func_dirname_result="${3}"
    else
      func_dirname_result="$func_dirname_result${2}"
    fi
} # func_dirname may be replaced by extended shell implementation


# func_basename file
# value returned in "$func_basename_result"
func_basename ()
{
    func_basename_result=`$ECHO "${1}" | $SED "$basename"`
} # func_basename may be replaced by extended shell implementation


# func_dirname_and_basename file append nondir_replacement
# perform func_basename and func_dirname in a single function
# call:
#   dirname:  Compute the dirname of FILE.  If nonempty,
#             add APPEND to the result, otherwise set result
#             to NONDIR_REPLACEMENT.
#             value returned in "$func_dirname_result"
#   basename: Compute filename of FILE.
#             value returned in "$func_basename_result"
# Implementation must be kept synchronized with func_dirname
# and func_basename. For efficiency, we do not delegate to
# those functions but instead duplicate the functionality here.
func_dirname_and_basename ()
{
    # Extract subdirectory from the argument.
    func_dirname_result=`$ECHO "${1}" | $SED -e "$dirname"`
    if test "X$func_dirname_result" = "X${1}"; then
      func_dirname_result="${3}"
    else
      func_dirname_result="$func_dirname_result${2}"
    fi
    func_basename_result=`$ECHO "${1}" | $SED -e "$basename"`
} # func_dirname_and_basename may be replaced by extended shell implementation


# func_stripname prefix suffix name
# strip PREFIX and SUFFIX off of NAME.
# PREFIX and SUFFIX must not contain globbing or regex special
# characters, hashes, percent signs, but SUFFIX may contain a leading
# dot (in which case that matches only a dot).
# value returned in "$func_stripname_result"
# func_strip_suffix prefix name
func_stripname ()
{
    case ${2} in
      .*) func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%\\\\${2}\$%%"`;;
      *)  func_stripname_result=`$ECHO "${3}" | $SED "s%^${1}%%; s%${2}\$%%"`;;
    esac
} # func_stripname may be replaced by extended shell implementation


# These SED scripts presuppose an absolute path with a trailing slash.
pathcar='s,^/\([^/]*\).*$,\1,'
pathcdr='s,^/[^/]*,,'
# The label and the branch back to it must each end at a newline.
removedotparts=':dotsl
		s@/\./@/@g
		t dotsl
		s,/\.$,/,'
collapseslashes='s@/\{1,\}@/@g'
finalslash='s,/*$,/,'

# func_normal_abspath PATH
# Remove doubled-up and trailing slashes, "." path components,
# and cancel out any ".." path components in PATH after making
# it an absolute path.
#             value returned in "$func_normal_abspath_result"
func_normal_abspath ()
{
  # Start from root dir and reassemble the path.
  func_normal_abspath_result=
  func_normal_abspath_tpath=$1
  func_normal_abspath_altnamespace=
  case $func_normal_abspath_tpath in
    "")
      # Empty path, that just means $cwd.
      func_stripname '' '/' "`pwd`"
      func_normal_abspath_result=$func_stripname_result
      return
    ;;
    # The next three entries are used to spot a run of precisely
    # two leading slashes without using negated character classes;
    # we take advantage of case's first-match behaviour.
    ///*)
      # Unusual form of absolute path, do nothing.
    ;;
    //*)
      # Not necessarily an ordinary path; POSIX reserves leading '//'
      # and for example Cygwin uses it to access remote file shares
      # over CIFS/SMB, so we conserve a leading double slash if found.
      func_normal_abspath_altnamespace=/
    ;;
    /*)
      # Absolute path, do nothing.
    ;;
    *)
      # Relative path, prepend $cwd.
      func_normal_abspath_tpath=`pwd`/$func_normal_abspath_tpath
    ;;
  esac
  # Cancel out all the simple stuff to save iterations.  We also want
  # the path to end with a slash for ease of parsing, so make sure
  # there is one (and only one) here.
  func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
        -e "$removedotparts" -e "$collapseslashes" -e "$finalslash"`
  while :; do
    # Processed it all yet?
    if test "$func_normal_abspath_tpath" = / ; then
      # If we ascended to the root using ".." the result may be empty now.
      if test -z "$func_normal_abspath_result" ; then
        func_normal_abspath_result=/
      fi
      break
    fi
    func_normal_abspath_tcomponent=`$ECHO "$func_normal_abspath_tpath" | $SED \
        -e "$pathcar"`
    func_normal_abspath_tpath=`$ECHO "$func_normal_abspath_tpath" | $SED \
        -e "$pathcdr"`
    # Figure out what to do with it
    case $func_normal_abspath_tcomponent in
      "")
        # Trailing empty path component, ignore it.
      ;;
      ..)
        # Parent dir; strip last assembled component from result.
        func_dirname "$func_normal_abspath_result"
        func_normal_abspath_result=$func_dirname_result
      ;;
      *)
        # Actual path component, append it.
        func_normal_abspath_result=$func_normal_abspath_result/$func_normal_abspath_tcomponent
      ;;
    esac
  done
  # Restore leading double-slash if one was found on entry.
  func_normal_abspath_result=$func_normal_abspath_altnamespace$func_normal_abspath_result
}

# func_relative_path SRCDIR DSTDIR
# generates a relative path from SRCDIR to DSTDIR, with a trailing
# slash if non-empty, suitable for immediately appending a filename
# without needing to append a separator.
#             value returned in "$func_relative_path_result"
func_relative_path ()
{
  func_relative_path_result=
  func_normal_abspath "$1"
  func_relative_path_tlibdir=$func_normal_abspath_result
  func_normal_abspath "$2"
  func_relative_path_tbindir=$func_normal_abspath_result

  # Ascend the tree starting from libdir
  while :; do
    # check if we have found a prefix of bindir
    case $func_relative_path_tbindir in
      $func_relative_path_tlibdir)
        # found an exact match
        func_relative_path_tcancelled=
        break
        ;;
      $func_relative_path_tlibdir*)
        # found a matching prefix
        func_stripname "$func_relative_path_tlibdir" '' "$func_relative_path_tbindir"
        func_relative_path_tcancelled=$func_stripname_result
        if test -z "$func_relative_path_result"; then
          func_relative_path_result=.
        fi
        break
        ;;
      *)
        func_dirname $func_relative_path_tlibdir
        func_relative_path_tlibdir=${func_dirname_result}
        if test "x$func_relative_path_tlibdir" = x ; then
          # Have to descend all the way to the root!
          func_relative_path_result=../$func_relative_path_result
          func_relative_path_tcancelled=$func_relative_path_tbindir
          break
        fi
        func_relative_path_result=../$func_relative_path_result
        ;;
    esac
  done

  # Now calculate path; take care to avoid doubling-up slashes.
  func_stripname '' '/' "$func_relative_path_result"
  func_relative_path_result=$func_stripname_result
  func_stripname '/' '/' "$func_relative_path_tcancelled"
  if test "x$func_stripname_result" != x ; then
    func_relative_path_result=${func_relative_path_result}/${func_stripname_result}
  fi

  # Normalisation. If bindir is libdir, return empty string,
  # else relative path ending with a slash; either way, target
  # file name can be directly appended.
  if test ! -z "$func_relative_path_result"; then
    func_stripname './' '' "$func_relative_path_result/"
    func_relative_path_result=$func_stripname_result
  fi
}

# The name of this program:
func_dirname_and_basename "$progpath"
progname=$func_basename_result

# Make sure we have an absolute path for reexecution:
case $progpath in
  [\\/]*|[A-Za-z]:\\*) ;;
  *[\\/]*)
     progdir=$func_dirname_result
     progdir=`cd "$progdir" && pwd`
     progpath="$progdir/$progname"
     ;;
  *)
     # Bare command name: locate it by walking $PATH.
     save_IFS="$IFS"
     IFS=${PATH_SEPARATOR-:}
     for progdir in $PATH; do
       IFS="$save_IFS"
       test -x "$progdir/$progname" && break
     done
     IFS="$save_IFS"
     test -n "$progdir" || progdir=`pwd`
     progpath="$progdir/$progname"
     ;;
esac

# Sed substitution that helps us do robust quoting.  It backslashifies
# metacharacters that are still active within double-quoted strings.
Xsed="${SED}"' -e 1s/^X//'
sed_quote_subst='s/\([`"$\\]\)/\\\1/g'

# Same as above, but do not quote variable references.
double_quote_subst='s/\(["`\\]\)/\\\1/g'

# Sed substitution that turns a string into a regex matching for the
# string literally.
sed_make_literal_regex='s,[].[^$\\*\/],\\&,g'

# Sed substitution that converts a w32 file name or path
# which contains forward slashes, into one that contains
# (escaped) backslashes.  A very naive implementation.
lt_sed_naive_backslashify='s|\\\\*|\\|g;s|/|\\|g;s|\\|\\\\|g'

# Re-`\' parameter expansions in output of double_quote_subst that were
# `\'-ed in input to the same.  If an odd number of `\' preceded a '$'
# in input to double_quote_subst, that '$' was protected from expansion.
# Since each input `\' is now two `\'s, look for any number of runs of
# four `\'s followed by two `\'s and then a '$'.  `\' that '$'.
bs='\\'
bs2='\\\\'
bs4='\\\\\\\\'
dollar='\$'
# The first command appends a real newline after every run of four
# backslashes and the last command removes those newlines again, so the
# line breaks inside this string are significant.
sed_double_backslash="\
  s/$bs4/&\\
/g
  s/^$bs2$dollar/$bs&/
  s/\\([^$bs]\\)$bs2$dollar/\\1$bs2$bs$dollar/g
  s/\n//g"

# Standard options:
opt_dry_run=false
opt_help=false
opt_quiet=false
opt_verbose=false
opt_warning=:

# func_echo arg...
# Echo program name prefixed message, along with the current mode
# name if it has been set yet.
func_echo ()
{
    $ECHO "$progname: ${opt_mode+$opt_mode: }$*"
}

# func_verbose arg...
# Echo program name prefixed message in verbose mode only.
func_verbose ()
{
    $opt_verbose && func_echo ${1+"$@"}

    # A bug in bash halts the script if the last line of a function
    # fails when set -e is in force, so we need another command to
    # work around that:
    :
}

# func_echo_all arg...
# Invoke $ECHO with all args, space-separated.
func_echo_all ()
{
    $ECHO "$*"
}

# func_error arg...
# Echo program name prefixed message to standard error.
func_error ()
{
    $ECHO "$progname: ${opt_mode+$opt_mode: }"${1+"$@"} 1>&2
}

# func_warning arg...
# Echo program name prefixed warning message to standard error.
func_warning ()
{
    $opt_warning && $ECHO "$progname: ${opt_mode+$opt_mode: }warning: "${1+"$@"} 1>&2

    # bash bug again:
    :
}

# func_fatal_error arg...
# Echo program name prefixed message to standard error, and exit.
func_fatal_error ()
{
    func_error ${1+"$@"}
    exit $EXIT_FAILURE
}

# func_fatal_help arg...
# Echo program name prefixed message to standard error, followed by
# a help hint, and exit.
func_fatal_help ()
{
    func_error ${1+"$@"}
    func_fatal_error "$help"
}
help="Try \`$progname --help' for more information."  ## default


# func_grep expression filename
# Check whether EXPRESSION matches any line of FILENAME, without output.
func_grep ()
{
    $GREP "$1" "$2" >/dev/null 2>&1
}


# func_mkdir_p directory-path
# Make sure the entire path to DIRECTORY-PATH is available.
# Does nothing when DIRECTORY-PATH is empty or in dry-run mode.
func_mkdir_p ()
{
    my_directory_path="$1"
    my_dir_list=

    if test -n "$my_directory_path" && test "$opt_dry_run" != ":"; then

      # Protect directory names starting with `-'
      case $my_directory_path in
        -*) my_directory_path="./$my_directory_path" ;;
      esac

      # While some portion of DIR does not yet exist...
      while test ! -d "$my_directory_path"; do
        # ...make a list in topmost first order.  Use a colon delimited
        # list in case some portion of path contains whitespace.
        my_dir_list="$my_directory_path:$my_dir_list"

        # If the last portion added has no slash in it, the list is done
        case $my_directory_path in */*) ;; *) break ;; esac

        # ...otherwise throw away the child directory and loop
        my_directory_path=`$ECHO "$my_directory_path" | $SED -e "$dirname"`
      done
      my_dir_list=`$ECHO "$my_dir_list" | $SED 's,:*$,,'`

      save_mkdir_p_IFS="$IFS"; IFS=':'
      for my_dir in $my_dir_list; do
        IFS="$save_mkdir_p_IFS"
        # mkdir can fail with a `File exist' error if two processes
        # try to create one of the directories concurrently.  Don't
        # stop in that case!
        $MKDIR "$my_dir" 2>/dev/null || :
      done
      IFS="$save_mkdir_p_IFS"

      # Bail out if we (or some other process) failed to create a directory.
      test -d "$my_directory_path" || \
        func_fatal_error "Failed to create \`$1'"
    fi
}


# func_mktempdir [string]
# Make a temporary directory that won't clash with other running
# libtool processes, and avoids race conditions if possible.  If
# given, STRING is the basename for that directory.
# The directory name is written to standard output.
func_mktempdir ()
{
    my_template="${TMPDIR-/tmp}/${1-$progname}"

    if test "$opt_dry_run" = ":"; then
      # Return a directory name, but don't create it in dry-run mode
      my_tmpdir="${my_template}-$$"
    else

      # If mktemp works, use that first and foremost
      my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null`

      if test ! -d "$my_tmpdir"; then
        # Failing that, at least try and use $RANDOM to avoid a race
        my_tmpdir="${my_template}-${RANDOM-0}$$"

        # Create the fallback directory private to the current user.
        save_mktempdir_umask=`umask`
        umask 0077
        $MKDIR "$my_tmpdir"
        umask $save_mktempdir_umask
      fi

      # If we're not in dry-run mode, bomb out on failure
      test -d "$my_tmpdir" || \
        func_fatal_error "cannot create temporary directory \`$my_tmpdir'"
    fi

    $ECHO "$my_tmpdir"
}


# func_quote_for_eval arg
# Aesthetically quote ARG to be evaled later.
# This function returns two values: FUNC_QUOTE_FOR_EVAL_RESULT
# is double-quoted, suitable for a subsequent eval, whereas
# FUNC_QUOTE_FOR_EVAL_UNQUOTED_RESULT has merely all characters
# which are still active within double quotes backslashified.
func_quote_for_eval ()
{
    case $1 in
      *[\\\`\"\$]*)
        func_quote_for_eval_unquoted_result=`$ECHO "$1" | $SED "$sed_quote_subst"` ;;
      *)
        func_quote_for_eval_unquoted_result="$1" ;;
    esac

    case $func_quote_for_eval_unquoted_result in
      # Double-quote args containing shell metacharacters to delay
      # word splitting, command substitution and variable
      # expansion for a subsequent eval.
      # Many Bourne shells cannot handle close brackets correctly
      # in scan sets, so we specify it separately.
      # The scan set ends with an escaped space followed by an escaped TAB.
      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*|"")
        func_quote_for_eval_result="\"$func_quote_for_eval_unquoted_result\""
        ;;
      *)
        func_quote_for_eval_result="$func_quote_for_eval_unquoted_result"
    esac
}


# func_quote_for_expand arg
# Aesthetically quote ARG to be evaled later; same as above,
# but do not quote variable references.
# value returned in "$func_quote_for_expand_result"
func_quote_for_expand ()
{
    case $1 in
      *[\\\`\"]*)
        my_arg=`$ECHO "$1" | $SED \
            -e "$double_quote_subst" -e "$sed_double_backslash"` ;;
      *)
        my_arg="$1" ;;
    esac

    case $my_arg in
      # Double-quote args containing shell metacharacters to delay
      # word splitting and command substitution for a subsequent eval.
      # Many Bourne shells cannot handle close brackets correctly
      # in scan sets, so we specify it separately.
      # The scan set ends with an escaped space followed by an escaped TAB.
      *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \	]*|*]*|"")
        my_arg="\"$my_arg\""
        ;;
    esac

    func_quote_for_expand_result="$my_arg"
}


# func_show_eval cmd [fail_exp]
# Unless opt_silent is true, then output CMD.  Then, if opt_dryrun is
# not true, evaluate CMD.  If the evaluation of CMD fails, and FAIL_EXP
# is given, then evaluate it.
func_show_eval ()
{
    my_cmd="$1"
    my_fail_exp="${2-:}"

    ${opt_silent-false} || {
      func_quote_for_expand "$my_cmd"
      eval "func_echo $func_quote_for_expand_result"
    }

    if ${opt_dry_run-false}; then :; else
      eval "$my_cmd"
      my_status=$?
      if test "$my_status" -eq 0; then :; else
        # Re-create the failing status so FAIL_EXP can inspect $?.
        eval "(exit $my_status); $my_fail_exp"
      fi
    fi
}


# func_show_eval_locale cmd [fail_exp]
# Unless opt_silent is true, then output CMD.  Then, if opt_dryrun is
# not true, evaluate CMD.  If the evaluation of CMD fails, and FAIL_EXP
# is given, then evaluate it.  Use the saved locale for evaluation.
func_show_eval_locale ()
{
    my_cmd="$1"
    my_fail_exp="${2-:}"

    ${opt_silent-false} || {
      func_quote_for_expand "$my_cmd"
      eval "func_echo $func_quote_for_expand_result"
    }

    if ${opt_dry_run-false}; then :; else
      # Run CMD under the user's locale, then switch back to the safe one.
      eval "$lt_user_locale
            $my_cmd"
      my_status=$?
      eval "$lt_safe_locale"
      if test "$my_status" -eq 0; then :; else
        eval "(exit $my_status); $my_fail_exp"
      fi
    fi
}

# func_tr_sh
# Turn $1 into a string suitable for a shell variable name.
# Result is stored in $func_tr_sh_result.  All characters
# not in the set a-zA-Z0-9_ are replaced with '_'. Further,
# if $1 begins with a digit, a '_' is prepended as well.
func_tr_sh ()
{
  case $1 in
  [0-9]* | *[!a-zA-Z0-9_]*)
    func_tr_sh_result=`$ECHO "$1" | $SED 's/^\([0-9]\)/_\1/; s/[^a-zA-Z0-9_]/_/g'`
    ;;
  * )
    func_tr_sh_result=$1
    ;;
  esac
}


# func_version
# Echo version message to standard output and exit.
# The text is extracted from the comment header of "$progpath"; the sed
# program uses labels and branches, so each command sits on its own line.
func_version ()
{
    $opt_debug

    $SED -n '/(C)/!b go
        :more
        /\./!{
          N
          s/\n# / /
          b more
        }
        :go
        /^# '$PROGRAM' (GNU /,/# warranty; / {
        s/^# //
        s/^# *$//
        s/\((C)\)[ 0-9,-]*\( [1-9][0-9]*\)/\1\2/
        p
     }' < "$progpath"
     exit $?
}

# func_usage
# Echo short help message to standard output and exit.
func_usage ()
{
    $opt_debug

    $SED -n '/^# Usage:/,/^# *.*--help/ {
        s/^# //
        s/^# *$//
        s/\$progname/'$progname'/
        p
    }' < "$progpath"
    echo
    $ECHO "run \`$progname --help | more' for full usage"
    exit $?
}

# func_help [NOEXIT]
# Echo long help message to standard output and exit,
# unless 'noexit' is passed as argument.
func_help ()
{
    $opt_debug

    # Print the comment header of "$progpath" from the usage line through the
    # bug-report line, substituting the configuration placeholders, then
    # the two trailing reference lines.
    $SED -n '/^# Usage:/,/# Report bugs to/ {
        :print
        s/^# //
        s/^# *$//
        s*\$progname*'$progname'*
        s*\$host*'"$host"'*
        s*\$SHELL*'"$SHELL"'*
        s*\$LTCC*'"$LTCC"'*
        s*\$LTCFLAGS*'"$LTCFLAGS"'*
        s*\$LD*'"$LD"'*
        s/\$with_gnu_ld/'"$with_gnu_ld"'/
        s/\$automake_version/'"`(${AUTOMAKE-automake} --version) 2>/dev/null |$SED 1q`"'/
        s/\$autoconf_version/'"`(${AUTOCONF-autoconf} --version) 2>/dev/null |$SED 1q`"'/
        p
        d
     }
     /^# .* home page:/b print
     /^# General help using/b print
     ' < "$progpath"
    ret=$?
    if test -z "$1"; then
      exit $ret
    fi
}

# func_missing_arg argname
# Echo program name prefixed message to standard error and set global
# exit_cmd.
func_missing_arg ()
{
    $opt_debug

    func_error "missing argument for $1."
    exit_cmd=exit
}


# func_split_short_opt shortopt
# Set func_split_short_opt_name and func_split_short_opt_arg shell
# variables after splitting SHORTOPT after the 2nd character.
func_split_short_opt ()
{
    my_sed_short_opt='1s/^\(..\).*$/\1/;q'
    my_sed_short_rest='1s/^..\(.*\)$/\1/;q'

    func_split_short_opt_name=`$ECHO "$1" | $SED "$my_sed_short_opt"`
    func_split_short_opt_arg=`$ECHO "$1" | $SED "$my_sed_short_rest"`
} # func_split_short_opt may be replaced by extended shell implementation


# func_split_long_opt longopt
# Set func_split_long_opt_name and func_split_long_opt_arg shell
# variables after splitting LONGOPT at the `=' sign.
func_split_long_opt ()
{
    my_sed_long_opt='1s/^\(--[^=]*\)=.*/\1/;q'
    my_sed_long_arg='1s/^--[^=]*=//'

    func_split_long_opt_name=`$ECHO "$1" | $SED "$my_sed_long_opt"`
    func_split_long_opt_arg=`$ECHO "$1" | $SED "$my_sed_long_arg"`
} # func_split_long_opt may be replaced by extended shell implementation

exit_cmd=:


magic="%%%MAGIC variable%%%"
magic_exe="%%%MAGIC EXE variable%%%"

# Global variables.
nonopt=
preserve_args=
lo2o="s/\\.lo\$/.${objext}/"
o2lo="s/\\.${objext}\$/.lo/"
extracted_archives=
extracted_serial=0

# If this variable is set in any of the actions, the command in it
# will be execed at the end.  This prevents here-documents from being
# left over by shells.
exec_cmd=

# func_append var value
# Append VALUE to the end of shell variable VAR.
func_append ()
{
    eval "${1}=\$${1}\${2}"
} # func_append may be replaced by extended shell implementation

# func_append_quoted var value
# Quote VALUE and append to the end of shell variable VAR, separated
# by a space.
func_append_quoted ()
{
    func_quote_for_eval "${2}"
    eval "${1}=\$${1}\\ \$func_quote_for_eval_result"
} # func_append_quoted may be replaced by extended shell implementation


# func_arith arithmetic-term...
# value returned in "$func_arith_result"
func_arith ()
{
    func_arith_result=`expr "${@}"`
} # func_arith may be replaced by extended shell implementation


# func_len string
# STRING may not start with a hyphen.
# value returned in "$func_len_result"; falls back to $max_cmd_len
# when expr fails.
func_len ()
{
    func_len_result=`expr "${1}" : ".*" 2>/dev/null || echo $max_cmd_len`
} # func_len may be replaced by extended shell implementation


# func_lo2o object
func_lo2o ()
{
    func_lo2o_result=`$ECHO "${1}" | $SED "$lo2o"`
} # func_lo2o may be replaced by extended shell implementation


# func_xform libobj-or-source
func_xform ()
{
    func_xform_result=`$ECHO "${1}" | $SED 's/\.[^.]*$/.lo/'`
} # func_xform may be replaced by extended shell implementation


# func_fatal_configuration arg...
# Echo program name prefixed message to standard error, followed by
# a configuration failure hint, and exit.
func_fatal_configuration ()
{
    func_error ${1+"$@"}
    func_error "See the $PACKAGE documentation for more information."
    func_fatal_error "Fatal configuration error."
}


# func_config
# Display the configuration for all the tags in this script.
func_config ()
{
    re_begincf='^# ### BEGIN LIBTOOL'
    re_endcf='^# ### END LIBTOOL'

    # Default configuration.
    $SED "1,/$re_begincf CONFIG/d;/$re_endcf CONFIG/,\$d" < "$progpath"

    # Now print the configurations for the tags.
    for tagname in $taglist; do
      $SED -n "/$re_begincf TAG CONFIG: $tagname\$/,/$re_endcf TAG CONFIG: $tagname\$/p" < "$progpath"
    done

    exit $?
}

# func_features
# Display the features supported by this script.
func_features ()
{
    echo "host: $host"
    if test "$build_libtool_libs" = yes; then
      echo "enable shared libraries"
    else
      echo "disable shared libraries"
    fi
    if test "$build_old_libs" = yes; then
      echo "enable static libraries"
    else
      echo "disable static libraries"
    fi

    exit $?
}

# func_enable_tag tagname
# Verify that TAGNAME is valid, and either flag an error and exit, or
# enable the TAGNAME tag.  We also add TAGNAME to the global $taglist
# variable here.
func_enable_tag ()
{
  # Global variable:
  tagname="$1"

  re_begincf="^# ### BEGIN LIBTOOL TAG CONFIG: $tagname\$"
  re_endcf="^# ### END LIBTOOL TAG CONFIG: $tagname\$"
  sed_extractcf="/$re_begincf/,/$re_endcf/p"

  # Validate tagname.
  case $tagname in
    *[!-_A-Za-z0-9,/]*)
      func_fatal_error "invalid tag name: $tagname"
      ;;
  esac

  # Don't test for the "default" C tag, as we know it's
  # there but not specially marked.
  case $tagname in
    CC) ;;
    *)
      if $GREP "$re_begincf" "$progpath" >/dev/null 2>&1; then
        taglist="$taglist $tagname"

        # Evaluate the configuration.  Be careful to quote the path
        # and the sed script, to avoid splitting on whitespace, but
        # also don't use non-portable quotes within backquotes within
        # quotes we have to do it in 2 steps:
        extractedcf=`$SED -n -e "$sed_extractcf" < "$progpath"`
        eval "$extractedcf"
      else
        func_error "ignoring unknown tag $tagname"
      fi
      ;;
  esac
}

# func_check_version_match
# Ensure that we are using m4 macros, and libtool script from the same
# release of libtool.
# Exits with $EXIT_MISMATCH when the revisions differ.
func_check_version_match ()
{
  if test "$package_revision" != "$macro_revision"; then
    if test "$VERSION" != "$macro_version"; then
      if test -z "$macro_version"; then
        cat >&2 <<_LT_EOF
$progname: Version mismatch error. This is $PACKAGE $VERSION, but the
$progname: definition of this LT_INIT comes from an older release.
$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
$progname: and run autoconf again.
_LT_EOF
      else
        cat >&2 <<_LT_EOF
$progname: Version mismatch error. This is $PACKAGE $VERSION, but the
$progname: definition of this LT_INIT comes from $PACKAGE $macro_version.
$progname: You should recreate aclocal.m4 with macros from $PACKAGE $VERSION
$progname: and run autoconf again.
_LT_EOF
      fi
    else
      cat >&2 <<_LT_EOF
$progname: Version mismatch error. This is $PACKAGE $VERSION, revision $package_revision,
$progname: but the definition of this LT_INIT comes from revision $macro_revision.
$progname: You should recreate aclocal.m4 with macros from revision $package_revision
$progname: of $PACKAGE $VERSION and run autoconf again.
_LT_EOF
    fi

    exit $EXIT_MISMATCH
  fi
}


# Shorthand for --mode=foo, only valid as the first argument
case $1 in
clean|clea|cle|cl)
  shift; set dummy --mode clean ${1+"$@"}; shift
  ;;
compile|compil|compi|comp|com|co|c)
  shift; set dummy --mode compile ${1+"$@"}; shift
  ;;
execute|execut|execu|exec|exe|ex|e)
  shift; set dummy --mode execute ${1+"$@"}; shift
  ;;
finish|finis|fini|fin|fi|f)
  shift; set dummy --mode finish ${1+"$@"}; shift
  ;;
install|instal|insta|inst|ins|in|i)
  shift; set dummy --mode install ${1+"$@"}; shift
  ;;
link|lin|li|l)
  shift; set dummy --mode link ${1+"$@"}; shift
  ;;
uninstall|uninstal|uninsta|uninst|unins|unin|uni|un|u)
  shift; set dummy --mode uninstall ${1+"$@"}; shift
  ;;
esac


# Option defaults:
opt_debug=:
opt_dry_run=false
opt_config=false
opt_preserve_dup_deps=false
opt_features=false
opt_finish=false
opt_help=false
opt_help_all=false
opt_silent=:
opt_warning=:
opt_verbose=:
# The two assignments below override the ones above: the effective
# defaults are non-silent and non-verbose.
opt_silent=false
opt_verbose=false


# Parse options once, thoroughly.  This comes as soon as possible in the
# script to make things like `--version' happen as quickly as we can.
{
  # this just eases exit handling
  while test $# -gt 0; do
    opt="$1"
    shift
    case $opt in
      --debug|-x)
        opt_debug='set -x'
        func_echo "enabling shell trace mode"
        $opt_debug
        ;;
      --dry-run|--dryrun|-n)
        opt_dry_run=:
        ;;
      --config)
        opt_config=:
        func_config
        ;;
      --dlopen|-dlopen)
        optarg="$1"
        opt_dlopen="${opt_dlopen+$opt_dlopen
}$optarg"
        shift
        ;;
      --preserve-dup-deps)
        opt_preserve_dup_deps=:
        ;;
      --features)
        opt_features=:
        func_features
        ;;
      --finish)
        opt_finish=:
        set dummy --mode finish ${1+"$@"}; shift
        ;;
      --help)
        opt_help=:
        ;;
      --help-all)
        opt_help_all=:
        opt_help=': help-all'
        ;;
      --mode)
        test $# = 0 && func_missing_arg $opt && break
        optarg="$1"
        opt_mode="$optarg"
        case $optarg in
          # Valid mode arguments:
          clean|compile|execute|finish|install|link|relink|uninstall) ;;

          # Catch anything else as an error
          *) func_error "invalid argument for $opt"
             exit_cmd=exit
             break
             ;;
        esac
        shift
        ;;
      --no-silent|--no-quiet)
        opt_silent=false
        func_append preserve_args " $opt"
        ;;
      --no-warning|--no-warn)
        opt_warning=false
        func_append preserve_args " $opt"
        ;;
      --no-verbose)
        opt_verbose=false
        func_append preserve_args " $opt"
        ;;
      --silent|--quiet)
        opt_silent=:
        func_append preserve_args " $opt"
        opt_verbose=false
        ;;
      --verbose|-v)
        opt_verbose=:
        func_append preserve_args " $opt"
        opt_silent=false
        ;;
      --tag)
        test $# = 0 && func_missing_arg $opt && break
        optarg="$1"
        opt_tag="$optarg"
        func_append preserve_args " $opt $optarg"
        func_enable_tag "$optarg"
        shift
        ;;

      # A second `--help' arm that called func_help used to follow here; it
      # could never match because the `--help' arm above wins, so it is gone.
      -\?|-h)           func_usage ;;
      --version)        func_version ;;

      # Separate optargs to long options:
      --*=*)
        func_split_long_opt "$opt"
        set dummy "$func_split_long_opt_name" "$func_split_long_opt_arg" ${1+"$@"}
        shift
        ;;

      # Separate non-argument short options:
      -\?*|-h*|-n*|-v*)
        func_split_short_opt "$opt"
        set dummy "$func_split_short_opt_name" "-$func_split_short_opt_arg" ${1+"$@"}
        shift
        ;;

      --)               break ;;
      -*)               func_fatal_help "unrecognized option \`$opt'" ;;
      *)                set dummy "$opt" ${1+"$@"}; shift; break ;;
    esac
  done

  # Validate options:

  # save first non-option argument
  if test "$#" -gt 0; then
    nonopt="$opt"
    shift
  fi

  # preserve --debug
  test "$opt_debug" = : || func_append preserve_args " --debug"

  case $host in
    *cygwin* | *mingw* | *pw32* | *cegcc*)
      # don't eliminate duplications in $postdeps and $predeps
      opt_duplicate_compiler_generated_deps=:
      ;;
    *)
      opt_duplicate_compiler_generated_deps=$opt_preserve_dup_deps
      ;;
  esac

  $opt_help || {
    # Sanity checks first:
    func_check_version_match

    if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
      func_fatal_configuration "not configured to build any kind of library"
    fi

    # Darwin sucks
    eval std_shrext=\"$shrext_cmds\"

    # Only execute mode is allowed to have -dlopen flags.
    if test -n "$opt_dlopen" && test "$opt_mode" != execute; then
      func_error "unrecognized option \`-dlopen'"
      $ECHO "$help" 1>&2
      exit $EXIT_FAILURE
    fi

    # Change the help message to a mode-specific one.
    generic_help="$help"
    help="Try \`$progname --help --mode=$opt_mode' for more information."
  }


  # Bail if the options were screwed
  $exit_cmd $EXIT_FAILURE
}




## ----------- ##
##    Main.    ##
## ----------- ##

# func_lalib_p file
# True iff FILE is a libtool `.la' library or `.lo' object file.
# This function is only a basic sanity check; it will hardly flush out
# determined imposters.
func_lalib_p ()
{
    test -f "$1" &&
      $SED -e 4q "$1" 2>/dev/null \
        | $GREP "^# Generated by .*$PACKAGE" > /dev/null 2>&1
}

# func_lalib_unsafe_p file
# True iff FILE is a libtool `.la' library or `.lo' object file.
# This function implements the same check as func_lalib_p without
# resorting to external programs.  To this end, it redirects stdin and
# closes it afterwards, without saving the original file descriptor.
# As a safety measure, use it only where a negative result would be
# fatal anyway.  Works if `file' does not exist.
func_lalib_unsafe_p ()
{
    lalib_p=no
    # Temporarily park stdin on fd 5 and read FILE through fd 0.
    if test -f "$1" && test -r "$1" && exec 5<&0 <"$1"; then
      # Look at no more than the first four lines.
      for lalib_p_l in 1 2 3 4; do
        read lalib_p_line
        case $lalib_p_line in
          "# Generated by "*$PACKAGE*)
            lalib_p=yes
            break
            ;;
        esac
      done
      # Put stdin back and release the scratch descriptor.
      exec 0<&5 5<&-
    fi
    test yes = "$lalib_p"
}

# func_ltwrapper_script_p file
# True iff FILE is a libtool wrapper script.
# Merely a plausibility check (delegates to func_lalib_p); a determined
# imposter will get through.
func_ltwrapper_script_p ()
{
    func_lalib_p "$1"
}

# func_ltwrapper_executable_p file
# True iff FILE is a libtool wrapper executable.
# Merely a plausibility check: greps FILE (with `.exe' appended unless it
# already ends in `.exe') for $magic_exe.
func_ltwrapper_executable_p ()
{
    case $1 in
      *.exe) func_ltwrapper_exec_suffix= ;;
      *)     func_ltwrapper_exec_suffix=.exe ;;
    esac
    $GREP "$magic_exe" "$1$func_ltwrapper_exec_suffix" >/dev/null 2>&1
}

# func_ltwrapper_scriptname file
# FILE is taken to be an ltwrapper_executable.  Computes the name of the
# matching temporary ltwrapper_script and leaves it in
# func_ltwrapper_scriptname_result.
func_ltwrapper_scriptname ()
{
    func_dirname_and_basename "$1" "" "."
    func_stripname '' '.exe' "$func_basename_result"
    func_ltwrapper_scriptname_result=$func_dirname_result/$objdir/${func_stripname_result}_ltshwrapper
}

# func_ltwrapper_p file
# True iff FILE is a libtool wrapper script or wrapper executable.
# Merely a plausibility check; a determined imposter will get through.
func_ltwrapper_p ()
{
    if func_ltwrapper_script_p "$1"; then
      return 0
    fi
    func_ltwrapper_executable_p "$1"
}


# func_execute_cmds commands fail_cmd
# Run each of the tilde-separated COMMANDS through func_show_eval.
# FAIL_CMD, when given, is handed on to be evaluated upon failure (`:' is
# used otherwise); it may read the current command from the variable CMD.
func_execute_cmds ()
{
    $opt_debug
    save_ifs=$IFS
    IFS='~'
    for cmd in $1; do
      # Splitting is done; restore IFS before anything is evaluated.
      IFS=$save_ifs
      eval cmd=\"$cmd\"
      func_show_eval "$cmd" "${2-:}"
    done
    IFS=$save_ifs
}


# func_source file
# Source FILE, adding directory component if necessary.
# Note that it is not necessary on cygwin/mingw to append a dot to
# FILE even if both FILE and FILE.exe exist: automatic-append-.exe
# behavior happens only for exec(3), not for open(2)!  Also, sourcing
# `FILE.' does not work on cygwin managed mounts.
func_source ()
{
    $opt_debug
    case $1 in
      # A name with a directory separator can be sourced as is ...
      */* | *\\*) . "$1" ;;
      # ... a bare name is anchored to the current directory.
      *)          . "./$1" ;;
    esac
}


# func_resolve_sysroot PATH
# Replace a leading = in PATH with a sysroot.  Store the result into
# func_resolve_sysroot_result
func_resolve_sysroot ()
{
  func_resolve_sysroot_result=$1
  case $func_resolve_sysroot_result in
  =*)
    func_stripname '=' '' "$func_resolve_sysroot_result"
    func_resolve_sysroot_result=$lt_sysroot$func_stripname_result
    ;;
  esac
}

# func_replace_sysroot PATH
# If PATH begins with the sysroot, replace it with = and
# store the result into func_replace_sysroot_result.
func_replace_sysroot ()
{
  case "$lt_sysroot:$1" in
  ?*:"$lt_sysroot"*)
    func_stripname "$lt_sysroot" '' "$1"
    func_replace_sysroot_result="=$func_stripname_result"
    ;;
  *)
    # Including no sysroot.
    func_replace_sysroot_result=$1
    ;;
  esac
}

# func_infer_tag arg
# Infer tagged configuration to use if any are available and
# if one wasn't chosen via the "--tag" command line option.
# Only attempt this if the compiler in the base compile
# command doesn't match the default compiler.
# arg is usually of the form 'gcc ...'
func_infer_tag ()
{
    $opt_debug
    if test -n "$available_tags" && test -z "$tagname"; then
      CC_quoted=
      for arg in $CC; do
        func_append_quoted CC_quoted "$arg"
      done
      CC_expanded=`func_echo_all $CC`
      CC_quoted_expanded=`func_echo_all $CC_quoted`
      # The trailing blank makes a command consisting of the compiler alone
      # match the `"$CC "*' patterns, exactly as the per-tag check below does.
      case "$@ " in
      # Blanks in the command may have been stripped by the calling shell,
      # but not from the CC environment variable when configure was run.
      " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \
      " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*) ;;
      # Blanks at the start of $base_compile will cause this to fail
      # if we don't check for them as well.
      *)
        for z in $available_tags; do
          if $GREP "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then
            # Evaluate the configuration.
            eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`"
            CC_quoted=
            for arg in $CC; do
              # Double-quote args containing other shell metacharacters.
              func_append_quoted CC_quoted "$arg"
            done
            CC_expanded=`func_echo_all $CC`
            CC_quoted_expanded=`func_echo_all $CC_quoted`
            case "$@ " in
            " $CC "* | "$CC "* | " $CC_expanded "* | "$CC_expanded "* | \
            " $CC_quoted"* | "$CC_quoted "* | " $CC_quoted_expanded "* | "$CC_quoted_expanded "*)
              # The compiler in the base compile command matches
              # the one in the tagged configuration.
              # Assume this is the tagged configuration we want.
              tagname=$z
              break
              ;;
            esac
          fi
        done
        # If $tagname still isn't set, then no tagged configuration
        # was found and let the user know that the "--tag" command
        # line option must be used.
        if test -z "$tagname"; then
          func_echo "unable to infer tagged configuration"
          func_fatal_error "specify a tag with \`--tag'"
#       else
#         func_verbose "using $tagname tagged configuration"
        fi
        ;;
      esac
    fi
}



# func_write_libtool_object output_name pic_name nonpic_name
# Create a libtool object file (analogous to a ".la" file),
# but don't create it if we're doing a dry run.
#
# NOTE(review): the here-document below and the opening of
# func_convert_core_file_wine_to_w32 were missing from this copy of the
# script (the text between `<<' and the next `>' had been lost).  They have
# been restored to match upstream GNU libtool; please confirm against a
# pristine ltmain.sh of the same release.
func_write_libtool_object ()
{
    write_libobj=${1}
    if test "$build_libtool_libs" = yes; then
      write_lobj=\'${2}\'
    else
      write_lobj=none
    fi

    if test "$build_old_libs" = yes; then
      write_oldobj=\'${3}\'
    else
      write_oldobj=none
    fi

    $opt_dry_run || {
      # Write to a temporary name first, then move it into place.
      cat >${write_libobj}T <<EOF
# $write_libobj - a libtool object file
# Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION
#
# Please DO NOT delete this file!
# It is necessary for linking the library.

# Name of the PIC object.
pic_object=$write_lobj

# Name of the non-PIC object
non_pic_object=$write_oldobj

EOF
      $MV "${write_libobj}T" "${write_libobj}"
    }
}


##################################################
# FILE NAME AND PATH CONVERSION HELPER FUNCTIONS #
##################################################

# func_convert_core_file_wine_to_w32 ARG
# Helper function used by file name conversion functions when $build is *nix,
# and $host is mingw, cygwin, or some other w32 environment.  Runs
# `winepath -w' on ARG, so a working winepath must be in $build's $PATH.
#
# ARG is the $build file name to be converted to w32 format.
# Result is available in $func_convert_core_file_wine_to_w32_result, and will
# be empty on error (or when ARG is empty).
func_convert_core_file_wine_to_w32 ()
{
  $opt_debug
  func_convert_core_file_wine_to_w32_result="$1"
  if test -n "$1"; then
    # Both the exit status and the captured output are checked: a zero
    # status with empty output is treated as a failed conversion too.
    func_convert_core_file_wine_to_w32_tmp=`winepath -w "$1" 2>/dev/null`
    if test "$?" -eq 0 && test -n "${func_convert_core_file_wine_to_w32_tmp}"; then
      func_convert_core_file_wine_to_w32_result=`$ECHO "$func_convert_core_file_wine_to_w32_tmp" |
        $SED -e "$lt_sed_naive_backslashify"`
    else
      func_convert_core_file_wine_to_w32_result=
    fi
  fi
}
# end: func_convert_core_file_wine_to_w32


# func_convert_core_path_wine_to_w32 ARG
# Helper function used by path conversion functions when $build is *nix, and
# $host is mingw, cygwin, or some other w32 environment.  Relies on a correctly
# configured wine environment available, with the winepath program in $build's
# $PATH.  Assumes ARG has no leading or trailing path separator characters.
#
# ARG is path to be converted from $build format to win32.
# Result is available in $func_convert_core_path_wine_to_w32_result.
# Unconvertible file (directory) names in ARG are skipped; if no directory names
# are convertible, then the result may be empty.
func_convert_core_path_wine_to_w32 ()
{
  $opt_debug
  # unfortunately, winepath doesn't convert paths, only file names
  func_convert_core_path_wine_to_w32_result=""
  if test -n "$1"; then
    oldIFS=$IFS
    IFS=:
    for func_convert_core_path_wine_to_w32_f in $1; do
      IFS=$oldIFS
      func_convert_core_file_wine_to_w32 "$func_convert_core_path_wine_to_w32_f"
      if test -n "$func_convert_core_file_wine_to_w32_result" ; then
        # Join the converted components with the w32 separator `;'.
        if test -z "$func_convert_core_path_wine_to_w32_result"; then
          func_convert_core_path_wine_to_w32_result="$func_convert_core_file_wine_to_w32_result"
        else
          func_append func_convert_core_path_wine_to_w32_result ";$func_convert_core_file_wine_to_w32_result"
        fi
      fi
    done
    IFS=$oldIFS
  fi
}
# end: func_convert_core_path_wine_to_w32


# func_cygpath ARGS...
# Wrapper around calling the cygpath program via LT_CYGPATH.  This is used
# when (1) $build is *nix and Cygwin is hosted via a wine environment; or (2)
# $build is MSYS and $host is Cygwin, or (3) $build is Cygwin.
# In case (1) or (2), returns the Cygwin file name or path in
# func_cygpath_result (input file name or path is assumed to be in w32
# format, as previously converted from $build's *nix or MSYS format).  In
# case (3), returns the w32 file name or path in func_cygpath_result (input
# file name or path is assumed to be in Cygwin format).  Returns an empty
# string on error.
#
# ARGS are passed to cygpath, with the last one being the file name or path to
# be converted.
#
# Specify the absolute *nix (or w32) name to cygpath in the LT_CYGPATH
# environment variable; do not put it in $PATH.
func_cygpath ()
{
  $opt_debug
  # Start from the failure value; only a successful run overwrites it.
  func_cygpath_result=
  if test -z "$LT_CYGPATH" || test ! -f "$LT_CYGPATH"; then
    func_error "LT_CYGPATH is empty or specifies non-existent file: \`$LT_CYGPATH'"
  else
    # on failure, ensure result is empty
    func_cygpath_result=`$LT_CYGPATH "$@" 2>/dev/null` || func_cygpath_result=
  fi
}
#end: func_cygpath


# func_convert_core_msys_to_w32 ARG
# Convert file name or path ARG from MSYS format to w32 format by echoing it
# through `cmd //c'.  The result is left in
# func_convert_core_msys_to_w32_result.
func_convert_core_msys_to_w32 ()
{
  $opt_debug
  # cmd pads its output with trailing blanks; the first sed expression
  # trims them before the backslashify step.
  func_convert_core_msys_to_w32_result=`( cmd //c echo "$1" ) 2>/dev/null |
    $SED -e 's/[ ]*$//' -e "$lt_sed_naive_backslashify"`
}
#end: func_convert_core_msys_to_w32


# func_convert_file_check ARG1 ARG2
# ARG1 is a file name in $build format and ARG2 its conversion to $host
# format.  When ARG2 came out empty although ARG1 was not, complain but carry
# on, resetting func_to_host_file_result to ARG1.
func_convert_file_check ()
{
  $opt_debug
  # Nothing to do unless a non-empty input produced an empty result.
  test -n "$1" && test -z "$2" || return 0
  func_error "Could not determine host file name corresponding to"
  func_error " \`$1'"
  func_error "Continuing, but uninstalled executables may not work."
  # Fallback:
  func_to_host_file_result="$1"
}
# end func_convert_file_check


# func_convert_path_check FROM_PATHSEP TO_PATHSEP FROM_PATH TO_PATH
# FROM_PATH is a path in $build format and TO_PATH its conversion to $host
# format.  When TO_PATH came out empty although FROM_PATH was not, complain
# but carry on, resetting func_to_host_path_result to a simplistic fallback
# value (see below).
func_convert_path_check ()
{
  $opt_debug
  # Nothing to do unless a non-empty input produced an empty result.
  test -n "$3" && test -z "$4" || return 0
  func_error "Could not determine the host path corresponding to"
  func_error " \`$3'"
  func_error "Continuing, but uninstalled executables may not work."
  # Fallback.  This is a deliberately simplistic "conversion" and
  # should not be "improved".  See libtool.info.
  if test "x$1" = "x$2"; then
    func_to_host_path_result="$3"
  else
    lt_replace_pathsep_chars="s|$1|$2|g"
    func_to_host_path_result=`echo "$3" | $SED -e "$lt_replace_pathsep_chars"`
  fi
}
# end func_convert_path_check


# func_convert_path_front_back_pathsep FRONTPAT BACKPAT REPL ORIG
# Prepends REPL to func_to_host_path_result when ORIG matches FRONTPAT, and
# appends REPL to it when ORIG matches BACKPAT.
func_convert_path_front_back_pathsep ()
{
  $opt_debug
  # $1 and $2 are shell patterns, hence deliberately unquoted.
  case $4 in
    $1) func_to_host_path_result=$3$func_to_host_path_result ;;
  esac
  case $4 in
    $2) func_append func_to_host_path_result "$3" ;;
  esac
}
# end func_convert_path_front_back_pathsep


##################################################
# $build to $host FILE NAME CONVERSION FUNCTIONS #
##################################################
# invoked via `$to_host_file_cmd ARG'
#
# In each case, ARG is the path to be converted from $build to $host format.
# Result will be available in $func_to_host_file_result.


# func_to_host_file ARG
# Converts the file name ARG from $build format to $host format by calling
# the function named in $to_host_file_cmd.  Return result in
# func_to_host_file_result.
func_to_host_file ()
{
  $opt_debug
  $to_host_file_cmd "$1"
}
# end func_to_host_file


# func_to_tool_file ARG LAZY
# converts the file name ARG from $build format to toolchain format.
# Return result in func_to_tool_file_result.  If the conversion in use is
# listed in (the comma separated) LAZY, no conversion takes place.
func_to_tool_file ()
{
  $opt_debug
  case ,$2, in
    *,"$to_tool_file_cmd",*)
      # The active conversion is one the caller asked to skip.
      func_to_tool_file_result=$1
      return 0
      ;;
  esac
  # The conversion function reports through func_to_host_file_result.
  $to_tool_file_cmd "$1"
  func_to_tool_file_result=$func_to_host_file_result
}
# end func_to_tool_file


# func_convert_file_noop ARG
# Identity conversion: func_to_host_file_result becomes ARG.
func_convert_file_noop ()
{
  func_to_host_file_result=$1
}
# end func_convert_file_noop


# func_convert_file_msys_to_w32 ARG
# Convert file name ARG from (mingw) MSYS to (mingw) w32 format; automatic
# conversion to w32 is not available inside the cwrapper.  Returns result in
# func_to_host_file_result.
func_convert_file_msys_to_w32 ()
{
  $opt_debug
  func_to_host_file_result=$1
  # An empty ARG is passed through unconverted.
  test -z "$1" || {
    func_convert_core_msys_to_w32 "$1"
    func_to_host_file_result=$func_convert_core_msys_to_w32_result
  }
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_msys_to_w32


# func_convert_file_cygwin_to_w32 ARG
# Convert file name ARG from Cygwin to w32 format.  Returns result in
# func_to_host_file_result.
func_convert_file_cygwin_to_w32 ()
{
  $opt_debug
  func_to_host_file_result=$1
  test -z "$1" || {
    # because $build is cygwin, we call "the" cygpath in $PATH; no need to use
    # LT_CYGPATH in this case.
    func_to_host_file_result=`cygpath -m "$1"`
  }
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_cygwin_to_w32


# func_convert_file_nix_to_w32 ARG
# Convert file name ARG from *nix to w32 format.  Requires a wine environment
# and a working winepath.  Returns result in func_to_host_file_result.
func_convert_file_nix_to_w32 ()
{
  $opt_debug
  func_to_host_file_result=$1
  # An empty ARG is passed through unconverted.
  test -z "$1" || {
    func_convert_core_file_wine_to_w32 "$1"
    func_to_host_file_result=$func_convert_core_file_wine_to_w32_result
  }
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_nix_to_w32


# func_convert_file_msys_to_cygwin ARG
# Convert file name ARG from MSYS to Cygwin format: first to w32, then from
# w32 to Cygwin with `func_cygpath -u'.  Requires LT_CYGPATH set.
# Returns result in func_to_host_file_result.
func_convert_file_msys_to_cygwin ()
{
  $opt_debug
  func_to_host_file_result=$1
  test -z "$1" || {
    func_convert_core_msys_to_w32 "$1"
    func_cygpath -u "$func_convert_core_msys_to_w32_result"
    func_to_host_file_result=$func_cygpath_result
  }
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_msys_to_cygwin


# func_convert_file_nix_to_cygwin ARG
# Convert file name ARG from *nix to Cygwin format.  Requires Cygwin installed
# in a wine environment, working winepath, and LT_CYGPATH set.  Returns result
# in func_to_host_file_result.
func_convert_file_nix_to_cygwin ()
{
  $opt_debug
  func_to_host_file_result=$1
  test -z "$1" || {
    # convert from *nix to w32, then use cygpath to convert from w32 to cygwin.
    func_convert_core_file_wine_to_w32 "$1"
    func_cygpath -u "$func_convert_core_file_wine_to_w32_result"
    func_to_host_file_result=$func_cygpath_result
  }
  func_convert_file_check "$1" "$func_to_host_file_result"
}
# end func_convert_file_nix_to_cygwin


#############################################
# $build to $host PATH CONVERSION FUNCTIONS #
#############################################
# invoked via `$to_host_path_cmd ARG'
#
# In each case, ARG is the path to be converted from $build to $host format.
# The result will be available in $func_to_host_path_result.
#
# Path separators are also converted from $build format to $host format.  If
# ARG begins or ends with a path separator character, it is preserved (but
# converted to $host format) on output.
#
# All path conversion functions are named using the following convention:
#   file name conversion function    : func_convert_file_X_to_Y ()
#   path conversion function         : func_convert_path_X_to_Y ()
# where, for any given $build/$host combination the 'X_to_Y' value is the
# same.  If conversion functions are added for new $build/$host combinations,
# the two new functions must follow this pattern, or func_init_to_host_path_cmd
# will break.


# func_init_to_host_path_cmd
# Makes sure the function "pointer" variable $to_host_path_cmd is set,
# deriving it from $to_host_file_cmd by swapping the `func_convert_file_'
# prefix for `func_convert_path_'.
to_host_path_cmd=
func_init_to_host_path_cmd ()
{
  $opt_debug
  # Already initialised: keep the current value.
  test -z "$to_host_path_cmd" || return 0
  func_stripname 'func_convert_file_' '' "$to_host_file_cmd"
  to_host_path_cmd=func_convert_path_$func_stripname_result
}


# func_to_host_path ARG
# Converts the path ARG from $build format to $host format.  Return result
# in func_to_host_path_result.
func_to_host_path ()
{
  $opt_debug
  func_init_to_host_path_cmd
  $to_host_path_cmd "$1"
}
# end func_to_host_path


# func_convert_path_noop ARG
# Identity conversion: func_to_host_path_result becomes ARG.
func_convert_path_noop ()
{
  func_to_host_path_result=$1
}
# end func_convert_path_noop


# func_convert_path_msys_to_w32 ARG
# Convert path ARG from (mingw) MSYS to (mingw) w32 format; automatic
# conversion to w32 is not available inside the cwrapper.  Returns result in
# func_to_host_path_result.
func_convert_path_msys_to_w32 ()
{
  $opt_debug
  func_to_host_path_result=$1
  test -n "$1" || return 0
  # Remove leading and trailing path separator characters from ARG.  MSYS
  # behavior is inconsistent here; cygpath turns them into '.;' and ';.';
  # and winepath ignores them completely.
  func_stripname : : "$1"
  func_to_host_path_tmp1=$func_stripname_result
  func_convert_core_msys_to_w32 "$func_to_host_path_tmp1"
  func_to_host_path_result=$func_convert_core_msys_to_w32_result
  func_convert_path_check : ";" \
    "$func_to_host_path_tmp1" "$func_to_host_path_result"
  # Re-attach the separators stripped above, in $host form.
  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
}
# end func_convert_path_msys_to_w32


# func_convert_path_cygwin_to_w32 ARG
# Convert path ARG from Cygwin to w32 format.  Returns result in
# func_to_host_path_result.
func_convert_path_cygwin_to_w32 ()
{
  $opt_debug
  func_to_host_path_result=$1
  test -n "$1" || return 0
  # See func_convert_path_msys_to_w32:
  func_stripname : : "$1"
  func_to_host_path_tmp1=$func_stripname_result
  func_to_host_path_result=`cygpath -m -p "$func_to_host_path_tmp1"`
  func_convert_path_check : ";" \
    "$func_to_host_path_tmp1" "$func_to_host_path_result"
  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
}
# end func_convert_path_cygwin_to_w32


# func_convert_path_nix_to_w32 ARG
# Convert path ARG from *nix to w32 format.  Requires a wine environment and
# a working winepath.  Returns result in func_to_host_path_result.
func_convert_path_nix_to_w32 ()
{
  $opt_debug
  func_to_host_path_result=$1
  test -n "$1" || return 0
  # See func_convert_path_msys_to_w32:
  func_stripname : : "$1"
  func_to_host_path_tmp1=$func_stripname_result
  func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1"
  func_to_host_path_result=$func_convert_core_path_wine_to_w32_result
  func_convert_path_check : ";" \
    "$func_to_host_path_tmp1" "$func_to_host_path_result"
  func_convert_path_front_back_pathsep ":*" "*:" ";" "$1"
}
# end func_convert_path_nix_to_w32


# func_convert_path_msys_to_cygwin ARG
# Convert path ARG from MSYS to Cygwin format.  Requires LT_CYGPATH set.
# Returns result in func_to_host_path_result.
func_convert_path_msys_to_cygwin () { $opt_debug func_to_host_path_result="$1" if test -n "$1"; then # See func_convert_path_msys_to_w32: func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_msys_to_w32 "$func_to_host_path_tmp1" func_cygpath -u -p "$func_convert_core_msys_to_w32_result" func_to_host_path_result="$func_cygpath_result" func_convert_path_check : : \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" : "$1" fi } # end func_convert_path_msys_to_cygwin # func_convert_path_nix_to_cygwin ARG # Convert path ARG from *nix to Cygwin format. Requires Cygwin installed in a # a wine environment, working winepath, and LT_CYGPATH set. Returns result in # func_to_host_file_result. func_convert_path_nix_to_cygwin () { $opt_debug func_to_host_path_result="$1" if test -n "$1"; then # Remove leading and trailing path separator characters from # ARG. msys behavior is inconsistent here, cygpath turns them # into '.;' and ';.', and winepath ignores them completely. func_stripname : : "$1" func_to_host_path_tmp1=$func_stripname_result func_convert_core_path_wine_to_w32 "$func_to_host_path_tmp1" func_cygpath -u -p "$func_convert_core_path_wine_to_w32_result" func_to_host_path_result="$func_cygpath_result" func_convert_path_check : : \ "$func_to_host_path_tmp1" "$func_to_host_path_result" func_convert_path_front_back_pathsep ":*" "*:" : "$1" fi } # end func_convert_path_nix_to_cygwin # func_mode_compile arg... func_mode_compile () { $opt_debug # Get the compilation command and the source file. base_compile= srcfile="$nonopt" # always keep a non-empty value in "srcfile" suppress_opt=yes suppress_output= arg_mode=normal libobj= later= pie_flag= for arg do case $arg_mode in arg ) # do not "continue". Instead, add this to base_compile lastarg="$arg" arg_mode=normal ;; target ) libobj="$arg" arg_mode=normal continue ;; normal ) # Accept any command-line options. 
case $arg in -o) test -n "$libobj" && \ func_fatal_error "you cannot specify \`-o' more than once" arg_mode=target continue ;; -pie | -fpie | -fPIE) func_append pie_flag " $arg" continue ;; -shared | -static | -prefer-pic | -prefer-non-pic) func_append later " $arg" continue ;; -no-suppress) suppress_opt=no continue ;; -Xcompiler) arg_mode=arg # the next one goes into the "base_compile" arg list continue # The current "srcfile" will either be retained or ;; # replaced later. I would guess that would be a bug. -Wc,*) func_stripname '-Wc,' '' "$arg" args=$func_stripname_result lastarg= save_ifs="$IFS"; IFS=',' for arg in $args; do IFS="$save_ifs" func_append_quoted lastarg "$arg" done IFS="$save_ifs" func_stripname ' ' '' "$lastarg" lastarg=$func_stripname_result # Add the arguments to base_compile. func_append base_compile " $lastarg" continue ;; *) # Accept the current argument as the source file. # The previous "srcfile" becomes the current argument. # lastarg="$srcfile" srcfile="$arg" ;; esac # case $arg ;; esac # case $arg_mode # Aesthetically quote the previous argument. func_append_quoted base_compile "$lastarg" done # for arg case $arg_mode in arg) func_fatal_error "you must specify an argument for -Xcompile" ;; target) func_fatal_error "you must specify a target with \`-o'" ;; *) # Get the name of the library object. test -z "$libobj" && { func_basename "$srcfile" libobj="$func_basename_result" } ;; esac # Recognize several different file suffixes. # If the user specifies -o file.o, it is replaced with file.lo case $libobj in *.[cCFSifmso] | \ *.ada | *.adb | *.ads | *.asm | \ *.c++ | *.cc | *.ii | *.class | *.cpp | *.cxx | \ *.[fF][09]? 
| *.for | *.java | *.go | *.obj | *.sx | *.cu | *.cup) func_xform "$libobj" libobj=$func_xform_result ;; esac case $libobj in *.lo) func_lo2o "$libobj"; obj=$func_lo2o_result ;; *) func_fatal_error "cannot determine name of library object from \`$libobj'" ;; esac func_infer_tag $base_compile for arg in $later; do case $arg in -shared) test "$build_libtool_libs" != yes && \ func_fatal_configuration "can not build a shared library" build_old_libs=no continue ;; -static) build_libtool_libs=no build_old_libs=yes continue ;; -prefer-pic) pic_mode=yes continue ;; -prefer-non-pic) pic_mode=no continue ;; esac done func_quote_for_eval "$libobj" test "X$libobj" != "X$func_quote_for_eval_result" \ && $ECHO "X$libobj" | $GREP '[]~#^*{};<>?"'"'"' &()|`$[]' \ && func_warning "libobj name \`$libobj' may not contain shell special characters." func_dirname_and_basename "$obj" "/" "" objname="$func_basename_result" xdir="$func_dirname_result" lobj=${xdir}$objdir/$objname test -z "$base_compile" && \ func_fatal_help "you must specify a compilation command" # Delete any leftover library objects. 
if test "$build_old_libs" = yes; then removelist="$obj $lobj $libobj ${libobj}T" else removelist="$lobj $libobj ${libobj}T" fi # On Cygwin there's no "real" PIC flag so we must build both object types case $host_os in cygwin* | mingw* | pw32* | os2* | cegcc*) pic_mode=default ;; esac if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then # non-PIC code in shared libraries is not supported pic_mode=default fi # Calculate the filename of the output object if compiler does # not support -o with -c if test "$compiler_c_o" = no; then output_obj=`$ECHO "$srcfile" | $SED 's%^.*/%%; s%\.[^.]*$%%'`.${objext} lockfile="$output_obj.lock" else output_obj= need_locks=no lockfile= fi # Lock this critical section if it is needed # We use this script file to make the link, it avoids creating a new file if test "$need_locks" = yes; then until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do func_echo "Waiting for $lockfile to be removed" sleep 2 done elif test "$need_locks" = warn; then if test -f "$lockfile"; then $ECHO "\ *** ERROR, $lockfile exists and contains: `cat $lockfile 2>/dev/null` This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support \`-c' and \`-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." $opt_dry_run || $RM $removelist exit $EXIT_FAILURE fi func_append removelist " $output_obj" $ECHO "$srcfile" > "$lockfile" fi $opt_dry_run || $RM $removelist func_append removelist " $lockfile" trap '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE' 1 2 15 func_to_tool_file "$srcfile" func_convert_file_msys_to_w32 srcfile=$func_to_tool_file_result func_quote_for_eval "$srcfile" qsrcfile=$func_quote_for_eval_result # Only build a PIC object if we are building libtool libraries. 
if test "$build_libtool_libs" = yes; then # Without this assignment, base_compile gets emptied. fbsd_hideous_sh_bug=$base_compile if test "$pic_mode" != no; then command="$base_compile $qsrcfile $pic_flag" else # Don't build PIC code command="$base_compile $qsrcfile" fi func_mkdir_p "$xdir$objdir" if test -z "$output_obj"; then # Place PIC objects in $objdir func_append command " -o $lobj" fi func_show_eval_locale "$command" \ 'test -n "$output_obj" && $RM $removelist; exit $EXIT_FAILURE' if test "$need_locks" = warn && test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then $ECHO "\ *** ERROR, $lockfile contains: `cat $lockfile 2>/dev/null` but it should contain: $srcfile This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support \`-c' and \`-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." $opt_dry_run || $RM $removelist exit $EXIT_FAILURE fi # Just move the object if needed, then go on to compile the next one if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then func_show_eval '$MV "$output_obj" "$lobj"' \ 'error=$?; $opt_dry_run || $RM $removelist; exit $error' fi # Allow error messages only from the first compilation. if test "$suppress_opt" = yes; then suppress_output=' >/dev/null 2>&1' fi fi # Only build a position-dependent object if we build old libraries. if test "$build_old_libs" = yes; then if test "$pic_mode" != yes; then # Don't build PIC code command="$base_compile $qsrcfile$pie_flag" else command="$base_compile $qsrcfile $pic_flag" fi if test "$compiler_c_o" = yes; then func_append command " -o $obj" fi # Suppress compiler output if we already did a PIC compilation. 
# (Tail of func_mode_compile, whose beginning lies above this point.)
      func_append command "$suppress_output"
      func_show_eval_locale "$command" \
        '$opt_dry_run || $RM $removelist; exit $EXIT_FAILURE'

      # With need_locks=warn the lock file must still name our source file;
      # anything else means another process used the same temporary object.
      if test "$need_locks" = warn &&
         test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
        $ECHO "\
*** ERROR, $lockfile contains:
`cat $lockfile 2>/dev/null`

but it should contain:
$srcfile

This indicates that another process is trying to use the same
temporary object file, and libtool could not work around it because
your compiler does not support \`-c' and \`-o' together. If you
repeat this compilation, it may succeed, by chance, but you had better
avoid parallel builds (make -j) in this platform, or get a better
compiler."

        $opt_dry_run || $RM $removelist
        exit $EXIT_FAILURE
      fi

      # Just move the object if needed
      if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then
        func_show_eval '$MV "$output_obj" "$obj"' \
          'error=$?; $opt_dry_run || $RM $removelist; exit $error'
      fi
    fi

    $opt_dry_run || {
      func_write_libtool_object "$libobj" "$objdir/$objname" "$objname"

      # Unlock the critical section if it was locked
      if test "$need_locks" != no; then
        removelist=$lockfile
        $RM "$lockfile"
      fi
    }

    exit $EXIT_SUCCESS
}

# Run compile mode now unless help was requested.
$opt_help || {
  test "$opt_mode" = compile && func_mode_compile ${1+"$@"}
}

# func_mode_help
# Print the usage text for the mode named in $opt_mode (the generic help
# when $opt_mode is empty), followed by a pointer to `--help'.  An unknown
# mode is reported through func_fatal_help.
func_mode_help ()
{
    # We need to display help for each of the modes.
    case $opt_mode in
      "")
        # Generic help is extracted from the usage comments
        # at the start of this file.
        func_help
        ;;

      clean)
        $ECHO \
"Usage: $progname [OPTION]... --mode=clean RM [RM-OPTION]... FILE...

Remove files from the build directory.

RM is the name of the program to use to delete files associated with each FILE
(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
to RM.

If FILE is a libtool library, object or program, all the files associated
with it are deleted. Otherwise, only FILE itself is deleted using RM."
        ;;

      compile)
      $ECHO \
"Usage: $progname [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE

Compile a source file into a libtool library object.

This mode accepts the following additional options:

  -o OUTPUT-FILE    set the output file name to OUTPUT-FILE
  -no-suppress      do not suppress compiler output for multiple passes
  -prefer-pic       try to build PIC objects only
  -prefer-non-pic   try to build non-PIC objects only
  -shared           do not build a \`.o' file suitable for static linking
  -static           only build a \`.o' file suitable for static linking
  -Wc,FLAG          pass FLAG directly to the compiler

COMPILE-COMMAND is a command to be used in creating a \`standard' object file
from the given SOURCEFILE.

The output file name is determined by removing the directory component from
SOURCEFILE, then substituting the C source code suffix \`.c' with the
library object suffix, \`.lo'."
        ;;

      execute)
        $ECHO \
"Usage: $progname [OPTION]... --mode=execute COMMAND [ARGS]...

Automatically set library path, then run a program.

This mode accepts the following additional options:

  -dlopen FILE      add the directory containing FILE to the library path

This mode sets the library path environment variable according to \`-dlopen'
flags.

If any of the ARGS are libtool executable wrappers, then they are translated
into their corresponding uninstalled binary, and any of their required library
directories are added to the library path.

Then, COMMAND is executed, with ARGS as arguments."
        ;;

      finish)
        $ECHO \
"Usage: $progname [OPTION]... --mode=finish [LIBDIR]...

Complete the installation of libtool libraries.

Each LIBDIR is a directory that contains libtool libraries.

The commands that this mode executes may require superuser privileges. Use
the \`--dry-run' option if you just want to see what would be executed."
        ;;

      install)
        $ECHO \
"Usage: $progname [OPTION]... --mode=install INSTALL-COMMAND...

Install executables or libraries.

INSTALL-COMMAND is the installation command. The first component should be
either the \`install' or \`cp' program.

The following components of INSTALL-COMMAND are treated specially:

  -inst-prefix-dir PREFIX-DIR  Use PREFIX-DIR as a staging area for installation

The rest of the components are interpreted as arguments to that command (only
BSD-compatible install options are recognized)."
        ;;

      link)
        $ECHO \
"Usage: $progname [OPTION]... --mode=link LINK-COMMAND...

Link object files or libraries together to form another library, or to
create an executable program.

LINK-COMMAND is a command using the C compiler that you would use to create
a program from several object files.

The following components of LINK-COMMAND are treated specially:

  -all-static       do not do any dynamic linking at all
  -avoid-version    do not add a version suffix if possible
  -bindir BINDIR    specify path to binaries directory (for systems where
                    libraries must be found in the PATH setting at runtime)
  -dlopen FILE      \`-dlpreopen' FILE if it cannot be dlopened at runtime
  -dlpreopen FILE   link in FILE and add its symbols to lt_preloaded_symbols
  -export-dynamic   allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
  -export-symbols SYMFILE
                    try to export only the symbols listed in SYMFILE
  -export-symbols-regex REGEX
                    try to export only the symbols matching REGEX
  -LLIBDIR          search LIBDIR for required installed libraries
  -lNAME            OUTPUT-FILE requires the installed library libNAME
  -module           build a library that can dlopened
  -no-fast-install  disable the fast-install mode
  -no-install       link a not-installable executable
  -no-undefined     declare that a library does not refer to external symbols
  -o OUTPUT-FILE    create OUTPUT-FILE from the specified objects
  -objectlist FILE  Use a list of object files found in FILE to specify objects
  -precious-files-regex REGEX
                    don't remove output files matching REGEX
  -release RELEASE  specify package release information
  -rpath LIBDIR     the created library will eventually be installed in LIBDIR
  -R[ ]LIBDIR       add LIBDIR to the runtime path of programs and libraries
  -shared           only do dynamic linking of libtool libraries
  -shrext SUFFIX    override the standard shared library file extension
  -static           do not do any dynamic linking of uninstalled libtool libraries
  -static-libtool-libs
                    do not do any dynamic linking of libtool libraries
  -version-info CURRENT[:REVISION[:AGE]]
                    specify library version info [each variable defaults to 0]
  -weak LIBNAME     declare that the target provides the LIBNAME interface
  -Wc,FLAG
  -Xcompiler FLAG   pass linker-specific FLAG directly to the compiler
  -Wl,FLAG
  -Xlinker FLAG     pass linker-specific FLAG directly to the linker
  -XCClinker FLAG   pass link-specific FLAG to the compiler driver (CC)

All other options (arguments beginning with \`-') are ignored.

Every other argument is treated as a filename. Files ending in \`.la' are
treated as uninstalled libtool libraries, other files are standard or library
object files.

If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
only library objects (\`.lo' files) may be specified, and \`-rpath' is
required, except when creating a convenience library.

If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
using \`ar' and \`ranlib', or on Windows using \`lib'.

If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
is created, otherwise an executable program is created."
        ;;

      uninstall)
        $ECHO \
"Usage: $progname [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...

Remove libraries from an installation directory.

RM is the name of the program to use to delete files associated with each FILE
(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
to RM.

If FILE is a libtool library, all the files associated with it are deleted.
Otherwise, only FILE itself is deleted using RM."
        ;;

      *)
        func_fatal_help "invalid operation mode \`$opt_mode'"
        ;;
    esac

    echo
    $ECHO "Try \`$progname --help' for more information about other modes."
}

# Now that we've collected a possible --mode arg, show help if necessary
if $opt_help; then
  if test "$opt_help" = :; then
    func_mode_help
  else
    # First pass: condensed list of the usage lines of every mode.
    {
      func_help noexit
      for opt_mode in compile link execute install finish uninstall clean; do
        func_mode_help
      done
    } | sed -n '1p; 2,$s/^Usage:/  or: /p'
    # Second pass: the full text, with each per-mode usage line turned into
    # a "Description of MODE mode:" heading and the bug-reporting paragraph
    # held back and emitted once at the very end.
    {
      func_help noexit
      for opt_mode in compile link execute install finish uninstall clean; do
        echo
        func_mode_help
      done
    } |
    sed '1d
      /^When reporting/,/^Report/{
        H
        d
      }
      $x
      /information about other modes/d
      /more detailed .*MODE/d
      s/^Usage:.*--mode=\([^ ]*\) .*/Description of \1 mode:/'
  fi
  exit $?
fi


# func_mode_execute arg...
# Implement --mode=execute.  The command is taken from $nonopt and its
# arguments from "$@".  The directory of every file in $opt_dlopen that is a
# dlopenable libtool archive or a libtool object is prepended to the
# variable named by $shlibpath_var; arguments that are libtool wrapper
# scripts/executables are replaced by "$progdir/$program" as read from the
# wrapper.  On a real run the quoted command line is left in $exec_cmd; on a
# dry run it is printed and the script exits successfully.
func_mode_execute ()
{
    $opt_debug
    # The first argument is the command name.
    cmd="$nonopt"
    test -z "$cmd" && \
      func_fatal_help "you must specify a COMMAND"

    # Handle -dlopen flags immediately.
    for file in $opt_dlopen; do
      test -f "$file" \
        || func_fatal_help "\`$file' is not a file"

      dir=
      case $file in
      *.la)
        func_resolve_sysroot "$file"
        file=$func_resolve_sysroot_result

        # Check to see that this really is a libtool archive.
        # The diagnostic must name $file, the path that was just tested;
        # it used to interpolate $lib, which is never assigned here.
        func_lalib_unsafe_p "$file" \
          || func_fatal_help "\`$file' is not a valid libtool archive"

        # Read the libtool library.
        dlname=
        library_names=
        func_source "$file"

        # Skip this library if it cannot be dlopened.
        if test -z "$dlname"; then
          # Warn if it was a shared library.
          test -n "$library_names" && \
            func_warning "\`$file' was not linked with \`-export-dynamic'"
          continue
        fi

        func_dirname "$file" "" "."
        dir="$func_dirname_result"

        # Prefer the uninstalled copy under $objdir when it exists.
        if test -f "$dir/$objdir/$dlname"; then
          func_append dir "/$objdir"
        else
          if test ! -f "$dir/$dlname"; then
            func_fatal_error "cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'"
          fi
        fi
        ;;

      *.lo)
        # Just add the directory containing the .lo file.
        func_dirname "$file" "" "."
        dir="$func_dirname_result"
        ;;

      *)
        func_warning "\`-dlopen' is ignored for non-libtool libraries and objects"
        continue
        ;;
      esac

      # Get the absolute pathname.
      absdir=`cd "$dir" && pwd`
      test -n "$absdir" && dir="$absdir"

      # Now add the directory to shlibpath_var.
      if eval "test -z \"\$$shlibpath_var\""; then
        eval "$shlibpath_var=\"\$dir\""
      else
        eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
      fi
    done

    # This variable tells wrapper scripts just to set shlibpath_var
    # rather than running their programs.
    libtool_execute_magic="$magic"

    # Check if any of the arguments is a wrapper script.
    args=
    for file
    do
      case $file in
      -* | *.la | *.lo ) ;;
      *)
        # Do a test to see if this is really a libtool program.
        if func_ltwrapper_script_p "$file"; then
          func_source "$file"
          # Transform arg to wrapped name.
          file="$progdir/$program"
        elif func_ltwrapper_executable_p "$file"; then
          func_ltwrapper_scriptname "$file"
          func_source "$func_ltwrapper_scriptname_result"
          # Transform arg to wrapped name.
          file="$progdir/$program"
        fi
        ;;
      esac
      # Quote arguments (to preserve shell metacharacters).
      func_append_quoted args "$file"
    done

    if test "X$opt_dry_run" = Xfalse; then
      if test -n "$shlibpath_var"; then
        # Export the shlibpath_var.
        eval "export $shlibpath_var"
      fi

      # Restore saved environment variables
      for lt_var in LANG LANGUAGE LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES
      do
        eval "if test \"\${save_$lt_var+set}\" = set; then
                $lt_var=\$save_$lt_var; export $lt_var
              else
                $lt_unset $lt_var
              fi"
      done

      # Now prepare to actually exec the command.
      exec_cmd="\$cmd$args"
    else
      # Display what would be done.
      if test -n "$shlibpath_var"; then
        eval "\$ECHO \"\$shlibpath_var=\$$shlibpath_var\""
        echo "export $shlibpath_var"
      fi
      $ECHO "$cmd$args"
      exit $EXIT_SUCCESS
    fi
}

test "$opt_mode" = execute && func_mode_execute ${1+"$@"}


# func_mode_finish arg...
# Implement --mode=finish.  Each argument ($nonopt plus "$@") must be either
# a directory (collected in $libdirs) or a libtool archive (collected in
# $libs).  Archives get sysroot and `=' prefixes stripped in place, the
# configured $finish_cmds / $finish_eval are run for every directory, and
# unless $opt_silent is set a notice about the installed libraries is
# printed.  Always exits the script.
func_mode_finish ()
{
    $opt_debug
    libs=
    libdirs=
    admincmds=

    for opt in "$nonopt" ${1+"$@"}
    do
      if test -d "$opt"; then
        func_append libdirs " $opt"

      elif test -f "$opt"; then
        if func_lalib_unsafe_p "$opt"; then
          func_append libs " $opt"
        else
          func_warning "\`$opt' is not a valid libtool archive"
        fi

      else
        func_fatal_error "invalid argument \`$opt'"
      fi
    done

    if test -n "$libs"; then
      if test -n "$lt_sysroot"; then
        sysroot_regex=`$ECHO "$lt_sysroot" | $SED "$sed_make_literal_regex"`
        sysroot_cmd="s/\([ ']\)$sysroot_regex/\1/g;"
      else
        sysroot_cmd=
      fi

      # Remove sysroot references
      if $opt_dry_run; then
        for lib in $libs; do
          echo "removing references to $lt_sysroot and \`=' prefixes from $lib"
        done
      else
        # Rewrite each archive through a temporary file, then move it back.
        tmpdir=`func_mktempdir`
        for lib in $libs; do
          sed -e "${sysroot_cmd} s/\([ ']-[LR]\)=/\1/g; s/\([ ']\)=/\1/g" $lib \
            > $tmpdir/tmp-la
          mv -f $tmpdir/tmp-la $lib
        done
        ${RM}r "$tmpdir"
      fi
    fi

    if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
      for libdir in $libdirs; do
        if test -n "$finish_cmds"; then
          # Do each command in the finish commands.
          func_execute_cmds "$finish_cmds" 'admincmds="$admincmds
'"$cmd"'"'
        fi
        if test -n "$finish_eval"; then
          # Do the single finish_eval.
          eval cmds=\"$finish_eval\"
          # A failing command is remembered so it can be listed for the
          # administrator in the notice below.
          $opt_dry_run || eval "$cmds" || func_append admincmds "
       $cmds"
        fi
      done
    fi

    # Exit here if they wanted silent mode.
    $opt_silent && exit $EXIT_SUCCESS

    if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
      echo "----------------------------------------------------------------------"
      echo "Libraries have been installed in:"
      for libdir in $libdirs; do
        $ECHO "   $libdir"
      done
      echo
      echo "If you ever happen to want to link against installed libraries"
      echo "in a given directory, LIBDIR, you must either use libtool, and"
      echo "specify the full pathname of the library, or use the \`-LLIBDIR'"
      echo "flag during linking and do at least one of the following:"
      if test -n "$shlibpath_var"; then
        echo "   - add LIBDIR to the \`$shlibpath_var' environment variable"
        echo "     during execution"
      fi
      if test -n "$runpath_var"; then
        echo "   - add LIBDIR to the \`$runpath_var' environment variable"
        echo "     during linking"
      fi
      if test -n "$hardcode_libdir_flag_spec"; then
        # Expand the flag template with the placeholder name LIBDIR.
        libdir=LIBDIR
        eval flag=\"$hardcode_libdir_flag_spec\"

        $ECHO "   - use the \`$flag' linker flag"
      fi
      if test -n "$admincmds"; then
        $ECHO "   - have your system administrator run these commands:$admincmds"
      fi
      if test -f /etc/ld.so.conf; then
        echo "   - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
      fi
      echo

      echo "See any operating system documentation about shared libraries for"
      case $host in
        solaris2.[6789]|solaris2.1[0-9])
          echo "more information, such as the ld(1), crle(1) and ld.so(8) manual"
          echo "pages."
          ;;
        *)
          echo "more information, such as the ld(1) and ld.so(8) manual pages."
          ;;
      esac
      echo "----------------------------------------------------------------------"
    fi
    exit $EXIT_SUCCESS
}

test "$opt_mode" = finish && func_mode_finish ${1+"$@"}


# func_mode_install arg...
func_mode_install ()
{
    $opt_debug
    # There may be an optional sh(1) argument at the beginning of
    # install_prog (especially on Windows NT).
    if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh ||
       # Allow the use of GNU shtool's install command.
       case $nonopt in *shtool*) :;; *) false;; esac; then
      # Aesthetically quote it.
# (func_mode_install continues: we are inside the branch that handles a
# leading shell/shtool word in front of the real install program.)
      func_quote_for_eval "$nonopt"
      install_prog="$func_quote_for_eval_result "
      arg=$1
      shift
    else
      install_prog=
      arg=$nonopt
    fi

    # The real first argument should be the name of the installation program.
    # Aesthetically quote it.
    func_quote_for_eval "$arg"
    func_append install_prog "$func_quote_for_eval_result"
    install_shared_prog=$install_prog
    # Remember whether the install program is `cp'; -f and -m are then
    # treated differently below.
    case " $install_prog " in
      *[\\\ /]cp\ *) install_cp=: ;;
      *) install_cp=false ;;
    esac

    # We need to accept at least all the BSD install flags.
    dest=
    files=
    opts=
    prev=
    install_type=
    isdir=no
    stripme=
    no_mode=:
    for arg
    do
      arg2=
      # Every non-option word is provisionally the destination; when another
      # one follows, the previous candidate becomes a file to install.
      if test -n "$dest"; then
        func_append files " $dest"
        dest=$arg
        continue
      fi

      case $arg in
      -d) isdir=yes ;;
      -f)
        if $install_cp; then :; else
          prev=$arg
        fi
        ;;
      -g | -m | -o)
        prev=$arg
        ;;
      -s)
        stripme=" -s"
        continue
        ;;
      -*)
        ;;
      *)
        # If the previous option needed an argument, then skip it.
        if test -n "$prev"; then
          if test "x$prev" = x-m && test -n "$install_override_mode"; then
            arg2=$install_override_mode
            no_mode=false
          fi
          prev=
        else
          dest=$arg
          continue
        fi
        ;;
      esac

      # Aesthetically quote the argument.
      func_quote_for_eval "$arg"
      func_append install_prog " $func_quote_for_eval_result"
      # For shared libraries the overriding mode (arg2), when set, replaces
      # the user-supplied -m value.
      if test -n "$arg2"; then
        func_quote_for_eval "$arg2"
      fi
      func_append install_shared_prog " $func_quote_for_eval_result"
    done

    test -z "$install_prog" && \
      func_fatal_help "you must specify an install program"

    test -n "$prev" && \
      func_fatal_help "the \`$prev' option requires an argument"

    if test -n "$install_override_mode" && $no_mode; then
      if $install_cp; then :; else
        func_quote_for_eval "$install_override_mode"
        func_append install_shared_prog " -m $func_quote_for_eval_result"
      fi
    fi

    if test -z "$files"; then
      if test -z "$dest"; then
        func_fatal_help "no file or destination specified"
      else
        func_fatal_help "you must specify a destination"
      fi
    fi

    # Strip any trailing slash from the destination.
    func_stripname '' '/' "$dest"
    dest=$func_stripname_result

    # Check to see that the destination is a directory.
    test -d "$dest" && isdir=yes
    if test "$isdir" = yes; then
      destdir="$dest"
      destname=
    else
      func_dirname_and_basename "$dest" "" "."
      destdir="$func_dirname_result"
      destname="$func_basename_result"

      # Not a directory, so check to see that there is only one file specified.
      set dummy $files; shift
      test "$#" -gt 1 && \
        func_fatal_help "\`$dest' is not a directory"
    fi
    case $destdir in
    [\\/]* | [A-Za-z]:[\\/]*) ;;
    *)
      # A relative destination is only accepted when every file is a .lo.
      for file in $files; do
        case $file in
        *.lo) ;;
        *)
          func_fatal_help "\`$destdir' must be an absolute directory name"
          ;;
        esac
      done
      ;;
    esac

    # This variable tells wrapper scripts just to set variables rather
    # than running their programs.
    libtool_install_magic="$magic"

    staticlibs=
    future_libdirs=
    current_libdirs=
    for file in $files; do

      # Do each installation.
      case $file in
      *.$libext)
        # Do the static libraries later.
        func_append staticlibs " $file"
        ;;

      *.la)
        func_resolve_sysroot "$file"
        file=$func_resolve_sysroot_result

        # Check to see that this really is a libtool archive.
        func_lalib_unsafe_p "$file" \
          || func_fatal_help "\`$file' is not a valid libtool archive"

        library_names=
        old_library=
        relink_command=
        func_source "$file"

        # Add the libdir to current_libdirs if it is the destination.
        if test "X$destdir" = "X$libdir"; then
          case "$current_libdirs " in
          *" $libdir "*) ;;
          *) func_append current_libdirs " $libdir" ;;
          esac
        else
          # Note the libdir as a future libdir.
          case "$future_libdirs " in
          *" $libdir "*) ;;
          *) func_append future_libdirs " $libdir" ;;
          esac
        fi

        func_dirname "$file" "/" ""
        dir="$func_dirname_result"
        func_append dir "$objdir"

        if test -n "$relink_command"; then
          # Determine the prefix the user has applied to our future dir.
          inst_prefix_dir=`$ECHO "$destdir" | $SED -e "s%$libdir\$%%"`

          # Don't allow the user to place us outside of our expected
          # location b/c this prevents finding dependent libraries that
          # are installed to the same prefix.
          # At present, this check doesn't affect windows .dll's that
          # are installed into $libdir/../bin (currently, that works fine)
          # but it's something to keep an eye on.
          test "$inst_prefix_dir" = "$destdir" && \
            func_fatal_error "error: cannot install \`$file' to a directory not ending in $libdir"

          if test -n "$inst_prefix_dir"; then
            # Stick the inst_prefix_dir data into the link command.
            relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"`
          else
            relink_command=`$ECHO "$relink_command" | $SED "s%@inst_prefix_dir@%%"`
          fi

          func_warning "relinking \`$file'"
          func_show_eval "$relink_command" \
            'func_fatal_error "error: relink \`$file'\'' with the above command before installing it"'
        fi

        # See the names of the shared library.
        set dummy $library_names; shift
        if test -n "$1"; then
          realname="$1"
          shift

          # A relinked library is installed from the copy with a `T' suffix.
          srcname="$realname"
          test -n "$relink_command" && srcname="$realname"T

          # Install the shared library and build the symlinks.
          func_show_eval "$install_shared_prog $dir/$srcname $destdir/$realname" \
              'exit $?'
          tstripme="$stripme"
          case $host_os in
          cygwin* | mingw* | pw32* | cegcc*)
            case $realname in
            *.dll.a)
              tstripme=""
              ;;
            esac
            ;;
          esac
          if test -n "$tstripme" && test -n "$striplib"; then
            func_show_eval "$striplib $destdir/$realname" 'exit $?'
          fi

          if test "$#" -gt 0; then
            # Delete the old symlinks, and create new ones.
            # Try `ln -sf' first, because the `ln' binary might depend on
            # the symlink we replace! Solaris /bin/ln does not understand -f,
            # so we also need to try rm && ln -s.
            for linkname
            do
              test "$linkname" != "$realname" \
                && func_show_eval "(cd $destdir && { $LN_S -f $realname $linkname || { $RM $linkname && $LN_S $realname $linkname; }; })"
            done
          fi

          # Do each command in the postinstall commands.
          lib="$destdir/$realname"
          func_execute_cmds "$postinstall_cmds" 'exit $?'
        fi

        # Install the pseudo-library for information purposes.
        func_basename "$file"
        name="$func_basename_result"
        instname="$dir/$name"i
        func_show_eval "$install_prog $instname $destdir/$name" 'exit $?'

        # Maybe install the static library, too.
        test -n "$old_library" && func_append staticlibs " $dir/$old_library"
        ;;

      *.lo)
        # Install (i.e. copy) a libtool object.

        # Figure out destination file name, if it wasn't already specified.
        if test -n "$destname"; then
          destfile="$destdir/$destname"
        else
          func_basename "$file"
          destfile="$func_basename_result"
          destfile="$destdir/$destfile"
        fi

        # Deduce the name of the destination old-style object file.
        case $destfile in
        *.lo)
          func_lo2o "$destfile"
          staticdest=$func_lo2o_result
          ;;
        *.$objext)
          staticdest="$destfile"
          destfile=
          ;;
        *)
          func_fatal_help "cannot copy a libtool object to \`$destfile'"
          ;;
        esac

        # Install the libtool object if requested.
        test -n "$destfile" && \
          func_show_eval "$install_prog $file $destfile" 'exit $?'

        # Install the old object if enabled.
        if test "$build_old_libs" = yes; then
          # Deduce the name of the old-style object file.
          func_lo2o "$file"
          staticobj=$func_lo2o_result
          func_show_eval "$install_prog \$staticobj \$staticdest" 'exit $?'
        fi
        exit $EXIT_SUCCESS
        ;;

      *)
        # Figure out destination file name, if it wasn't already specified.
        if test -n "$destname"; then
          destfile="$destdir/$destname"
        else
          func_basename "$file"
          destfile="$func_basename_result"
          destfile="$destdir/$destfile"
        fi

        # If the file is missing, and there is a .exe on the end, strip it
        # because it is most likely a libtool script we actually want to
        # install
        stripped_ext=""
        case $file in
          *.exe)
            if test ! -f "$file"; then
              func_stripname '' '.exe' "$file"
              file=$func_stripname_result
              stripped_ext=".exe"
            fi
            ;;
        esac

        # Do a test to see if this is really a libtool program.
        case $host in
        *cygwin* | *mingw*)
            if func_ltwrapper_executable_p "$file"; then
              func_ltwrapper_scriptname "$file"
              wrapper=$func_ltwrapper_scriptname_result
            else
              func_stripname '' '.exe' "$file"
              wrapper=$func_stripname_result
            fi
            ;;
        *)
            wrapper=$file
            ;;
        esac
        if func_ltwrapper_script_p "$wrapper"; then
          notinst_deplibs=
          relink_command=

          func_source "$wrapper"

          # Check the variables that should have been set.
          test -z "$generated_by_libtool_version" && \
            func_fatal_error "invalid libtool wrapper script \`$wrapper'"

          finalize=yes
          for lib in $notinst_deplibs; do
            # Check to see that each library is installed.
            libdir=
            if test -f "$lib"; then
              func_source "$lib"
            fi
            libfile="$libdir/"`$ECHO "$lib" | $SED 's%^.*/%%g'` ### testsuite: skip nested quoting test
            if test -n "$libdir" && test ! -f "$libfile"; then
              func_warning "\`$lib' has not been installed in \`$libdir'"
              finalize=no
            fi
          done

          # Sourcing the dependency archives may have clobbered
          # relink_command, so read the wrapper again.
          relink_command=
          func_source "$wrapper"

          outputname=
          if test "$fast_install" = no && test -n "$relink_command"; then
            $opt_dry_run || {
              if test "$finalize" = yes; then
                tmpdir=`func_mktempdir`
                func_basename "$file$stripped_ext"
                file="$func_basename_result"
                outputname="$tmpdir/$file"
                # Replace the output file specification.
                relink_command=`$ECHO "$relink_command" | $SED 's%@OUTPUT@%'"$outputname"'%g'`

                $opt_silent || {
                  func_quote_for_expand "$relink_command"
                  eval "func_echo $func_quote_for_expand_result"
                }
                if eval "$relink_command"; then :
                  else
                  func_error "error: relink \`$file' with the above command before installing it"
                  $opt_dry_run || ${RM}r "$tmpdir"
                  continue
                fi
                file="$outputname"
              else
                func_warning "cannot relink \`$file'"
              fi
            }
          else
            # Install the binary that we compiled earlier.
            file=`$ECHO "$file$stripped_ext" | $SED "s%\([^/]*\)$%$objdir/\1%"`
          fi
        fi

        # remove .exe since cygwin /usr/bin/install will append another
        # one anyway
        case $install_prog,$host in
        */usr/bin/install*,*cygwin*)
          case $file:$destfile in
          *.exe:*.exe)
            # this is ok
            ;;
          *.exe:*)
            destfile=$destfile.exe
            ;;
          *:*.exe)
            func_stripname '' '.exe' "$destfile"
            destfile=$func_stripname_result
            ;;
          esac
          ;;
        esac
        func_show_eval "$install_prog\$stripme \$file \$destfile" 'exit $?'
        # Remove the temporary relink directory, if one was created.
        $opt_dry_run || if test -n "$outputname"; then
          ${RM}r "$tmpdir"
        fi
        ;;
      esac
    done

    for file in $staticlibs; do
      func_basename "$file"
      name="$func_basename_result"

      # Set up the ranlib parameters.
      oldlib="$destdir/$name"
      func_to_tool_file "$oldlib" func_convert_file_msys_to_w32
      tool_oldlib=$func_to_tool_file_result

      func_show_eval "$install_prog \$file \$oldlib" 'exit $?'

      if test -n "$stripme" && test -n "$old_striplib"; then
        func_show_eval "$old_striplib $tool_oldlib" 'exit $?'
      fi

      # Do each command in the postinstall commands.
      func_execute_cmds "$old_postinstall_cmds" 'exit $?'
    done

    test -n "$future_libdirs" && \
      func_warning "remember to run \`$progname --finish$future_libdirs'"

    if test -n "$current_libdirs"; then
      # Maybe just do a dry run.
      $opt_dry_run && current_libdirs=" -n$current_libdirs"
      exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs'
    else
      exit $EXIT_SUCCESS
    fi
}

test "$opt_mode" = install && func_mode_install ${1+"$@"}


# func_generate_dlsyms outputname originator pic_p
# Extract symbols from dlprefiles and create ${outputname}S.o with
# a dlpreopen symbol table.
func_generate_dlsyms () { $opt_debug my_outputname="$1" my_originator="$2" my_pic_p="${3-no}" my_prefix=`$ECHO "$my_originator" | sed 's%[^a-zA-Z0-9]%_%g'` my_dlsyms= if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then if test -n "$NM" && test -n "$global_symbol_pipe"; then my_dlsyms="${my_outputname}S.c" else func_error "not configured to extract global symbols from dlpreopened files" fi fi if test -n "$my_dlsyms"; then case $my_dlsyms in "") ;; *.c) # Discover the nlist of each of the dlfiles. nlist="$output_objdir/${my_outputname}.nm" func_show_eval "$RM $nlist ${nlist}S ${nlist}T" # Parse the name list into a source file. func_verbose "creating $output_objdir/$my_dlsyms" $opt_dry_run || $ECHO > "$output_objdir/$my_dlsyms" "\ /* $my_dlsyms - symbol resolution table for \`$my_outputname' dlsym emulation. */ /* Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION */ #ifdef __cplusplus extern \"C\" { #endif #if defined(__GNUC__) && (((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) || (__GNUC__ > 4)) #pragma GCC diagnostic ignored \"-Wstrict-prototypes\" #endif /* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests. */ #if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE) /* DATA imports from DLLs on WIN32 con't be const, because runtime relocations are performed -- see ld's documentation on pseudo-relocs. */ # define LT_DLSYM_CONST #elif defined(__osf__) /* This system does not cope well with relocations in const data. */ # define LT_DLSYM_CONST #else # define LT_DLSYM_CONST const #endif /* External symbol declarations for the compiler. */\ " if test "$dlself" = yes; then func_verbose "generating symbol list for \`$output'" $opt_dry_run || echo ': @PROGRAM@ ' > "$nlist" # Add our own program objects to the symbol list. 
progfiles=`$ECHO "$objs$old_deplibs" | $SP2NL | $SED "$lo2o" | $NL2SP` for progfile in $progfiles; do func_to_tool_file "$progfile" func_convert_file_msys_to_w32 func_verbose "extracting global C symbols from \`$func_to_tool_file_result'" $opt_dry_run || eval "$NM $func_to_tool_file_result | $global_symbol_pipe >> '$nlist'" done if test -n "$exclude_expsyms"; then $opt_dry_run || { eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' } fi if test -n "$export_symbols_regex"; then $opt_dry_run || { eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' } fi # Prepare the list of exported symbols if test -z "$export_symbols"; then export_symbols="$output_objdir/$outputname.exp" $opt_dry_run || { $RM $export_symbols eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' case $host in *cygwin* | *mingw* | *cegcc* ) eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"' ;; esac } else $opt_dry_run || { eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"' eval '$GREP -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T' eval '$MV "$nlist"T "$nlist"' case $host in *cygwin* | *mingw* | *cegcc* ) eval "echo EXPORTS "'> "$output_objdir/$outputname.def"' eval 'cat "$nlist" >> "$output_objdir/$outputname.def"' ;; esac } fi fi for dlprefile in $dlprefiles; do func_verbose "extracting global C symbols from \`$dlprefile'" func_basename "$dlprefile" name="$func_basename_result" case $host in *cygwin* | *mingw* | *cegcc* ) # if an import library, we need to obtain dlname if func_win32_import_lib_p "$dlprefile"; then func_tr_sh "$dlprefile" eval "curr_lafile=\$libfile_$func_tr_sh_result" dlprefile_dlbasename="" if test -n "$curr_lafile" && func_lalib_p "$curr_lafile"; then # Use subshell, to avoid clobbering current variable 
values dlprefile_dlname=`source "$curr_lafile" && echo "$dlname"` if test -n "$dlprefile_dlname" ; then func_basename "$dlprefile_dlname" dlprefile_dlbasename="$func_basename_result" else # no lafile. user explicitly requested -dlpreopen . $sharedlib_from_linklib_cmd "$dlprefile" dlprefile_dlbasename=$sharedlib_from_linklib_result fi fi $opt_dry_run || { if test -n "$dlprefile_dlbasename" ; then eval '$ECHO ": $dlprefile_dlbasename" >> "$nlist"' else func_warning "Could not compute DLL name from $name" eval '$ECHO ": $name " >> "$nlist"' fi func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe | $SED -e '/I __imp/d' -e 's/I __nm_/D /;s/_nm__//' >> '$nlist'" } else # not an import lib $opt_dry_run || { eval '$ECHO ": $name " >> "$nlist"' func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" } fi ;; *) $opt_dry_run || { eval '$ECHO ": $name " >> "$nlist"' func_to_tool_file "$dlprefile" func_convert_file_msys_to_w32 eval "$NM \"$func_to_tool_file_result\" 2>/dev/null | $global_symbol_pipe >> '$nlist'" } ;; esac done $opt_dry_run || { # Make sure we have at least an empty file. test -f "$nlist" || : > "$nlist" if test -n "$exclude_expsyms"; then $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T $MV "$nlist"T "$nlist" fi # Try sorting and uniquifying the output. if $GREP -v "^: " < "$nlist" | if sort -k 3 /dev/null 2>&1; then sort -k 3 else sort +2 fi | uniq > "$nlist"S; then : else $GREP -v "^: " < "$nlist" > "$nlist"S fi if test -f "$nlist"S; then eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$my_dlsyms"' else echo '/* NONE */' >> "$output_objdir/$my_dlsyms" fi echo >> "$output_objdir/$my_dlsyms" "\ /* The mapping between symbol names and symbols. 
*/ typedef struct { const char *name; void *address; } lt_dlsymlist; extern LT_DLSYM_CONST lt_dlsymlist lt_${my_prefix}_LTX_preloaded_symbols[]; LT_DLSYM_CONST lt_dlsymlist lt_${my_prefix}_LTX_preloaded_symbols[] = {\ { \"$my_originator\", (void *) 0 }," case $need_lib_prefix in no) eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$my_dlsyms" ;; *) eval "$global_symbol_to_c_name_address_lib_prefix" < "$nlist" >> "$output_objdir/$my_dlsyms" ;; esac echo >> "$output_objdir/$my_dlsyms" "\ {0, (void *) 0} }; /* This works around a problem in FreeBSD linker */ #ifdef FREEBSD_WORKAROUND static const void *lt_preloaded_setup() { return lt_${my_prefix}_LTX_preloaded_symbols; } #endif #ifdef __cplusplus } #endif\ " } # !$opt_dry_run pic_flag_for_symtable= case "$compile_command " in *" -static "*) ;; *) case $host in # compiling the symbol table file with pic_flag works around # a FreeBSD bug that causes programs to crash when -lm is # linked before any other PIC object. But we must not use # pic_flag when linking with -static. The problem exists in # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND" ;; *-*-hpux*) pic_flag_for_symtable=" $pic_flag" ;; *) if test "X$my_pic_p" != Xno; then pic_flag_for_symtable=" $pic_flag" fi ;; esac ;; esac symtab_cflags= for arg in $LTCFLAGS; do case $arg in -pie | -fpie | -fPIE) ;; *) func_append symtab_cflags " $arg" ;; esac done # Now compile the dynamic symbol file. func_show_eval '(cd $output_objdir && $LTCC$symtab_cflags -c$no_builtin_flag$pic_flag_for_symtable "$my_dlsyms")' 'exit $?' # Clean up the generated files. func_show_eval '$RM "$output_objdir/$my_dlsyms" "$nlist" "${nlist}S" "${nlist}T"' # Transform the symbol file into the correct name. 
symfileobj="$output_objdir/${my_outputname}S.$objext" case $host in *cygwin* | *mingw* | *cegcc* ) if test -f "$output_objdir/$my_outputname.def"; then compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$output_objdir/$my_outputname.def $symfileobj%"` else compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` fi ;; *) compile_command=`$ECHO "$compile_command" | $SED "s%@SYMFILE@%$symfileobj%"` finalize_command=`$ECHO "$finalize_command" | $SED "s%@SYMFILE@%$symfileobj%"` ;; esac ;; *) func_fatal_error "unknown suffix for \`$my_dlsyms'" ;; esac else # We keep going just in case the user didn't refer to # lt_preloaded_symbols. The linker will fail if global_symbol_pipe # really was required. # Nullify the symbol file. compile_command=`$ECHO "$compile_command" | $SED "s% @SYMFILE@%%"` finalize_command=`$ECHO "$finalize_command" | $SED "s% @SYMFILE@%%"` fi } # func_win32_libid arg # return the library type of file 'arg' # # Need a lot of goo to handle *both* DLLs and import libs # Has to be a shell function in order to 'eat' the argument # that is supplied when $file_magic_command is called. # Despite the name, also deal with 64 bit binaries. func_win32_libid () { $opt_debug win32_libid_type="unknown" win32_fileres=`file -L $1 2>/dev/null` case $win32_fileres in *ar\ archive\ import\ library*) # definitely import win32_libid_type="x86 archive import" ;; *ar\ archive*) # could be an import, or static # Keep the egrep pattern in sync with the one in _LT_CHECK_MAGIC_METHOD. 
if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | $EGREP 'file format (pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)' >/dev/null; then func_to_tool_file "$1" func_convert_file_msys_to_w32 win32_nmres=`eval $NM -f posix -A \"$func_to_tool_file_result\" | $SED -n -e ' 1,100{ / I /{ s,.*,import, p q } }'` case $win32_nmres in import*) win32_libid_type="x86 archive import";; *) win32_libid_type="x86 archive static";; esac fi ;; *DLL*) win32_libid_type="x86 DLL" ;; *executable*) # but shell scripts are "executable" too... case $win32_fileres in *MS\ Windows\ PE\ Intel*) win32_libid_type="x86 DLL" ;; esac ;; esac $ECHO "$win32_libid_type" } # func_cygming_dll_for_implib ARG # # Platform-specific function to extract the # name of the DLL associated with the specified # import library ARG. # Invoked by eval'ing the libtool variable # $sharedlib_from_linklib_cmd # Result is available in the variable # $sharedlib_from_linklib_result func_cygming_dll_for_implib () { $opt_debug sharedlib_from_linklib_result=`$DLLTOOL --identify-strict --identify "$1"` } # func_cygming_dll_for_implib_fallback_core SECTION_NAME LIBNAMEs # # The is the core of a fallback implementation of a # platform-specific function to extract the name of the # DLL associated with the specified import library LIBNAME. # # SECTION_NAME is either .idata$6 or .idata$7, depending # on the platform and compiler that created the implib. # # Echos the name of the DLL associated with the # specified import library. 
func_cygming_dll_for_implib_fallback_core ()
{
  # $1 is the section name, $2 the import library passed to $OBJDUMP.
  # The result (at most one line) is written to stdout.
  #
  # NOTE(review): whitespace in this file appears to have been collapsed;
  # the bracket expression "[ ]" in the first sed program below may
  # originally have held a tab as well as a space -- confirm against the
  # upstream ltmain.sh before relying on it.
  $opt_debug
  # Turn the section name into a form that can be spliced into the sed
  # address below ($sed_make_literal_regex is defined outside this chunk;
  # presumably it escapes regex metacharacters -- verify).
  match_literal=`$ECHO "$1" | $SED "$sed_make_literal_regex"`
  $OBJDUMP -s --section "$1" "$2" 2>/dev/null |
    $SED '/^Contents of section '"$match_literal"':/{
        # Place marker at beginning of archive member dllname section
        s/.*/====MARK====/
        p
        d
      }
      # These lines can sometimes be longer than 43 characters, but
      # are always uninteresting
      /:[ ]*file format pe[i]\{,1\}-/d
      /^In archive [^:]*:/d
      # Ensure marker is printed
      /^====MARK====/p
      # Remove all lines with less than 43 characters
      /^.\{43\}/!d
      # From remaining lines, remove first 43 characters
      s/^.\{43\}//' |
    $SED -n '
      # Join marker and all lines until next marker into a single line
      /^====MARK====/ b para
      H
      $ b para
      b
      :para
      x
      s/\n//g
      # Remove the marker
      s/^====MARK====//
      # Remove trailing dots and whitespace
      s/[\. \t]*$//
      # Print
      /./p' |
    # we now have a list, one entry per line, of the stringified
    # contents of the appropriate section of all members of the
    # archive which possess that section. Heuristic: eliminate
    # all those which have a first or second character that is
    # a '.' (that is, objdump's representation of an unprintable
    # character.) This should work for all archives with less than
    # 0x302f exports -- but will fail for DLLs whose name actually
    # begins with a literal '.' or a single character followed by
    # a '.'.
    #
    # Of those that remain, print the first one.
    $SED -e '/^\./d;/^.\./d;q'
}

# func_cygming_gnu_implib_p ARG
# This predicate returns with zero status (TRUE) if
# ARG is a GNU/binutils-style import library.  Returns
# with nonzero status (FALSE) otherwise.
func_cygming_gnu_implib_p ()
{
  $opt_debug
  func_to_tool_file "$1" func_convert_file_msys_to_w32
  # Non-empty when nm lists a _head_*_a/_dll style or *_iname symbol.
  func_cygming_gnu_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $EGREP ' (_head_[A-Za-z0-9_]+_[ad]l*|[A-Za-z0-9_]+_[ad]l*_iname)$'`
  test -n "$func_cygming_gnu_implib_tmp"
}

# func_cygming_ms_implib_p ARG
# This predicate returns with zero status (TRUE) if
# ARG is an MS-style import library.  Returns
# with nonzero status (FALSE) otherwise.
func_cygming_ms_implib_p ()
{
  $opt_debug
  func_to_tool_file "$1" func_convert_file_msys_to_w32
  # Non-empty when nm lists a _NULL_IMPORT_DESCRIPTOR symbol.
  func_cygming_ms_implib_tmp=`$NM "$func_to_tool_file_result" | eval "$global_symbol_pipe" | $GREP '_NULL_IMPORT_DESCRIPTOR'`
  test -n "$func_cygming_ms_implib_tmp"
}

# func_cygming_dll_for_implib_fallback ARG
# Platform-specific function to extract the
# name of the DLL associated with the specified
# import library ARG.
#
# This fallback implementation is for use when $DLLTOOL
# does not support the --identify-strict option.
# Invoked by eval'ing the libtool variable
#    $sharedlib_from_linklib_cmd
# Result is available in the variable
#    $sharedlib_from_linklib_result
func_cygming_dll_for_implib_fallback ()
{
  $opt_debug
  if func_cygming_gnu_implib_p "$1" ; then
    # binutils import library
    sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$7' "$1"`
  elif func_cygming_ms_implib_p "$1" ; then
    # ms-generated import library
    sharedlib_from_linklib_result=`func_cygming_dll_for_implib_fallback_core '.idata$6' "$1"`
  else
    # unknown
    sharedlib_from_linklib_result=""
  fi
}


# func_extract_an_archive dir oldlib
# Extract every member of archive OLDLIB into directory DIR, optionally
# serialised through a "$oldlib.lock" hard link when
# $lock_old_archive_extraction is "yes".  Aborts via func_fatal_error
# when the archive's member listing contains duplicate names.
func_extract_an_archive ()
{
    $opt_debug
    f_ex_an_ar_dir="$1"; shift
    f_ex_an_ar_oldlib="$1"
    if test "$lock_old_archive_extraction" = yes; then
      lockfile=$f_ex_an_ar_oldlib.lock
      until $opt_dry_run || ln "$progpath" "$lockfile" 2>/dev/null; do
        func_echo "Waiting for $lockfile to be removed"
        sleep 2
      done
    fi
    # Both operands are quoted inside the eval'ed command so that a
    # directory name with whitespace is handled like the archive name.
    func_show_eval "(cd \"\$f_ex_an_ar_dir\" && $AR x \"\$f_ex_an_ar_oldlib\")" \
                   'stat=$?; rm -f "$lockfile"; exit $stat'
    if test "$lock_old_archive_extraction" = yes; then
      $opt_dry_run || rm -f "$lockfile"
    fi
    # "sort -uc" fails when the sorted member list contains a duplicate.
    if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then
     :
    else
      func_fatal_error "object name conflicts in archive: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib"
    fi
}


# func_extract_archives gentop oldlib ...
# Extract each OLDLIB into its own subdirectory of GENTOP and collect the
# extracted *.$objext / *.lo files (space separated) in
# $func_extract_archives_result.
func_extract_archives ()
{
    $opt_debug
    my_gentop="$1"; shift
    my_oldlibs=${1+"$@"}
    my_oldobjs=""
    my_xlib=""
    my_xabs=""
    my_xdir=""

    for my_xlib in $my_oldlibs; do
      # Extract the objects.
      case $my_xlib in
        [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;;
        *) my_xabs=`pwd`"/$my_xlib" ;;
      esac
      func_basename "$my_xlib"
      my_xlib="$func_basename_result"
      my_xlib_u=$my_xlib
      # Pick a directory name not used by an earlier extraction by
      # prefixing "lt<serial>-" until it is unique.
      while :; do
        case " $extracted_archives " in
        *" $my_xlib_u "*)
          func_arith $extracted_serial + 1
          extracted_serial=$func_arith_result
          my_xlib_u=lt$extracted_serial-$my_xlib ;;
        *) break ;;
        esac
      done
      extracted_archives="$extracted_archives $my_xlib_u"
      my_xdir="$my_gentop/$my_xlib_u"

      func_mkdir_p "$my_xdir"

      case $host in
      *-darwin*)
        func_verbose "Extracting $my_xabs"
        # Do not bother doing anything if just a dry run
        $opt_dry_run || {
          darwin_orig_dir=`pwd`
          cd "$my_xdir" || exit $?
          darwin_archive=$my_xabs
          darwin_curdir=`pwd`
          darwin_base_archive=`basename "$darwin_archive"`
          darwin_arches=`$LIPO -info "$darwin_archive" 2>/dev/null | $GREP Architectures 2>/dev/null || true`
          if test -n "$darwin_arches"; then
            darwin_arches=`$ECHO "$darwin_arches" | $SED -e 's/.*are://'`
            darwin_arch=
            func_verbose "$darwin_base_archive has multiple architectures $darwin_arches"
            for darwin_arch in $darwin_arches ; do
              func_mkdir_p "unfat-$$/${darwin_base_archive}-${darwin_arch}"
              $LIPO -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}"
              cd "unfat-$$/${darwin_base_archive}-${darwin_arch}"
              func_extract_an_archive "`pwd`" "${darwin_base_archive}"
              cd "$darwin_curdir"
              $RM "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}"
            done # $darwin_arches
            ## Okay now we've a bunch of thin objects, gotta fatten them up :)
            darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print | $SED -e "$basename" | sort -u`
            darwin_file=
            darwin_files=
            for darwin_file in $darwin_filelist; do
              # Quoted so the shell does not glob-expand the name
              # before find sees it.
              darwin_files=`find unfat-$$ -name "$darwin_file" -print | sort | $NL2SP`
              $LIPO -create -output "$darwin_file" $darwin_files
            done # $darwin_filelist
            $RM -rf unfat-$$
            cd "$darwin_orig_dir"
          else
            cd "$darwin_orig_dir"
            func_extract_an_archive "$my_xdir" "$my_xabs"
          fi # $darwin_arches
        } # !$opt_dry_run
        ;;
      *)
        func_extract_an_archive "$my_xdir" "$my_xabs"
        ;;
      esac
      my_oldobjs="$my_oldobjs "`find "$my_xdir" -name \*.$objext -print -o -name \*.lo -print | sort | $NL2SP`
    done

    func_extract_archives_result="$my_oldobjs"
}


# func_emit_wrapper [arg=no]
#
# Emit a libtool wrapper script on stdout.
# Don't directly open a file because we may want to
# incorporate the script contents within a cygwin/mingw
# wrapper executable.  Must ONLY be called from within
# func_mode_link because it depends on a number of variables
# set therein.
#
# ARG is the value that the WRAPPER_SCRIPT_BELONGS_IN_OBJDIR
# variable will take.
If 'yes', then the emitted script # will assume that the directory in which it is stored is # the $objdir directory. This is a cygwin/mingw-specific # behavior. func_emit_wrapper () { func_emit_wrapper_arg1=${1-no} $ECHO "\ #! $SHELL # $output - temporary wrapper script for $objdir/$outputname # Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION # # The $output program cannot be directly executed until all the libtool # libraries that it depends on are installed. # # This wrapper script should never be moved out of the build directory. # If it is, it will not operate correctly. # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. sed_quote_subst='$sed_quote_subst' # Be Bourne compatible if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac fi BIN_SH=xpg4; export BIN_SH # for Tru64 DUALCASE=1; export DUALCASE # for MKS sh # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH relink_command=\"$relink_command\" # This environment variable determines our operation mode. if test \"\$libtool_install_magic\" = \"$magic\"; then # install mode needs the following variables: generated_by_libtool_version='$macro_version' notinst_deplibs='$notinst_deplibs' else # When we are sourced in execute mode, \$file and \$ECHO are already set. if test \"\$libtool_execute_magic\" != \"$magic\"; then file=\"\$0\"" qECHO=`$ECHO "$ECHO" | $SED "$sed_quote_subst"` $ECHO "\ # A function that is used when there is no print builtin or printf. 
func_fallback_echo () { eval 'cat <<_LTECHO_EOF \$1 _LTECHO_EOF' } ECHO=\"$qECHO\" fi # Very basic option parsing. These options are (a) specific to # the libtool wrapper, (b) are identical between the wrapper # /script/ and the wrapper /executable/ which is used only on # windows platforms, and (c) all begin with the string "--lt-" # (application programs are unlikely to have options which match # this pattern). # # There are only two supported options: --lt-debug and # --lt-dump-script. There is, deliberately, no --lt-help. # # The first argument to this parsing function should be the # script's $0 value, followed by "$@". lt_option_debug= func_parse_lt_options () { lt_script_arg0=\$0 shift for lt_opt do case \"\$lt_opt\" in --lt-debug) lt_option_debug=1 ;; --lt-dump-script) lt_dump_D=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%/[^/]*$%%'\` test \"X\$lt_dump_D\" = \"X\$lt_script_arg0\" && lt_dump_D=. lt_dump_F=\`\$ECHO \"X\$lt_script_arg0\" | $SED -e 's/^X//' -e 's%^.*/%%'\` cat \"\$lt_dump_D/\$lt_dump_F\" exit 0 ;; --lt-*) \$ECHO \"Unrecognized --lt- option: '\$lt_opt'\" 1>&2 exit 1 ;; esac done # Print the debug banner immediately: if test -n \"\$lt_option_debug\"; then echo \"${outputname}:${output}:\${LINENO}: libtool wrapper (GNU $PACKAGE$TIMESTAMP) $VERSION\" 1>&2 fi } # Used when --lt-debug. 
Prints its arguments to stdout # (redirection is the responsibility of the caller) func_lt_dump_args () { lt_dump_args_N=1; for lt_arg do \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[\$lt_dump_args_N]: \$lt_arg\" lt_dump_args_N=\`expr \$lt_dump_args_N + 1\` done } # Core function for launching the target application func_exec_program_core () { " case $host in # Backslashes separate directories on plain windows *-*-mingw | *-*-os2* | *-cegcc*) $ECHO "\ if test -n \"\$lt_option_debug\"; then \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir\\\\\$program\" 1>&2 func_lt_dump_args \${1+\"\$@\"} 1>&2 fi exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} " ;; *) $ECHO "\ if test -n \"\$lt_option_debug\"; then \$ECHO \"${outputname}:${output}:\${LINENO}: newargv[0]: \$progdir/\$program\" 1>&2 func_lt_dump_args \${1+\"\$@\"} 1>&2 fi exec \"\$progdir/\$program\" \${1+\"\$@\"} " ;; esac $ECHO "\ \$ECHO \"\$0: cannot exec \$program \$*\" 1>&2 exit 1 } # A function to encapsulate launching the target application # Strips options in the --lt-* namespace from \$@ and # launches target application with the remaining arguments. func_exec_program () { case \" \$* \" in *\\ --lt-*) for lt_wr_arg do case \$lt_wr_arg in --lt-*) ;; *) set x \"\$@\" \"\$lt_wr_arg\"; shift;; esac shift done ;; esac func_exec_program_core \${1+\"\$@\"} } # Parse options func_parse_lt_options \"\$0\" \${1+\"\$@\"} # Find the directory that this script lives in. thisdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*$%%'\` test \"x\$thisdir\" = \"x\$file\" && thisdir=. # Follow symbolic links until we get to the real thisdir. file=\`ls -ld \"\$file\" | $SED -n 's/.*-> //p'\` while test -n \"\$file\"; do destdir=\`\$ECHO \"\$file\" | $SED 's%/[^/]*\$%%'\` # If there was a directory component, then change thisdir. 
if test \"x\$destdir\" != \"x\$file\"; then case \"\$destdir\" in [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; *) thisdir=\"\$thisdir/\$destdir\" ;; esac fi file=\`\$ECHO \"\$file\" | $SED 's%^.*/%%'\` file=\`ls -ld \"\$thisdir/\$file\" | $SED -n 's/.*-> //p'\` done # Usually 'no', except on cygwin/mingw when embedded into # the cwrapper. WRAPPER_SCRIPT_BELONGS_IN_OBJDIR=$func_emit_wrapper_arg1 if test \"\$WRAPPER_SCRIPT_BELONGS_IN_OBJDIR\" = \"yes\"; then # special case for '.' if test \"\$thisdir\" = \".\"; then thisdir=\`pwd\` fi # remove .libs from thisdir case \"\$thisdir\" in *[\\\\/]$objdir ) thisdir=\`\$ECHO \"\$thisdir\" | $SED 's%[\\\\/][^\\\\/]*$%%'\` ;; $objdir ) thisdir=. ;; esac fi # Try to get the absolute directory name. absdir=\`cd \"\$thisdir\" && pwd\` test -n \"\$absdir\" && thisdir=\"\$absdir\" " if test "$fast_install" = yes; then $ECHO "\ program=lt-'$outputname'$exeext progdir=\"\$thisdir/$objdir\" if test ! -f \"\$progdir/\$program\" || { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ test \"X\$file\" != \"X\$progdir/\$program\"; }; then file=\"\$\$-\$program\" if test ! -d \"\$progdir\"; then $MKDIR \"\$progdir\" else $RM \"\$progdir/\$file\" fi" $ECHO "\ # relink executable if necessary if test -n \"\$relink_command\"; then if relink_command_output=\`eval \$relink_command 2>&1\`; then : else $ECHO \"\$relink_command_output\" >&2 $RM \"\$progdir/\$file\" exit 1 fi fi $MV \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || { $RM \"\$progdir/\$program\"; $MV \"\$progdir/\$file\" \"\$progdir/\$program\"; } $RM \"\$progdir/\$file\" fi" else $ECHO "\ program='$outputname' progdir=\"\$thisdir/$objdir\" " fi $ECHO "\ if test -f \"\$progdir/\$program\"; then" # fixup the dll searchpath if we need to. # # Fix the DLL searchpath if we need to. Do this before prepending # to shlibpath, because on Windows, both are PATH and uninstalled # libraries must come first. 
if test -n "$dllsearchpath"; then $ECHO "\ # Add the dll search path components to the executable PATH PATH=$dllsearchpath:\$PATH " fi # Export our shlibpath_var if we have one. if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then $ECHO "\ # Add our own library path to $shlibpath_var $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" # Some systems cannot cope with colon-terminated $shlibpath_var # The second colon is a workaround for a bug in BeOS R4 sed $shlibpath_var=\`\$ECHO \"\$$shlibpath_var\" | $SED 's/::*\$//'\` export $shlibpath_var " fi $ECHO "\ if test \"\$libtool_execute_magic\" != \"$magic\"; then # Run the actual program with our arguments. func_exec_program \${1+\"\$@\"} fi else # The program doesn't exist. \$ECHO \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2 \$ECHO \"This script is just a wrapper for \$program.\" 1>&2 \$ECHO \"See the $PACKAGE documentation for more information.\" 1>&2 exit 1 fi fi\ " } # func_emit_cwrapperexe_src # emit the source code for a wrapper executable on stdout # Must ONLY be called from within func_mode_link because # it depends on a number of variable set therein. func_emit_cwrapperexe_src () { cat < #include #ifdef _MSC_VER # include # include # include #else # include # include # ifdef __CYGWIN__ # include # endif #endif #include #include #include #include #include #include #include #include /* declarations of non-ANSI functions */ #if defined(__MINGW32__) # ifdef __STRICT_ANSI__ int _putenv (const char *); # endif #elif defined(__CYGWIN__) # ifdef __STRICT_ANSI__ char *realpath (const char *, char *); int putenv (char *); int setenv (const char *, const char *, int); # endif /* #elif defined (other platforms) ... 
*/ #endif /* portability defines, excluding path handling macros */ #if defined(_MSC_VER) # define setmode _setmode # define stat _stat # define chmod _chmod # define getcwd _getcwd # define putenv _putenv # define S_IXUSR _S_IEXEC # ifndef _INTPTR_T_DEFINED # define _INTPTR_T_DEFINED # define intptr_t int # endif #elif defined(__MINGW32__) # define setmode _setmode # define stat _stat # define chmod _chmod # define getcwd _getcwd # define putenv _putenv #elif defined(__CYGWIN__) # define HAVE_SETENV # define FOPEN_WB "wb" /* #elif defined (other platforms) ... */ #endif #if defined(PATH_MAX) # define LT_PATHMAX PATH_MAX #elif defined(MAXPATHLEN) # define LT_PATHMAX MAXPATHLEN #else # define LT_PATHMAX 1024 #endif #ifndef S_IXOTH # define S_IXOTH 0 #endif #ifndef S_IXGRP # define S_IXGRP 0 #endif /* path handling portability macros */ #ifndef DIR_SEPARATOR # define DIR_SEPARATOR '/' # define PATH_SEPARATOR ':' #endif #if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \ defined (__OS2__) # define HAVE_DOS_BASED_FILE_SYSTEM # define FOPEN_WB "wb" # ifndef DIR_SEPARATOR_2 # define DIR_SEPARATOR_2 '\\' # endif # ifndef PATH_SEPARATOR_2 # define PATH_SEPARATOR_2 ';' # endif #endif #ifndef DIR_SEPARATOR_2 # define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) #else /* DIR_SEPARATOR_2 */ # define IS_DIR_SEPARATOR(ch) \ (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) #endif /* DIR_SEPARATOR_2 */ #ifndef PATH_SEPARATOR_2 # define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) #else /* PATH_SEPARATOR_2 */ # define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) #endif /* PATH_SEPARATOR_2 */ #ifndef FOPEN_WB # define FOPEN_WB "w" #endif #ifndef _O_BINARY # define _O_BINARY 0 #endif #define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) #define XFREE(stale) do { \ if (stale) { free ((void *) stale); stale = 0; } \ } while (0) #if defined(LT_DEBUGWRAPPER) static int lt_debug = 1; #else static int lt_debug = 0; #endif const char 
*program_name = "libtool-wrapper"; /* in case xstrdup fails */ void *xmalloc (size_t num); char *xstrdup (const char *string); const char *base_name (const char *name); char *find_executable (const char *wrapper); char *chase_symlinks (const char *pathspec); int make_executable (const char *path); int check_executable (const char *path); char *strendzap (char *str, const char *pat); void lt_debugprintf (const char *file, int line, const char *fmt, ...); void lt_fatal (const char *file, int line, const char *message, ...); static const char *nonnull (const char *s); static const char *nonempty (const char *s); void lt_setenv (const char *name, const char *value); char *lt_extend_str (const char *orig_value, const char *add, int to_end); void lt_update_exe_path (const char *name, const char *value); void lt_update_lib_path (const char *name, const char *value); char **prepare_spawn (char **argv); void lt_dump_script (FILE *f); EOF cat <= 0) && (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) return 1; else return 0; } int make_executable (const char *path) { int rval = 0; struct stat st; lt_debugprintf (__FILE__, __LINE__, "(make_executable): %s\n", nonempty (path)); if ((!path) || (!*path)) return 0; if (stat (path, &st) >= 0) { rval = chmod (path, st.st_mode | S_IXOTH | S_IXGRP | S_IXUSR); } return rval; } /* Searches for the full path of the wrapper. Returns newly allocated full path name if found, NULL otherwise Does not chase symlinks, even on platforms that support them. */ char * find_executable (const char *wrapper) { int has_slash = 0; const char *p; const char *p_next; /* static buffer for getcwd */ char tmp[LT_PATHMAX + 1]; int tmp_len; char *concat_name; lt_debugprintf (__FILE__, __LINE__, "(find_executable): %s\n", nonempty (wrapper)); if ((wrapper == NULL) || (*wrapper == '\0')) return NULL; /* Absolute path? 
*/ #if defined (HAVE_DOS_BASED_FILE_SYSTEM) if (isalpha ((unsigned char) wrapper[0]) && wrapper[1] == ':') { concat_name = xstrdup (wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } else { #endif if (IS_DIR_SEPARATOR (wrapper[0])) { concat_name = xstrdup (wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } #if defined (HAVE_DOS_BASED_FILE_SYSTEM) } #endif for (p = wrapper; *p; p++) if (*p == '/') { has_slash = 1; break; } if (!has_slash) { /* no slashes; search PATH */ const char *path = getenv ("PATH"); if (path != NULL) { for (p = path; *p; p = p_next) { const char *q; size_t p_len; for (q = p; *q; q++) if (IS_PATH_SEPARATOR (*q)) break; p_len = q - p; p_next = (*q == '\0' ? q : q + 1); if (p_len == 0) { /* empty path: current directory */ if (getcwd (tmp, LT_PATHMAX) == NULL) lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", nonnull (strerror (errno))); tmp_len = strlen (tmp); concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, tmp, tmp_len); concat_name[tmp_len] = '/'; strcpy (concat_name + tmp_len + 1, wrapper); } else { concat_name = XMALLOC (char, p_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, p, p_len); concat_name[p_len] = '/'; strcpy (concat_name + p_len + 1, wrapper); } if (check_executable (concat_name)) return concat_name; XFREE (concat_name); } } /* not found in PATH; assume curdir */ } /* Relative path | not found in path: prepend cwd */ if (getcwd (tmp, LT_PATHMAX) == NULL) lt_fatal (__FILE__, __LINE__, "getcwd failed: %s", nonnull (strerror (errno))); tmp_len = strlen (tmp); concat_name = XMALLOC (char, tmp_len + 1 + strlen (wrapper) + 1); memcpy (concat_name, tmp, tmp_len); concat_name[tmp_len] = '/'; strcpy (concat_name + tmp_len + 1, wrapper); if (check_executable (concat_name)) return concat_name; XFREE (concat_name); return NULL; } char * chase_symlinks (const char *pathspec) { #ifndef S_ISLNK return xstrdup (pathspec); 
#else char buf[LT_PATHMAX]; struct stat s; char *tmp_pathspec = xstrdup (pathspec); char *p; int has_symlinks = 0; while (strlen (tmp_pathspec) && !has_symlinks) { lt_debugprintf (__FILE__, __LINE__, "checking path component for symlinks: %s\n", tmp_pathspec); if (lstat (tmp_pathspec, &s) == 0) { if (S_ISLNK (s.st_mode) != 0) { has_symlinks = 1; break; } /* search backwards for last DIR_SEPARATOR */ p = tmp_pathspec + strlen (tmp_pathspec) - 1; while ((p > tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) p--; if ((p == tmp_pathspec) && (!IS_DIR_SEPARATOR (*p))) { /* no more DIR_SEPARATORS left */ break; } *p = '\0'; } else { lt_fatal (__FILE__, __LINE__, "error accessing file \"%s\": %s", tmp_pathspec, nonnull (strerror (errno))); } } XFREE (tmp_pathspec); if (!has_symlinks) { return xstrdup (pathspec); } tmp_pathspec = realpath (pathspec, buf); if (tmp_pathspec == 0) { lt_fatal (__FILE__, __LINE__, "could not follow symlinks for %s", pathspec); } return xstrdup (tmp_pathspec); #endif } char * strendzap (char *str, const char *pat) { size_t len, patlen; assert (str != NULL); assert (pat != NULL); len = strlen (str); patlen = strlen (pat); if (patlen <= len) { str += len - patlen; if (strcmp (str, pat) == 0) *str = '\0'; } return str; } void lt_debugprintf (const char *file, int line, const char *fmt, ...) { va_list args; if (lt_debug) { (void) fprintf (stderr, "%s:%s:%d: ", program_name, file, line); va_start (args, fmt); (void) vfprintf (stderr, fmt, args); va_end (args); } } static void lt_error_core (int exit_status, const char *file, int line, const char *mode, const char *message, va_list ap) { fprintf (stderr, "%s:%s:%d: %s: ", program_name, file, line, mode); vfprintf (stderr, message, ap); fprintf (stderr, ".\n"); if (exit_status >= 0) exit (exit_status); } void lt_fatal (const char *file, int line, const char *message, ...) 
{ va_list ap; va_start (ap, message); lt_error_core (EXIT_FAILURE, file, line, "FATAL", message, ap); va_end (ap); } static const char * nonnull (const char *s) { return s ? s : "(null)"; } static const char * nonempty (const char *s) { return (s && !*s) ? "(empty)" : nonnull (s); } void lt_setenv (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_setenv) setting '%s' to '%s'\n", nonnull (name), nonnull (value)); { #ifdef HAVE_SETENV /* always make a copy, for consistency with !HAVE_SETENV */ char *str = xstrdup (value); setenv (name, str, 1); #else int len = strlen (name) + 1 + strlen (value) + 1; char *str = XMALLOC (char, len); sprintf (str, "%s=%s", name, value); if (putenv (str) != EXIT_SUCCESS) { XFREE (str); } #endif } } char * lt_extend_str (const char *orig_value, const char *add, int to_end) { char *new_value; if (orig_value && *orig_value) { int orig_value_len = strlen (orig_value); int add_len = strlen (add); new_value = XMALLOC (char, add_len + orig_value_len + 1); if (to_end) { strcpy (new_value, orig_value); strcpy (new_value + orig_value_len, add); } else { strcpy (new_value, add); strcpy (new_value + add_len, orig_value); } } else { new_value = xstrdup (add); } return new_value; } void lt_update_exe_path (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_update_exe_path) modifying '%s' by prepending '%s'\n", nonnull (name), nonnull (value)); if (name && *name && value && *value) { char *new_value = lt_extend_str (getenv (name), value, 0); /* some systems can't cope with a ':'-terminated path #' */ int len = strlen (new_value); while (((len = strlen (new_value)) > 0) && IS_PATH_SEPARATOR (new_value[len-1])) { new_value[len-1] = '\0'; } lt_setenv (name, new_value); XFREE (new_value); } } void lt_update_lib_path (const char *name, const char *value) { lt_debugprintf (__FILE__, __LINE__, "(lt_update_lib_path) modifying '%s' by prepending '%s'\n", nonnull (name), nonnull (value)); if (name && 
*name && value && *value) { char *new_value = lt_extend_str (getenv (name), value, 0); lt_setenv (name, new_value); XFREE (new_value); } } EOF case $host_os in mingw*) cat <<"EOF" /* Prepares an argument vector before calling spawn(). Note that spawn() does not by itself call the command interpreter (getenv ("COMSPEC") != NULL ? getenv ("COMSPEC") : ({ OSVERSIONINFO v; v.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); GetVersionEx(&v); v.dwPlatformId == VER_PLATFORM_WIN32_NT; }) ? "cmd.exe" : "command.com"). Instead it simply concatenates the arguments, separated by ' ', and calls CreateProcess(). We must quote the arguments since Win32 CreateProcess() interprets characters like ' ', '\t', '\\', '"' (but not '<' and '>') in a special way: - Space and tab are interpreted as delimiters. They are not treated as delimiters if they are surrounded by double quotes: "...". - Unescaped double quotes are removed from the input. Their only effect is that within double quotes, space and tab are treated like normal characters. - Backslashes not followed by double quotes are not special. - But 2*n+1 backslashes followed by a double quote become n backslashes followed by a double quote (n >= 0): \" -> " \\\" -> \" \\\\\" -> \\" */ #define SHELL_SPECIAL_CHARS "\"\\ \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" #define SHELL_SPACE_CHARS " \001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" char ** prepare_spawn (char **argv) { size_t argc; char **new_argv; size_t i; /* Count number of arguments. */ for (argc = 0; argv[argc] != NULL; argc++) ; /* Allocate new argument vector. */ new_argv = XMALLOC (char *, argc + 1); /* Put quoted arguments into the new argument vector. 
*/ for (i = 0; i < argc; i++) { const char *string = argv[i]; if (string[0] == '\0') new_argv[i] = xstrdup ("\"\""); else if (strpbrk (string, SHELL_SPECIAL_CHARS) != NULL) { int quote_around = (strpbrk (string, SHELL_SPACE_CHARS) != NULL); size_t length; unsigned int backslashes; const char *s; char *quoted_string; char *p; length = 0; backslashes = 0; if (quote_around) length++; for (s = string; *s != '\0'; s++) { char c = *s; if (c == '"') length += backslashes + 1; length++; if (c == '\\') backslashes++; else backslashes = 0; } if (quote_around) length += backslashes + 1; quoted_string = XMALLOC (char, length + 1); p = quoted_string; backslashes = 0; if (quote_around) *p++ = '"'; for (s = string; *s != '\0'; s++) { char c = *s; if (c == '"') { unsigned int j; for (j = backslashes + 1; j > 0; j--) *p++ = '\\'; } *p++ = c; if (c == '\\') backslashes++; else backslashes = 0; } if (quote_around) { unsigned int j; for (j = backslashes; j > 0; j--) *p++ = '\\'; *p++ = '"'; } *p = '\0'; new_argv[i] = quoted_string; } else new_argv[i] = (char *) string; } new_argv[argc] = NULL; return new_argv; } EOF ;; esac cat <<"EOF" void lt_dump_script (FILE* f) { EOF func_emit_wrapper yes | $SED -n -e ' s/^\(.\{79\}\)\(..*\)/\1\ \2/ h s/\([\\"]\)/\\\1/g s/$/\\n/ s/\([^\n]*\).*/ fputs ("\1", f);/p g D' cat <<"EOF" } EOF } # end: func_emit_cwrapperexe_src # func_win32_import_lib_p ARG # True if ARG is an import lib, as indicated by $file_magic_cmd func_win32_import_lib_p () { $opt_debug case `eval $file_magic_cmd \"\$1\" 2>/dev/null | $SED -e 10q` in *import*) : ;; *) false ;; esac } # func_mode_link arg... func_mode_link () { $opt_debug case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) # It is impossible to link a dll without this setting, and # we shouldn't force the makefile maintainer to figure out # which system we are compiling for in order to pass an extra # flag for every libtool invocation. 
# allow_undefined=no # FIXME: Unfortunately, there are problems with the above when trying # to make a dll which has undefined symbols, in which case not # even a static library is built. For now, we need to specify # -no-undefined on the libtool link line when we can be certain # that all symbols are satisfied, otherwise we get a static library. allow_undefined=yes ;; *) allow_undefined=yes ;; esac libtool_args=$nonopt base_compile="$nonopt $@" compile_command=$nonopt finalize_command=$nonopt compile_rpath= finalize_rpath= compile_shlibpath= finalize_shlibpath= convenience= old_convenience= deplibs= old_deplibs= compiler_flags= linker_flags= dllsearchpath= lib_search_path=`pwd` inst_prefix_dir= new_inherited_linker_flags= avoid_version=no bindir= dlfiles= dlprefiles= dlself=no export_dynamic=no export_symbols= export_symbols_regex= generated= libobjs= ltlibs= module=no no_install=no objs= non_pic_objects= precious_files_regex= prefer_static_libs=no preload=no prev= prevarg= release= rpath= xrpath= perm_rpath= temp_rpath= thread_safe=no vinfo= vinfo_number=no weak_libs= single_module="${wl}-single_module" func_infer_tag $base_compile # We need to know -static, to get the right output filenames. 
for arg do case $arg in -shared) test "$build_libtool_libs" != yes && \ func_fatal_configuration "can not build a shared library" build_old_libs=no break ;; -all-static | -static | -static-libtool-libs) case $arg in -all-static) if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then func_warning "complete static linking is impossible in this configuration" fi if test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=yes ;; -static) if test -z "$pic_flag" && test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=built ;; -static-libtool-libs) if test -z "$pic_flag" && test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=yes ;; esac build_libtool_libs=no build_old_libs=yes break ;; esac done # See if our shared archives depend on static archives. test -n "$old_archive_from_new_cmds" && build_old_libs=yes # Go through the arguments, transforming them on the way. while test "$#" -gt 0; do arg="$1" shift func_quote_for_eval "$arg" qarg=$func_quote_for_eval_unquoted_result func_append libtool_args " $func_quote_for_eval_result" # If the previous option needs an argument, assign it. if test -n "$prev"; then case $prev in output) func_append compile_command " @OUTPUT@" func_append finalize_command " @OUTPUT@" ;; esac case $prev in bindir) bindir="$arg" prev= continue ;; dlfiles|dlprefiles) if test "$preload" = no; then # Add the symbol object into the linking commands. func_append compile_command " @SYMFILE@" func_append finalize_command " @SYMFILE@" preload=yes fi case $arg in *.la | *.lo) ;; # We handle these cases below. 
force) if test "$dlself" = no; then dlself=needless export_dynamic=yes fi prev= continue ;; self) if test "$prev" = dlprefiles; then dlself=yes elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then dlself=yes else dlself=needless export_dynamic=yes fi prev= continue ;; *) if test "$prev" = dlfiles; then func_append dlfiles " $arg" else func_append dlprefiles " $arg" fi prev= continue ;; esac ;; expsyms) export_symbols="$arg" test -f "$arg" \ || func_fatal_error "symbol file \`$arg' does not exist" prev= continue ;; expsyms_regex) export_symbols_regex="$arg" prev= continue ;; framework) case $host in *-*-darwin*) case "$deplibs " in *" $qarg.ltframework "*) ;; *) func_append deplibs " $qarg.ltframework" # this is fixed later ;; esac ;; esac prev= continue ;; inst_prefix) inst_prefix_dir="$arg" prev= continue ;; objectlist) if test -f "$arg"; then save_arg=$arg moreargs= for fil in `cat "$save_arg"` do # func_append moreargs " $fil" arg=$fil # A libtool-controlled object. # Check to see that this really is a libtool object. if func_lalib_unsafe_p "$arg"; then pic_object= non_pic_object= # Read the .lo file func_source "$arg" if test -z "$pic_object" || test -z "$non_pic_object" || test "$pic_object" = none && test "$non_pic_object" = none; then func_fatal_error "cannot find name of object for \`$arg'" fi # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir="$func_dirname_result" if test "$pic_object" != none; then # Prepend the subdirectory the object is found in. pic_object="$xdir$pic_object" if test "$prev" = dlfiles; then if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then func_append dlfiles " $pic_object" prev= continue else # If libtool objects are unsupported, then we need to preload. prev=dlprefiles fi fi # CHECK ME: I think I busted this. -Ossama if test "$prev" = dlprefiles; then # Preload the old-style object. func_append dlprefiles " $pic_object" prev= fi # A PIC object. 
func_append libobjs " $pic_object" arg="$pic_object" fi # Non-PIC object. if test "$non_pic_object" != none; then # Prepend the subdirectory the object is found in. non_pic_object="$xdir$non_pic_object" # A standard non-PIC object func_append non_pic_objects " $non_pic_object" if test -z "$pic_object" || test "$pic_object" = none ; then arg="$non_pic_object" fi else # If the PIC object exists, use it instead. # $xdir was prepended to $pic_object above. non_pic_object="$pic_object" func_append non_pic_objects " $non_pic_object" fi else # Only an error if not doing a dry-run. if $opt_dry_run; then # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir="$func_dirname_result" func_lo2o "$arg" pic_object=$xdir$objdir/$func_lo2o_result non_pic_object=$xdir$func_lo2o_result func_append libobjs " $pic_object" func_append non_pic_objects " $non_pic_object" else func_fatal_error "\`$arg' is not a valid libtool object" fi fi done else func_fatal_error "link input file \`$arg' does not exist" fi arg=$save_arg prev= continue ;; precious_regex) precious_files_regex="$arg" prev= continue ;; release) release="-$arg" prev= continue ;; rpath | xrpath) # We need an absolute path. 
case $arg in [\\/]* | [A-Za-z]:[\\/]*) ;; *) func_fatal_error "only absolute run-paths are allowed" ;; esac if test "$prev" = rpath; then case "$rpath " in *" $arg "*) ;; *) func_append rpath " $arg" ;; esac else case "$xrpath " in *" $arg "*) ;; *) func_append xrpath " $arg" ;; esac fi prev= continue ;; shrext) shrext_cmds="$arg" prev= continue ;; weak) func_append weak_libs " $arg" prev= continue ;; xcclinker) func_append linker_flags " $qarg" func_append compiler_flags " $qarg" prev= func_append compile_command " $qarg" func_append finalize_command " $qarg" continue ;; xcompiler) func_append compiler_flags " $qarg" prev= func_append compile_command " $qarg" func_append finalize_command " $qarg" continue ;; xlinker) func_append linker_flags " $qarg" func_append compiler_flags " $wl$qarg" prev= func_append compile_command " $wl$qarg" func_append finalize_command " $wl$qarg" continue ;; *) eval "$prev=\"\$arg\"" prev= continue ;; esac fi # test -n "$prev" prevarg="$arg" case $arg in -all-static) if test -n "$link_static_flag"; then # See comment for -static flag below, for more details. func_append compile_command " $link_static_flag" func_append finalize_command " $link_static_flag" fi continue ;; -allow-undefined) # FIXME: remove this flag sometime in the future. 
func_fatal_error "\`-allow-undefined' must not be used because it is the default" ;; -avoid-version) avoid_version=yes continue ;; -bindir) prev=bindir continue ;; -dlopen) prev=dlfiles continue ;; -dlpreopen) prev=dlprefiles continue ;; -export-dynamic) export_dynamic=yes continue ;; -export-symbols | -export-symbols-regex) if test -n "$export_symbols" || test -n "$export_symbols_regex"; then func_fatal_error "more than one -exported-symbols argument is not allowed" fi if test "X$arg" = "X-export-symbols"; then prev=expsyms else prev=expsyms_regex fi continue ;; -framework) prev=framework continue ;; -inst-prefix-dir) prev=inst_prefix continue ;; # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* # so, if we see these flags be careful not to treat them like -L -L[A-Z][A-Z]*:*) case $with_gcc/$host in no/*-*-irix* | /*-*-irix*) func_append compile_command " $arg" func_append finalize_command " $arg" ;; esac continue ;; -L*) func_stripname "-L" '' "$arg" if test -z "$func_stripname_result"; then if test "$#" -gt 0; then func_fatal_error "require no space between \`-L' and \`$1'" else func_fatal_error "need path for \`-L' option" fi fi func_resolve_sysroot "$func_stripname_result" dir=$func_resolve_sysroot_result # We need an absolute path. 
case $dir in [\\/]* | [A-Za-z]:[\\/]*) ;; *) absdir=`cd "$dir" && pwd` test -z "$absdir" && \ func_fatal_error "cannot determine absolute directory name of \`$dir'" dir="$absdir" ;; esac case "$deplibs " in *" -L$dir "* | *" $arg "*) # Will only happen for absolute or sysroot arguments ;; *) # Preserve sysroot, but never include relative directories case $dir in [\\/]* | [A-Za-z]:[\\/]* | =*) func_append deplibs " $arg" ;; *) func_append deplibs " -L$dir" ;; esac func_append lib_search_path " $dir" ;; esac case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) testbindir=`$ECHO "$dir" | $SED 's*/lib$*/bin*'` case :$dllsearchpath: in *":$dir:"*) ;; ::) dllsearchpath=$dir;; *) func_append dllsearchpath ":$dir";; esac case :$dllsearchpath: in *":$testbindir:"*) ;; ::) dllsearchpath=$testbindir;; *) func_append dllsearchpath ":$testbindir";; esac ;; esac continue ;; -l*) if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos* | *-cegcc* | *-*-haiku*) # These systems don't actually have a C or math library (as such) continue ;; *-*-os2*) # These systems don't actually have a C library (as such) test "X$arg" = "X-lc" && continue ;; *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc due to us having libc/libc_r. test "X$arg" = "X-lc" && continue ;; *-*-rhapsody* | *-*-darwin1.[012]) # Rhapsody C and math libraries are in the System framework func_append deplibs " System.ltframework" continue ;; *-*-sco3.2v5* | *-*-sco5v6*) # Causes problems with __ctype test "X$arg" = "X-lc" && continue ;; *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) # Compiler inserts libc in the correct place for threads to work test "X$arg" = "X-lc" && continue ;; esac elif test "X$arg" = "X-lc_r"; then case $host in *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc_r directly, use -pthread flag. 
continue ;; esac fi func_append deplibs " $arg" continue ;; -module) module=yes continue ;; # Tru64 UNIX uses -model [arg] to determine the layout of C++ # classes, name mangling, and exception handling. # Darwin uses the -arch flag to determine output architecture. -model|-arch|-isysroot|--sysroot) func_append compiler_flags " $arg" func_append compile_command " $arg" func_append finalize_command " $arg" prev=xcompiler continue ;; -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) func_append compiler_flags " $arg" func_append compile_command " $arg" func_append finalize_command " $arg" case "$new_inherited_linker_flags " in *" $arg "*) ;; * ) func_append new_inherited_linker_flags " $arg" ;; esac continue ;; -multi_module) single_module="${wl}-multi_module" continue ;; -no-fast-install) fast_install=no continue ;; -no-install) case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin* | *-cegcc*) # The PATH hackery in wrapper scripts is required on Windows # and Darwin in order for the loader to find any dlls it needs. func_warning "\`-no-install' is ignored for $host" func_warning "assuming \`-no-fast-install' instead" fast_install=no ;; *) no_install=yes ;; esac continue ;; -no-undefined) allow_undefined=no continue ;; -objectlist) prev=objectlist continue ;; -o) prev=output ;; -precious-files-regex) prev=precious_regex continue ;; -release) prev=release continue ;; -rpath) prev=rpath continue ;; -R) prev=xrpath continue ;; -R*) func_stripname '-R' '' "$arg" dir=$func_stripname_result # We need an absolute path. case $dir in [\\/]* | [A-Za-z]:[\\/]*) ;; =*) func_stripname '=' '' "$dir" dir=$lt_sysroot$func_stripname_result ;; *) func_fatal_error "only absolute run-paths are allowed" ;; esac case "$xrpath " in *" $dir "*) ;; *) func_append xrpath " $dir" ;; esac continue ;; -shared) # The effects of -shared are defined in a previous loop. 
continue ;; -shrext) prev=shrext continue ;; -static | -static-libtool-libs) # The effects of -static are defined in a previous loop. # We used to do the same as -all-static on platforms that # didn't have a PIC flag, but the assumption that the effects # would be equivalent was wrong. It would break on at least # Digital Unix and AIX. continue ;; -thread-safe) thread_safe=yes continue ;; -version-info) prev=vinfo continue ;; -version-number) prev=vinfo vinfo_number=yes continue ;; -weak) prev=weak continue ;; -Wc,*) func_stripname '-Wc,' '' "$arg" args=$func_stripname_result arg= save_ifs="$IFS"; IFS=',' for flag in $args; do IFS="$save_ifs" func_quote_for_eval "$flag" func_append arg " $func_quote_for_eval_result" func_append compiler_flags " $func_quote_for_eval_result" done IFS="$save_ifs" func_stripname ' ' '' "$arg" arg=$func_stripname_result ;; -Wl,*) func_stripname '-Wl,' '' "$arg" args=$func_stripname_result arg= save_ifs="$IFS"; IFS=',' for flag in $args; do IFS="$save_ifs" func_quote_for_eval "$flag" func_append arg " $wl$func_quote_for_eval_result" func_append compiler_flags " $wl$func_quote_for_eval_result" func_append linker_flags " $func_quote_for_eval_result" done IFS="$save_ifs" func_stripname ' ' '' "$arg" arg=$func_stripname_result ;; -Xcompiler) prev=xcompiler continue ;; -Xlinker) prev=xlinker continue ;; -XCClinker) prev=xcclinker continue ;; # -msg_* for osf cc -msg_*) func_quote_for_eval "$arg" arg="$func_quote_for_eval_result" ;; # Flags to be passed through unchanged, with rationale: # -64, -mips[0-9] enable 64-bit mode for the SGI compiler # -r[0-9][0-9]* specify processor for the SGI compiler # -xarch=*, -xtarget=* enable 64-bit mode for the Sun compiler # +DA*, +DD* enable 64-bit mode for the HP compiler # -q* compiler args for the IBM compiler # -m*, -t[45]*, -txscale* architecture-specific flags for GCC # -F/path path to uninstalled frameworks, gcc on darwin # -p, -pg, --coverage, -fprofile-* profiling flags for GCC # @file GCC 
response files # -tp=* Portland pgcc target processor selection # --sysroot=* for sysroot support # -O*, -flto*, -fwhopr*, -fuse-linker-plugin GCC link-time optimization -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \ -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \ -O*|-flto*|-fwhopr*|-fuse-linker-plugin) func_quote_for_eval "$arg" arg="$func_quote_for_eval_result" func_append compile_command " $arg" func_append finalize_command " $arg" func_append compiler_flags " $arg" continue ;; # Some other compiler flag. -* | +*) func_quote_for_eval "$arg" arg="$func_quote_for_eval_result" ;; *.$objext) # A standard object. func_append objs " $arg" ;; *.lo) # A libtool-controlled object. # Check to see that this really is a libtool object. if func_lalib_unsafe_p "$arg"; then pic_object= non_pic_object= # Read the .lo file func_source "$arg" if test -z "$pic_object" || test -z "$non_pic_object" || test "$pic_object" = none && test "$non_pic_object" = none; then func_fatal_error "cannot find name of object for \`$arg'" fi # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir="$func_dirname_result" if test "$pic_object" != none; then # Prepend the subdirectory the object is found in. pic_object="$xdir$pic_object" if test "$prev" = dlfiles; then if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then func_append dlfiles " $pic_object" prev= continue else # If libtool objects are unsupported, then we need to preload. prev=dlprefiles fi fi # CHECK ME: I think I busted this. -Ossama if test "$prev" = dlprefiles; then # Preload the old-style object. func_append dlprefiles " $pic_object" prev= fi # A PIC object. func_append libobjs " $pic_object" arg="$pic_object" fi # Non-PIC object. if test "$non_pic_object" != none; then # Prepend the subdirectory the object is found in. 
non_pic_object="$xdir$non_pic_object" # A standard non-PIC object func_append non_pic_objects " $non_pic_object" if test -z "$pic_object" || test "$pic_object" = none ; then arg="$non_pic_object" fi else # If the PIC object exists, use it instead. # $xdir was prepended to $pic_object above. non_pic_object="$pic_object" func_append non_pic_objects " $non_pic_object" fi else # Only an error if not doing a dry-run. if $opt_dry_run; then # Extract subdirectory from the argument. func_dirname "$arg" "/" "" xdir="$func_dirname_result" func_lo2o "$arg" pic_object=$xdir$objdir/$func_lo2o_result non_pic_object=$xdir$func_lo2o_result func_append libobjs " $pic_object" func_append non_pic_objects " $non_pic_object" else func_fatal_error "\`$arg' is not a valid libtool object" fi fi ;; *.$libext) # An archive. func_append deplibs " $arg" func_append old_deplibs " $arg" continue ;; *.la) # A libtool-controlled library. func_resolve_sysroot "$arg" if test "$prev" = dlfiles; then # This library was specified with -dlopen. func_append dlfiles " $func_resolve_sysroot_result" prev= elif test "$prev" = dlprefiles; then # The library was specified with -dlpreopen. func_append dlprefiles " $func_resolve_sysroot_result" prev= else func_append deplibs " $func_resolve_sysroot_result" fi continue ;; # Some other compiler argument. *) # Unknown arguments in both finalize_command and compile_command need # to be aesthetically quoted because they are evaled later. func_quote_for_eval "$arg" arg="$func_quote_for_eval_result" ;; esac # arg # Now actually substitute the argument into the commands. 
if test -n "$arg"; then func_append compile_command " $arg" func_append finalize_command " $arg" fi done # argument parsing loop test -n "$prev" && \ func_fatal_help "the \`$prevarg' option requires an argument" if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then eval arg=\"$export_dynamic_flag_spec\" func_append compile_command " $arg" func_append finalize_command " $arg" fi oldlibs= # calculate the name of the file, without its directory func_basename "$output" outputname="$func_basename_result" libobjs_save="$libobjs" if test -n "$shlibpath_var"; then # get the directories listed in $shlibpath_var eval shlib_search_path=\`\$ECHO \"\${$shlibpath_var}\" \| \$SED \'s/:/ /g\'\` else shlib_search_path= fi eval sys_lib_search_path=\"$sys_lib_search_path_spec\" eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" func_dirname "$output" "/" "" output_objdir="$func_dirname_result$objdir" func_to_tool_file "$output_objdir/" tool_output_objdir=$func_to_tool_file_result # Create the object directory. func_mkdir_p "$output_objdir" # Determine the type of output case $output in "") func_fatal_help "you must specify an output file" ;; *.$libext) linkmode=oldlib ;; *.lo | *.$objext) linkmode=obj ;; *.la) linkmode=lib ;; *) linkmode=prog ;; # Anything else should be a program. esac specialdeplibs= libs= # Find all interdependent deplibs by searching for libraries # that are linked more than once (e.g. -la -lb -la) for deplib in $deplibs; do if $opt_preserve_dup_deps ; then case "$libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append libs " $deplib" done if test "$linkmode" = lib; then libs="$predeps $libs $compiler_lib_search_path $postdeps" # Compute libraries that are listed more than once in $predeps # $postdeps and mark them as special (i.e., whose duplicates are # not to be eliminated). 
pre_post_deps= if $opt_duplicate_compiler_generated_deps; then for pre_post_dep in $predeps $postdeps; do case "$pre_post_deps " in *" $pre_post_dep "*) func_append specialdeplibs " $pre_post_deps" ;; esac func_append pre_post_deps " $pre_post_dep" done fi pre_post_deps= fi deplibs= newdependency_libs= newlib_search_path= need_relink=no # whether we're linking any uninstalled libtool libraries notinst_deplibs= # not-installed libtool libraries notinst_path= # paths that contain not-installed libtool libraries case $linkmode in lib) passes="conv dlpreopen link" for file in $dlfiles $dlprefiles; do case $file in *.la) ;; *) func_fatal_help "libraries can \`-dlopen' only libtool libraries: $file" ;; esac done ;; prog) compile_deplibs= finalize_deplibs= alldeplibs=no newdlfiles= newdlprefiles= passes="conv scan dlopen dlpreopen link" ;; *) passes="conv" ;; esac for pass in $passes; do # The preopen pass in lib mode reverses $deplibs; put it back here # so that -L comes before libs that need it for instance... 
if test "$linkmode,$pass" = "lib,link"; then ## FIXME: Find the place where the list is rebuilt in the wrong ## order, and fix it there properly tmp_deplibs= for deplib in $deplibs; do tmp_deplibs="$deplib $tmp_deplibs" done deplibs="$tmp_deplibs" fi if test "$linkmode,$pass" = "lib,link" || test "$linkmode,$pass" = "prog,scan"; then libs="$deplibs" deplibs= fi if test "$linkmode" = prog; then case $pass in dlopen) libs="$dlfiles" ;; dlpreopen) libs="$dlprefiles" ;; link) libs="$deplibs %DEPLIBS%" test "X$link_all_deplibs" != Xno && libs="$libs $dependency_libs" ;; esac fi if test "$linkmode,$pass" = "lib,dlpreopen"; then # Collect and forward deplibs of preopened libtool libs for lib in $dlprefiles; do # Ignore non-libtool-libs dependency_libs= func_resolve_sysroot "$lib" case $lib in *.la) func_source "$func_resolve_sysroot_result" ;; esac # Collect preopened libtool deplibs, except any this library # has declared as weak libs for deplib in $dependency_libs; do func_basename "$deplib" deplib_base=$func_basename_result case " $weak_libs " in *" $deplib_base "*) ;; *) func_append deplibs " $deplib" ;; esac done done libs="$dlprefiles" fi if test "$pass" = dlopen; then # Collect dlpreopened libraries save_deplibs="$deplibs" deplibs= fi for deplib in $libs; do lib= found=no case $deplib in -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \ |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*) if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else func_append compiler_flags " $deplib" if test "$linkmode" = lib ; then case "$new_inherited_linker_flags " in *" $deplib "*) ;; * ) func_append new_inherited_linker_flags " $deplib" ;; esac fi fi continue ;; -l*) if test "$linkmode" != lib && test "$linkmode" != prog; then func_warning "\`-l' is ignored for archives/objects" continue fi func_stripname '-l' '' "$deplib" name=$func_stripname_result if test "$linkmode" = lib; 
then searchdirs="$newlib_search_path $lib_search_path $compiler_lib_search_dirs $sys_lib_search_path $shlib_search_path" else searchdirs="$newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path" fi for searchdir in $searchdirs; do for search_ext in .la $std_shrext .so .a; do # Search the libtool library lib="$searchdir/lib${name}${search_ext}" if test -f "$lib"; then if test "$search_ext" = ".la"; then found=yes else found=no fi break 2 fi done done if test "$found" != yes; then # deplib doesn't seem to be a libtool library if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" fi continue else # deplib is a libtool library # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, # We need to do some special things here, and not later. if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then case " $predeps $postdeps " in *" $deplib "*) if func_lalib_p "$lib"; then library_names= old_library= func_source "$lib" for l in $old_library $library_names; do ll="$l" done if test "X$ll" = "X$old_library" ; then # only static version available found=no func_dirname "$lib" "" "." 
ladir="$func_dirname_result" lib=$ladir/$old_library if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" fi continue fi fi ;; *) ;; esac fi fi ;; # -l *.ltframework) if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" if test "$linkmode" = lib ; then case "$new_inherited_linker_flags " in *" $deplib "*) ;; * ) func_append new_inherited_linker_flags " $deplib" ;; esac fi fi continue ;; -L*) case $linkmode in lib) deplibs="$deplib $deplibs" test "$pass" = conv && continue newdependency_libs="$deplib $newdependency_libs" func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; prog) if test "$pass" = conv; then deplibs="$deplib $deplibs" continue fi if test "$pass" = scan; then deplibs="$deplib $deplibs" else compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" fi func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; *) func_warning "\`-L' is ignored for archives/objects" ;; esac # linkmode continue ;; # -L -R*) if test "$pass" = link; then func_stripname '-R' '' "$deplib" func_resolve_sysroot "$func_stripname_result" dir=$func_resolve_sysroot_result # Make sure the xrpath contains only unique directories. 
case "$xrpath " in *" $dir "*) ;; *) func_append xrpath " $dir" ;; esac fi deplibs="$deplib $deplibs" continue ;; *.la) func_resolve_sysroot "$deplib" lib=$func_resolve_sysroot_result ;; *.$libext) if test "$pass" = conv; then deplibs="$deplib $deplibs" continue fi case $linkmode in lib) # Linking convenience modules into shared libraries is allowed, # but linking other static libraries is non-portable. case " $dlpreconveniencelibs " in *" $deplib "*) ;; *) valid_a_lib=no case $deplibs_check_method in match_pattern*) set dummy $deplibs_check_method; shift match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` if eval "\$ECHO \"$deplib\"" 2>/dev/null | $SED 10q \ | $EGREP "$match_pattern_regex" > /dev/null; then valid_a_lib=yes fi ;; pass_all) valid_a_lib=yes ;; esac if test "$valid_a_lib" != yes; then echo $ECHO "*** Warning: Trying to link with static lib archive $deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because the file extensions .$libext of this argument makes me believe" echo "*** that it is just a static archive that I should not use here." else echo $ECHO "*** Warning: Linking the shared library $output against the" $ECHO "*** static library $deplib is not portable!" deplibs="$deplib $deplibs" fi ;; esac continue ;; prog) if test "$pass" != link; then deplibs="$deplib $deplibs" else compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" fi continue ;; esac # linkmode ;; # *.$libext *.lo | *.$objext) if test "$pass" = conv; then deplibs="$deplib $deplibs" elif test "$linkmode" = prog; then if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then # If there is no dlopen support or we're linking statically, # we need to preload. 
func_append newdlprefiles " $deplib" compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else func_append newdlfiles " $deplib" fi fi continue ;; %DEPLIBS%) alldeplibs=yes continue ;; esac # case $deplib if test "$found" = yes || test -f "$lib"; then : else func_fatal_error "cannot find the library \`$lib' or unhandled argument \`$deplib'" fi # Check to see that this really is a libtool archive. func_lalib_unsafe_p "$lib" \ || func_fatal_error "\`$lib' is not a valid libtool archive" func_dirname "$lib" "" "." ladir="$func_dirname_result" dlname= dlopen= dlpreopen= libdir= library_names= old_library= inherited_linker_flags= # If the library was installed with an old release of libtool, # it will not redefine variables installed, or shouldnotlink installed=yes shouldnotlink=no avoidtemprpath= # Read the .la file func_source "$lib" # Convert "-framework foo" to "foo.ltframework" if test -n "$inherited_linker_flags"; then tmp_inherited_linker_flags=`$ECHO "$inherited_linker_flags" | $SED 's/-framework \([^ $]*\)/\1.ltframework/g'` for tmp_inherited_linker_flag in $tmp_inherited_linker_flags; do case " $new_inherited_linker_flags " in *" $tmp_inherited_linker_flag "*) ;; *) func_append new_inherited_linker_flags " $tmp_inherited_linker_flag";; esac done fi dependency_libs=`$ECHO " $dependency_libs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` if test "$linkmode,$pass" = "lib,link" || test "$linkmode,$pass" = "prog,scan" || { test "$linkmode" != prog && test "$linkmode" != lib; }; then test -n "$dlopen" && func_append dlfiles " $dlopen" test -n "$dlpreopen" && func_append dlprefiles " $dlpreopen" fi if test "$pass" = conv; then # Only check for convenience libraries deplibs="$lib $deplibs" if test -z "$libdir"; then if test -z "$old_library"; then func_fatal_error "cannot find name of link library for \`$lib'" fi # It is a libtool convenience library, so add in its objects. 
func_append convenience " $ladir/$objdir/$old_library" func_append old_convenience " $ladir/$objdir/$old_library" tmp_libs= for deplib in $dependency_libs; do deplibs="$deplib $deplibs" if $opt_preserve_dup_deps ; then case "$tmp_libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append tmp_libs " $deplib" done elif test "$linkmode" != prog && test "$linkmode" != lib; then func_fatal_error "\`$lib' is not a convenience library" fi continue fi # $pass = conv # Get the name of the library we link against. linklib= if test -n "$old_library" && { test "$prefer_static_libs" = yes || test "$prefer_static_libs,$installed" = "built,no"; }; then linklib=$old_library else for l in $old_library $library_names; do linklib="$l" done fi if test -z "$linklib"; then func_fatal_error "cannot find name of link library for \`$lib'" fi # This library was specified with -dlopen. if test "$pass" = dlopen; then if test -z "$libdir"; then func_fatal_error "cannot -dlopen a convenience library: \`$lib'" fi if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then # If there is no dlname, no dlopen support or we're linking # statically, we need to preload. We also need to preload any # dependent libraries so libltdl's deplib preloader doesn't # bomb out in the load deplibs phase. func_append dlprefiles " $lib $dependency_libs" else func_append newdlfiles " $lib" fi continue fi # $pass = dlopen # We need an absolute path. case $ladir in [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; *) abs_ladir=`cd "$ladir" && pwd` if test -z "$abs_ladir"; then func_warning "cannot determine absolute directory name of \`$ladir'" func_warning "passing it literally to the linker, although it might fail" abs_ladir="$ladir" fi ;; esac func_basename "$lib" laname="$func_basename_result" # Find the relevant object directory and library name. if test "X$installed" = Xyes; then if test ! 
-f "$lt_sysroot$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then func_warning "library \`$lib' was moved." dir="$ladir" absdir="$abs_ladir" libdir="$abs_ladir" else dir="$lt_sysroot$libdir" absdir="$lt_sysroot$libdir" fi test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes else if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then dir="$ladir" absdir="$abs_ladir" # Remove this search path later func_append notinst_path " $abs_ladir" else dir="$ladir/$objdir" absdir="$abs_ladir/$objdir" # Remove this search path later func_append notinst_path " $abs_ladir" fi fi # $installed = yes func_stripname 'lib' '.la' "$laname" name=$func_stripname_result # This library was specified with -dlpreopen. if test "$pass" = dlpreopen; then if test -z "$libdir" && test "$linkmode" = prog; then func_fatal_error "only libraries may -dlpreopen a convenience library: \`$lib'" fi case "$host" in # special handling for platforms with PE-DLLs. *cygwin* | *mingw* | *cegcc* ) # Linker will automatically link against shared library if both # static and shared are present. Therefore, ensure we extract # symbols from the import library if a shared library is present # (otherwise, the dlopen module name will be incorrect). We do # this by putting the import library name into $newdlprefiles. # We recover the dlopen module name by 'saving' the la file # name in a special purpose variable, and (later) extracting the # dlname from the la file. if test -n "$dlname"; then func_tr_sh "$dir/$linklib" eval "libfile_$func_tr_sh_result=\$abs_ladir/\$laname" func_append newdlprefiles " $dir/$linklib" else func_append newdlprefiles " $dir/$old_library" # Keep a list of preopened convenience libraries to check # that they are being used correctly in the link pass. test -z "$libdir" && \ func_append dlpreconveniencelibs " $dir/$old_library" fi ;; * ) # Prefer using a static library (so that no silly _DYNAMIC symbols # are required to link). 
if test -n "$old_library"; then func_append newdlprefiles " $dir/$old_library" # Keep a list of preopened convenience libraries to check # that they are being used correctly in the link pass. test -z "$libdir" && \ func_append dlpreconveniencelibs " $dir/$old_library" # Otherwise, use the dlname, so that lt_dlopen finds it. elif test -n "$dlname"; then func_append newdlprefiles " $dir/$dlname" else func_append newdlprefiles " $dir/$linklib" fi ;; esac fi # $pass = dlpreopen if test -z "$libdir"; then # Link the convenience library if test "$linkmode" = lib; then deplibs="$dir/$old_library $deplibs" elif test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$dir/$old_library $compile_deplibs" finalize_deplibs="$dir/$old_library $finalize_deplibs" else deplibs="$lib $deplibs" # used for prog,scan pass fi continue fi if test "$linkmode" = prog && test "$pass" != link; then func_append newlib_search_path " $ladir" deplibs="$lib $deplibs" linkalldeplibs=no if test "$link_all_deplibs" != no || test -z "$library_names" || test "$build_libtool_libs" = no; then linkalldeplibs=yes fi tmp_libs= for deplib in $dependency_libs; do case $deplib in -L*) func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result" func_append newlib_search_path " $func_resolve_sysroot_result" ;; esac # Need to link against all dependency_libs? if test "$linkalldeplibs" = yes; then deplibs="$deplib $deplibs" else # Need to hardcode shared library paths # or/and link against static libraries newdependency_libs="$deplib $newdependency_libs" fi if $opt_preserve_dup_deps ; then case "$tmp_libs " in *" $deplib "*) func_append specialdeplibs " $deplib" ;; esac fi func_append tmp_libs " $deplib" done # for deplib continue fi # $linkmode = prog... 
if test "$linkmode,$pass" = "prog,link"; then if test -n "$library_names" && { { test "$prefer_static_libs" = no || test "$prefer_static_libs,$installed" = "built,yes"; } || test -z "$old_library"; }; then # We need to hardcode the library path if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then # Make sure the rpath contains only unique directories. case "$temp_rpath:" in *"$absdir:"*) ;; *) func_append temp_rpath "$absdir:" ;; esac fi # Hardcode the library path. # Skip directories that are in the system default run-time # search path. case " $sys_lib_dlsearch_path " in *" $absdir "*) ;; *) case "$compile_rpath " in *" $absdir "*) ;; *) func_append compile_rpath " $absdir" ;; esac ;; esac case " $sys_lib_dlsearch_path " in *" $libdir "*) ;; *) case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac ;; esac fi # $linkmode,$pass = prog,link... if test "$alldeplibs" = yes && { test "$deplibs_check_method" = pass_all || { test "$build_libtool_libs" = yes && test -n "$library_names"; }; }; then # We only need to search for static libraries continue fi fi link_static=no # Whether the deplib will be linked statically use_static_libs=$prefer_static_libs if test "$use_static_libs" = built && test "$installed" = yes; then use_static_libs=no fi if test -n "$library_names" && { test "$use_static_libs" = no || test -z "$old_library"; }; then case $host in *cygwin* | *mingw* | *cegcc*) # No point in relinking DLLs because paths are not encoded func_append notinst_deplibs " $lib" need_relink=no ;; *) if test "$installed" = no; then func_append notinst_deplibs " $lib" need_relink=yes fi ;; esac # This is a shared library # Warn about portability, can't link against -module's on some # systems (darwin). Don't bleat about dlopened modules though! 
dlopenmodule="" for dlpremoduletest in $dlprefiles; do if test "X$dlpremoduletest" = "X$lib"; then dlopenmodule="$dlpremoduletest" break fi done if test -z "$dlopenmodule" && test "$shouldnotlink" = yes && test "$pass" = link; then echo if test "$linkmode" = prog; then $ECHO "*** Warning: Linking the executable $output against the loadable module" else $ECHO "*** Warning: Linking the shared library $output against the loadable module" fi $ECHO "*** $linklib is not portable!" fi if test "$linkmode" = lib && test "$hardcode_into_libs" = yes; then # Hardcode the library path. # Skip directories that are in the system default run-time # search path. case " $sys_lib_dlsearch_path " in *" $absdir "*) ;; *) case "$compile_rpath " in *" $absdir "*) ;; *) func_append compile_rpath " $absdir" ;; esac ;; esac case " $sys_lib_dlsearch_path " in *" $libdir "*) ;; *) case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac ;; esac fi if test -n "$old_archive_from_expsyms_cmds"; then # figure out the soname set dummy $library_names shift realname="$1" shift libname=`eval "\\$ECHO \"$libname_spec\""` # use dlname if we got it. it's perfectly good, no? if test -n "$dlname"; then soname="$dlname" elif test -n "$soname_spec"; then # bleh windows case $host in *cygwin* | mingw* | *cegcc*) func_arith $current - $age major=$func_arith_result versuffix="-$major" ;; esac eval soname=\"$soname_spec\" else soname="$realname" fi # Make a new name for the extract_expsyms_cmds to use soroot="$soname" func_basename "$soroot" soname="$func_basename_result" func_stripname 'lib' '.dll' "$soname" newlib=libimp-$func_stripname_result.a # If the library has no export list, then create one now if test -f "$output_objdir/$soname-def"; then : else func_verbose "extracting exported symbol list from \`$soname'" func_execute_cmds "$extract_expsyms_cmds" 'exit $?' 
fi # Create $newlib if test -f "$output_objdir/$newlib"; then :; else func_verbose "generating import library for \`$soname'" func_execute_cmds "$old_archive_from_expsyms_cmds" 'exit $?' fi # make sure the library variables are pointing to the new library dir=$output_objdir linklib=$newlib fi # test -n "$old_archive_from_expsyms_cmds" if test "$linkmode" = prog || test "$opt_mode" != relink; then add_shlibpath= add_dir= add= lib_linked=yes case $hardcode_action in immediate | unsupported) if test "$hardcode_direct" = no; then add="$dir/$linklib" case $host in *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;; *-*-sysv4*uw2*) add_dir="-L$dir" ;; *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ *-*-unixware7*) add_dir="-L$dir" ;; *-*-darwin* ) # if the lib is a (non-dlopened) module then we can not # link against it, someone is ignoring the earlier warnings if /usr/bin/file -L $add 2> /dev/null | $GREP ": [^:]* bundle" >/dev/null ; then if test "X$dlopenmodule" != "X$lib"; then $ECHO "*** Warning: lib $linklib is a module, not a shared library" if test -z "$old_library" ; then echo echo "*** And there doesn't seem to be a static archive available" echo "*** The link will probably fail, sorry" else add="$dir/$old_library" fi elif test -n "$old_library"; then add="$dir/$old_library" fi fi esac elif test "$hardcode_minus_L" = no; then case $host in *-*-sunos*) add_shlibpath="$dir" ;; esac add_dir="-L$dir" add="-l$name" elif test "$hardcode_shlibpath_var" = no; then add_shlibpath="$dir" add="-l$name" else lib_linked=no fi ;; relink) if test "$hardcode_direct" = yes && test "$hardcode_direct_absolute" = no; then add="$dir/$linklib" elif test "$hardcode_minus_L" = yes; then add_dir="-L$absdir" # Try looking first in the location we're being installed to. 
if test -n "$inst_prefix_dir"; then case $libdir in [\\/]*) func_append add_dir " -L$inst_prefix_dir$libdir" ;; esac fi add="-l$name" elif test "$hardcode_shlibpath_var" = yes; then add_shlibpath="$dir" add="-l$name" else lib_linked=no fi ;; *) lib_linked=no ;; esac if test "$lib_linked" != yes; then func_fatal_configuration "unsupported hardcode properties" fi if test -n "$add_shlibpath"; then case :$compile_shlibpath: in *":$add_shlibpath:"*) ;; *) func_append compile_shlibpath "$add_shlibpath:" ;; esac fi if test "$linkmode" = prog; then test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" test -n "$add" && compile_deplibs="$add $compile_deplibs" else test -n "$add_dir" && deplibs="$add_dir $deplibs" test -n "$add" && deplibs="$add $deplibs" if test "$hardcode_direct" != yes && test "$hardcode_minus_L" != yes && test "$hardcode_shlibpath_var" = yes; then case :$finalize_shlibpath: in *":$libdir:"*) ;; *) func_append finalize_shlibpath "$libdir:" ;; esac fi fi fi if test "$linkmode" = prog || test "$opt_mode" = relink; then add_shlibpath= add_dir= add= # Finalize command for both is simple: just hardcode it. if test "$hardcode_direct" = yes && test "$hardcode_direct_absolute" = no; then add="$libdir/$linklib" elif test "$hardcode_minus_L" = yes; then add_dir="-L$libdir" add="-l$name" elif test "$hardcode_shlibpath_var" = yes; then case :$finalize_shlibpath: in *":$libdir:"*) ;; *) func_append finalize_shlibpath "$libdir:" ;; esac add="-l$name" elif test "$hardcode_automatic" = yes; then if test -n "$inst_prefix_dir" && test -f "$inst_prefix_dir$libdir/$linklib" ; then add="$inst_prefix_dir$libdir/$linklib" else add="$libdir/$linklib" fi else # We cannot seem to hardcode it, guess we'll fake it. add_dir="-L$libdir" # Try looking first in the location we're being installed to. 
if test -n "$inst_prefix_dir"; then case $libdir in [\\/]*) func_append add_dir " -L$inst_prefix_dir$libdir" ;; esac fi add="-l$name" fi if test "$linkmode" = prog; then test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" test -n "$add" && finalize_deplibs="$add $finalize_deplibs" else test -n "$add_dir" && deplibs="$add_dir $deplibs" test -n "$add" && deplibs="$add $deplibs" fi fi elif test "$linkmode" = prog; then # Here we assume that one of hardcode_direct or hardcode_minus_L # is not unsupported. This is valid on all known static and # shared platforms. if test "$hardcode_direct" != unsupported; then test -n "$old_library" && linklib="$old_library" compile_deplibs="$dir/$linklib $compile_deplibs" finalize_deplibs="$dir/$linklib $finalize_deplibs" else compile_deplibs="-l$name -L$dir $compile_deplibs" finalize_deplibs="-l$name -L$dir $finalize_deplibs" fi elif test "$build_libtool_libs" = yes; then # Not a shared library if test "$deplibs_check_method" != pass_all; then # We're trying link a shared library against a static one # but the system doesn't support it. # Just print a warning and add the library to dependency_libs so # that the program can be linked against the static library. echo $ECHO "*** Warning: This system can not link to static lib archive $lib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have." if test "$module" = yes; then echo "*** But as you try to build a module library, libtool will still create " echo "*** a static module, that should work as long as the dlopening application" echo "*** is linked with the -dlopen flag to resolve symbols at runtime." 
if test -z "$global_symbol_pipe"; then echo echo "*** However, this would only work if libtool was able to extract symbol" echo "*** lists from a program, using \`nm' or equivalent, but libtool could" echo "*** not find such a program. So, this module is probably useless." echo "*** \`nm' from GNU binutils and a full rebuild may help." fi if test "$build_old_libs" = no; then build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi fi else deplibs="$dir/$old_library $deplibs" link_static=yes fi fi # link shared/static library? if test "$linkmode" = lib; then if test -n "$dependency_libs" && { test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes || test "$link_static" = yes; }; then # Extract -R from dependency_libs temp_deplibs= for libdir in $dependency_libs; do case $libdir in -R*) func_stripname '-R' '' "$libdir" temp_xrpath=$func_stripname_result case " $xrpath " in *" $temp_xrpath "*) ;; *) func_append xrpath " $temp_xrpath";; esac;; *) func_append temp_deplibs " $libdir";; esac done dependency_libs="$temp_deplibs" fi func_append newlib_search_path " $absdir" # Link against this library test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" # ... and its dependency_libs tmp_libs= for deplib in $dependency_libs; do newdependency_libs="$deplib $newdependency_libs" case $deplib in -L*) func_stripname '-L' '' "$deplib" func_resolve_sysroot "$func_stripname_result";; *) func_resolve_sysroot "$deplib" ;; esac if $opt_preserve_dup_deps ; then case "$tmp_libs " in *" $func_resolve_sysroot_result "*) func_append specialdeplibs " $func_resolve_sysroot_result" ;; esac fi func_append tmp_libs " $func_resolve_sysroot_result" done if test "$link_all_deplibs" != no; then # Add the search paths of all dependency libraries for deplib in $dependency_libs; do path= case $deplib in -L*) path="$deplib" ;; *.la) func_resolve_sysroot "$deplib" deplib=$func_resolve_sysroot_result func_dirname "$deplib" "" "." 
dir=$func_dirname_result # We need an absolute path. case $dir in [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; *) absdir=`cd "$dir" && pwd` if test -z "$absdir"; then func_warning "cannot determine absolute directory name of \`$dir'" absdir="$dir" fi ;; esac if $GREP "^installed=no" $deplib > /dev/null; then case $host in *-*-darwin*) depdepl= eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` if test -n "$deplibrary_names" ; then for tmp in $deplibrary_names ; do depdepl=$tmp done if test -f "$absdir/$objdir/$depdepl" ; then depdepl="$absdir/$objdir/$depdepl" darwin_install_name=`${OTOOL} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` if test -z "$darwin_install_name"; then darwin_install_name=`${OTOOL64} -L $depdepl | awk '{if (NR == 2) {print $1;exit}}'` fi func_append compiler_flags " ${wl}-dylib_file ${wl}${darwin_install_name}:${depdepl}" func_append linker_flags " -dylib_file ${darwin_install_name}:${depdepl}" path= fi fi ;; *) path="-L$absdir/$objdir" ;; esac else eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` test -z "$libdir" && \ func_fatal_error "\`$deplib' is not a valid libtool archive" test "$absdir" != "$libdir" && \ func_warning "\`$deplib' seems to be moved" path="-L$absdir" fi ;; esac case " $deplibs " in *" $path "*) ;; *) deplibs="$path $deplibs" ;; esac done fi # link_all_deplibs != no fi # linkmode = lib done # for deplib in $libs if test "$pass" = link; then if test "$linkmode" = "prog"; then compile_deplibs="$new_inherited_linker_flags $compile_deplibs" finalize_deplibs="$new_inherited_linker_flags $finalize_deplibs" else compiler_flags="$compiler_flags "`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` fi fi dependency_libs="$newdependency_libs" if test "$pass" = dlpreopen; then # Link the dlpreopened libraries before other libraries for deplib in $save_deplibs; do deplibs="$deplib $deplibs" done fi if test "$pass" != dlopen; then if test "$pass" != conv; 
then # Make sure lib_search_path contains only unique directories. lib_search_path= for dir in $newlib_search_path; do case "$lib_search_path " in *" $dir "*) ;; *) func_append lib_search_path " $dir" ;; esac done newlib_search_path= fi if test "$linkmode,$pass" != "prog,link"; then vars="deplibs" else vars="compile_deplibs finalize_deplibs" fi for var in $vars dependency_libs; do # Add libraries to $var in reverse order eval tmp_libs=\"\$$var\" new_libs= for deplib in $tmp_libs; do # FIXME: Pedantically, this is the right thing to do, so # that some nasty dependency loop isn't accidentally # broken: #new_libs="$deplib $new_libs" # Pragmatically, this seems to cause very few problems in # practice: case $deplib in -L*) new_libs="$deplib $new_libs" ;; -R*) ;; *) # And here is the reason: when a library appears more # than once as an explicit dependence of a library, or # is implicitly linked in more than once by the # compiler, it is considered special, and multiple # occurrences thereof are not removed. Compare this # with having the same library being listed as a # dependency of multiple other libraries: in this case, # we know (pedantically, we assume) the library does not # need to be listed more than once, so we keep only the # last copy. This is not always right, but it is rare # enough that we require users that really mean to play # such unportable linking tricks to link the library # using -Wl,-lname, so that libtool does not consider it # for duplicate removal. 
case " $specialdeplibs " in *" $deplib "*) new_libs="$deplib $new_libs" ;; *) case " $new_libs " in *" $deplib "*) ;; *) new_libs="$deplib $new_libs" ;; esac ;; esac ;; esac done tmp_libs= for deplib in $new_libs; do case $deplib in -L*) case " $tmp_libs " in *" $deplib "*) ;; *) func_append tmp_libs " $deplib" ;; esac ;; *) func_append tmp_libs " $deplib" ;; esac done eval $var=\"$tmp_libs\" done # for var fi # Last step: remove runtime libs from dependency_libs # (they stay in deplibs) tmp_libs= for i in $dependency_libs ; do case " $predeps $postdeps $compiler_lib_search_path " in *" $i "*) i="" ;; esac if test -n "$i" ; then func_append tmp_libs " $i" fi done dependency_libs=$tmp_libs done # for pass if test "$linkmode" = prog; then dlfiles="$newdlfiles" fi if test "$linkmode" = prog || test "$linkmode" = lib; then dlprefiles="$newdlprefiles" fi case $linkmode in oldlib) if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then func_warning "\`-dlopen' is ignored for archives" fi case " $deplibs" in *\ -l* | *\ -L*) func_warning "\`-l' and \`-L' are ignored for archives" ;; esac test -n "$rpath" && \ func_warning "\`-rpath' is ignored for archives" test -n "$xrpath" && \ func_warning "\`-R' is ignored for archives" test -n "$vinfo" && \ func_warning "\`-version-info/-version-number' is ignored for archives" test -n "$release" && \ func_warning "\`-release' is ignored for archives" test -n "$export_symbols$export_symbols_regex" && \ func_warning "\`-export-symbols' is ignored for archives" # Now set the variables for building old libraries. build_libtool_libs=no oldlibs="$output" func_append objs "$old_deplibs" ;; lib) # Make sure we only generate libraries of the form `libNAME.la'. 
case $outputname in lib*) func_stripname 'lib' '.la' "$outputname" name=$func_stripname_result eval shared_ext=\"$shrext_cmds\" eval libname=\"$libname_spec\" ;; *) test "$module" = no && \ func_fatal_help "libtool library \`$output' must begin with \`lib'" if test "$need_lib_prefix" != no; then # Add the "lib" prefix for modules if required func_stripname '' '.la' "$outputname" name=$func_stripname_result eval shared_ext=\"$shrext_cmds\" eval libname=\"$libname_spec\" else func_stripname '' '.la' "$outputname" libname=$func_stripname_result fi ;; esac if test -n "$objs"; then if test "$deplibs_check_method" != pass_all; then func_fatal_error "cannot build libtool library \`$output' from non-libtool objects on this host:$objs" else echo $ECHO "*** Warning: Linking the shared library $output against the non-libtool" $ECHO "*** objects $objs is not portable!" func_append libobjs " $objs" fi fi test "$dlself" != no && \ func_warning "\`-dlopen self' is ignored for libtool libraries" set dummy $rpath shift test "$#" -gt 1 && \ func_warning "ignoring multiple \`-rpath's for a libtool library" install_libdir="$1" oldlibs= if test -z "$rpath"; then if test "$build_libtool_libs" = yes; then # Building a libtool convenience library. # Some compilers have problems with a `.al' extension so # convenience libraries should have the same extension an # archive normally would. oldlibs="$output_objdir/$libname.$libext $oldlibs" build_libtool_libs=convenience build_old_libs=yes fi test -n "$vinfo" && \ func_warning "\`-version-info/-version-number' is ignored for convenience libraries" test -n "$release" && \ func_warning "\`-release' is ignored for convenience libraries" else # Parse the version information argument. 
save_ifs="$IFS"; IFS=':' set dummy $vinfo 0 0 0 shift IFS="$save_ifs" test -n "$7" && \ func_fatal_help "too many parameters to \`-version-info'" # convert absolute version numbers to libtool ages # this retains compatibility with .la files and attempts # to make the code below a bit more comprehensible case $vinfo_number in yes) number_major="$1" number_minor="$2" number_revision="$3" # # There are really only two kinds -- those that # use the current revision as the major version # and those that subtract age and use age as # a minor version. But, then there is irix # which has an extra 1 added just for fun # case $version_type in # correct linux to gnu/linux during the next big refactor darwin|linux|osf|windows|none) func_arith $number_major + $number_minor current=$func_arith_result age="$number_minor" revision="$number_revision" ;; freebsd-aout|freebsd-elf|qnx|sunos) current="$number_major" revision="$number_minor" age="0" ;; irix|nonstopux) func_arith $number_major + $number_minor current=$func_arith_result age="$number_minor" revision="$number_minor" lt_irix_increment=no ;; *) func_fatal_configuration "$modename: unknown library version type \`$version_type'" ;; esac ;; no) current="$1" revision="$2" age="$3" ;; esac # Check that each of the things are valid numbers. 
case $current in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "CURRENT \`$current' must be a nonnegative integer" func_fatal_error "\`$vinfo' is not valid version information" ;; esac case $revision in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "REVISION \`$revision' must be a nonnegative integer" func_fatal_error "\`$vinfo' is not valid version information" ;; esac case $age in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) func_error "AGE \`$age' must be a nonnegative integer" func_fatal_error "\`$vinfo' is not valid version information" ;; esac if test "$age" -gt "$current"; then func_error "AGE \`$age' is greater than the current interface number \`$current'" func_fatal_error "\`$vinfo' is not valid version information" fi # Calculate the version variables. major= versuffix= verstring= case $version_type in none) ;; darwin) # Like Linux, but with the current version available in # verstring for coding it into the library header func_arith $current - $age major=.$func_arith_result versuffix="$major.$age.$revision" # Darwin ld doesn't like 0 for these options... func_arith $current + 1 minor_current=$func_arith_result xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision" verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" ;; freebsd-aout) major=".$current" versuffix=".$current.$revision"; ;; freebsd-elf) major=".$current" versuffix=".$current" ;; irix | nonstopux) if test "X$lt_irix_increment" = "Xno"; then func_arith $current - $age else func_arith $current - $age + 1 fi major=$func_arith_result case $version_type in nonstopux) verstring_prefix=nonstopux ;; *) verstring_prefix=sgi ;; esac verstring="$verstring_prefix$major.$revision" # Add in all the interfaces that we are compatible with. 
loop=$revision while test "$loop" -ne 0; do func_arith $revision - $loop iface=$func_arith_result func_arith $loop - 1 loop=$func_arith_result verstring="$verstring_prefix$major.$iface:$verstring" done # Before this point, $major must not contain `.'. major=.$major versuffix="$major.$revision" ;; linux) # correct to gnu/linux during the next big refactor func_arith $current - $age major=.$func_arith_result versuffix="$major.$age.$revision" ;; osf) func_arith $current - $age major=.$func_arith_result versuffix=".$current.$age.$revision" verstring="$current.$age.$revision" # Add in all the interfaces that we are compatible with. loop=$age while test "$loop" -ne 0; do func_arith $current - $loop iface=$func_arith_result func_arith $loop - 1 loop=$func_arith_result verstring="$verstring:${iface}.0" done # Make executables depend on our current version. func_append verstring ":${current}.0" ;; qnx) major=".$current" versuffix=".$current" ;; sunos) major=".$current" versuffix=".$current.$revision" ;; windows) # Use '-' rather than '.', since we only want one # extension on DOS 8.3 filesystems. func_arith $current - $age major=$func_arith_result versuffix="-$major" ;; *) func_fatal_configuration "unknown library version type \`$version_type'" ;; esac # Clear the version info if we defaulted, and they specified a release. if test -z "$vinfo" && test -n "$release"; then major= case $version_type in darwin) # we can't check for "0.0" in archive_cmds due to quoting # problems, so we reset it completely verstring= ;; *) verstring="0.0" ;; esac if test "$need_version" = no; then versuffix= else versuffix=".0.0" fi fi # Remove version info from name if versioning should be avoided if test "$avoid_version" = yes && test "$need_version" = no; then major= versuffix= verstring="" fi # Check to see if the archive will have undefined symbols. 
if test "$allow_undefined" = yes; then if test "$allow_undefined_flag" = unsupported; then func_warning "undefined symbols not allowed in $host shared libraries" build_libtool_libs=no build_old_libs=yes fi else # Don't allow undefined symbols. allow_undefined_flag="$no_undefined_flag" fi fi func_generate_dlsyms "$libname" "$libname" "yes" func_append libobjs " $symfileobj" test "X$libobjs" = "X " && libobjs= if test "$opt_mode" != relink; then # Remove our outputs, but don't remove object files since they # may have been created when compiling PIC objects. removelist= tempremovelist=`$ECHO "$output_objdir/*"` for p in $tempremovelist; do case $p in *.$objext | *.gcno) ;; $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*) if test "X$precious_files_regex" != "X"; then if $ECHO "$p" | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 then continue fi fi func_append removelist " $p" ;; *) ;; esac done test -n "$removelist" && \ func_show_eval "${RM}r \$removelist" fi # Now set the variables for building old libraries. if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then func_append oldlibs " $output_objdir/$libname.$libext" # Transform .lo files to .o files. oldobjs="$objs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; $lo2o" | $NL2SP` fi # Eliminate all temporary directories. #for path in $notinst_path; do # lib_search_path=`$ECHO "$lib_search_path " | $SED "s% $path % %g"` # deplibs=`$ECHO "$deplibs " | $SED "s% -L$path % %g"` # dependency_libs=`$ECHO "$dependency_libs " | $SED "s% -L$path % %g"` #done if test -n "$xrpath"; then # If the user specified any rpath flags, then add them. 
temp_xrpath= for libdir in $xrpath; do func_replace_sysroot "$libdir" func_append temp_xrpath " -R$func_replace_sysroot_result" case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac done if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then dependency_libs="$temp_xrpath $dependency_libs" fi fi # Make sure dlfiles contains only unique files that won't be dlpreopened old_dlfiles="$dlfiles" dlfiles= for lib in $old_dlfiles; do case " $dlprefiles $dlfiles " in *" $lib "*) ;; *) func_append dlfiles " $lib" ;; esac done # Make sure dlprefiles contains only unique files old_dlprefiles="$dlprefiles" dlprefiles= for lib in $old_dlprefiles; do case "$dlprefiles " in *" $lib "*) ;; *) func_append dlprefiles " $lib" ;; esac done if test "$build_libtool_libs" = yes; then if test -n "$rpath"; then case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos* | *-cegcc* | *-*-haiku*) # these systems don't actually have a c library (as such)! ;; *-*-rhapsody* | *-*-darwin1.[012]) # Rhapsody C library is in the System framework func_append deplibs " System.ltframework" ;; *-*-netbsd*) # Don't link with libc until the a.out ld.so is fixed. ;; *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc due to us having libc/libc_r. ;; *-*-sco3.2v5* | *-*-sco5v6*) # Causes problems with __ctype ;; *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) # Compiler inserts libc in the correct place for threads to work ;; *) # Add libc to deplibs on all other systems if necessary. if test "$build_libtool_need_lc" = "yes"; then func_append deplibs " -lc" fi ;; esac fi # Transform deplibs into only deplibs that can be linked in shared. name_save=$name libname_save=$libname release_save=$release versuffix_save=$versuffix major_save=$major # I'm not sure if I'm treating the release correctly. I think # release should show up in the -l (ie -lgmp5) so we don't want to # add it in twice. Is that correct? 
release="" versuffix="" major="" newdeplibs= droppeddeps=no case $deplibs_check_method in pass_all) # Don't check for shared/static. Everything works. # This might be a little naive. We might want to check # whether the library exists or not. But this is on # osf3 & osf4 and I'm not really sure... Just # implementing what was already the behavior. newdeplibs=$deplibs ;; test_compile) # This code stresses the "libraries are programs" paradigm to its # limits. Maybe even breaks it. We compile a program, linking it # against the deplibs as a proxy for the library. Then we can check # whether they linked in statically or dynamically with ldd. $opt_dry_run || $RM conftest.c cat > conftest.c </dev/null` $nocaseglob else potential_libs=`ls $i/$libnameglob[.-]* 2>/dev/null` fi for potent_lib in $potential_libs; do # Follow soft links. if ls -lLd "$potent_lib" 2>/dev/null | $GREP " -> " >/dev/null; then continue fi # The statement above tries to avoid entering an # endless loop below, in case of cyclic links. # We might still enter an endless loop, since a link # loop can be closed while we follow links, # but so what? potlib="$potent_lib" while test -h "$potlib" 2>/dev/null; do potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` case $potliblink in [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; *) potlib=`$ECHO "$potlib" | $SED 's,[^/]*$,,'`"$potliblink";; esac done if eval $file_magic_cmd \"\$potlib\" 2>/dev/null | $SED -e 10q | $EGREP "$file_magic_regex" > /dev/null; then func_append newdeplibs " $a_deplib" a_deplib="" break 2 fi done done fi if test -n "$a_deplib" ; then droppeddeps=yes echo $ECHO "*** Warning: linker path does not have real file for library $a_deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. 
But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because I did check the linker path looking for a file starting" if test -z "$potlib" ; then $ECHO "*** with $libname but no candidates were found. (...for file magic test)" else $ECHO "*** with $libname and none of the candidates passed a file format test" $ECHO "*** using a file magic. Last file checked: $potlib" fi fi ;; *) # Add a -L argument. func_append newdeplibs " $a_deplib" ;; esac done # Gone through all deplibs. ;; match_pattern*) set dummy $deplibs_check_method; shift match_pattern_regex=`expr "$deplibs_check_method" : "$1 \(.*\)"` for a_deplib in $deplibs; do case $a_deplib in -l*) func_stripname -l '' "$a_deplib" name=$func_stripname_result if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then case " $predeps $postdeps " in *" $a_deplib "*) func_append newdeplibs " $a_deplib" a_deplib="" ;; esac fi if test -n "$a_deplib" ; then libname=`eval "\\$ECHO \"$libname_spec\""` for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do potential_libs=`ls $i/$libname[.-]* 2>/dev/null` for potent_lib in $potential_libs; do potlib="$potent_lib" # see symlink-check above in file_magic test if eval "\$ECHO \"$potent_lib\"" 2>/dev/null | $SED 10q | \ $EGREP "$match_pattern_regex" > /dev/null; then func_append newdeplibs " $a_deplib" a_deplib="" break 2 fi done done fi if test -n "$a_deplib" ; then droppeddeps=yes echo $ECHO "*** Warning: linker path does not have real file for library $a_deplib." echo "*** I have the capability to make that library automatically link in when" echo "*** you link to this library. But I can only do this if you have a" echo "*** shared version of the library, which you do not appear to have" echo "*** because I did check the linker path looking for a file starting" if test -z "$potlib" ; then $ECHO "*** with $libname but no candidates were found. 
(...for regex pattern test)" else $ECHO "*** with $libname and none of the candidates passed a file format test" $ECHO "*** using a regex pattern. Last file checked: $potlib" fi fi ;; *) # Add a -L argument. func_append newdeplibs " $a_deplib" ;; esac done # Gone through all deplibs. ;; none | unknown | *) newdeplibs="" tmp_deplibs=`$ECHO " $deplibs" | $SED 's/ -lc$//; s/ -[LR][^ ]*//g'` if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then for i in $predeps $postdeps ; do # can't use Xsed below, because $i might contain '/' tmp_deplibs=`$ECHO " $tmp_deplibs" | $SED "s,$i,,"` done fi case $tmp_deplibs in *[!\ \ ]*) echo if test "X$deplibs_check_method" = "Xnone"; then echo "*** Warning: inter-library dependencies are not supported in this platform." else echo "*** Warning: inter-library dependencies are not known to be supported." fi echo "*** All declared inter-library dependencies are being dropped." droppeddeps=yes ;; esac ;; esac versuffix=$versuffix_save major=$major_save release=$release_save libname=$libname_save name=$name_save case $host in *-*-rhapsody* | *-*-darwin1.[012]) # On Rhapsody replace the C library with the System framework newdeplibs=`$ECHO " $newdeplibs" | $SED 's/ -lc / System.ltframework /'` ;; esac if test "$droppeddeps" = yes; then if test "$module" = yes; then echo echo "*** Warning: libtool could not satisfy all declared inter-library" $ECHO "*** dependencies of module $libname. Therefore, libtool will create" echo "*** a static module, that should work as long as the dlopening" echo "*** application is linked with the -dlopen flag." if test -z "$global_symbol_pipe"; then echo echo "*** However, this would only work if libtool was able to extract symbol" echo "*** lists from a program, using \`nm' or equivalent, but libtool could" echo "*** not find such a program. So, this module is probably useless." echo "*** \`nm' from GNU binutils and a full rebuild may help." 
fi if test "$build_old_libs" = no; then oldlibs="$output_objdir/$libname.$libext" build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi else echo "*** The inter-library dependencies that have been dropped here will be" echo "*** automatically added whenever a program is linked with this library" echo "*** or is declared to -dlopen it." if test "$allow_undefined" = no; then echo echo "*** Since this library must not contain undefined symbols," echo "*** because either the platform does not support them or" echo "*** it was explicitly requested with -no-undefined," echo "*** libtool will only create a static version of it." if test "$build_old_libs" = no; then oldlibs="$output_objdir/$libname.$libext" build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi fi fi fi # Done checking deplibs! deplibs=$newdeplibs fi # Time to change all our "foo.ltframework" stuff back to "-framework foo" case $host in *-*-darwin*) newdeplibs=`$ECHO " $newdeplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` new_inherited_linker_flags=`$ECHO " $new_inherited_linker_flags" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` deplibs=`$ECHO " $deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` ;; esac # move library search paths that coincide with paths to not yet # installed libraries to the beginning of the library search list new_libs= for path in $notinst_path; do case " $new_libs " in *" -L$path/$objdir "*) ;; *) case " $deplibs " in *" -L$path/$objdir "*) func_append new_libs " -L$path/$objdir" ;; esac ;; esac done for deplib in $deplibs; do case $deplib in -L*) case " $new_libs " in *" $deplib "*) ;; *) func_append new_libs " $deplib" ;; esac ;; *) func_append new_libs " $deplib" ;; esac done deplibs="$new_libs" # All the library-specific variables (install_libdir is set above). 
library_names= old_library= dlname= # Test again, we may have decided not to build it any more if test "$build_libtool_libs" = yes; then # Remove ${wl} instances when linking with ld. # FIXME: should test the right _cmds variable. case $archive_cmds in *\$LD\ *) wl= ;; esac if test "$hardcode_into_libs" = yes; then # Hardcode the library paths hardcode_libdirs= dep_rpath= rpath="$finalize_rpath" test "$opt_mode" != relink && rpath="$compile_rpath$rpath" for libdir in $rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then func_replace_sysroot "$libdir" libdir=$func_replace_sysroot_result if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append dep_rpath " $flag" fi elif test -n "$runpath_var"; then case "$perm_rpath " in *" $libdir "*) ;; *) func_append perm_rpath " $libdir" ;; esac fi done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir="$hardcode_libdirs" eval "dep_rpath=\"$hardcode_libdir_flag_spec\"" fi if test -n "$runpath_var" && test -n "$perm_rpath"; then # We should set the runpath_var. rpath= for dir in $perm_rpath; do func_append rpath "$dir:" done eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" fi test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" fi shlibpath="$finalize_shlibpath" test "$opt_mode" != relink && shlibpath="$compile_shlibpath$shlibpath" if test -n "$shlibpath"; then eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" fi # Get the real and link names of the library. 
eval shared_ext=\"$shrext_cmds\" eval library_names=\"$library_names_spec\" set dummy $library_names shift realname="$1" shift if test -n "$soname_spec"; then eval soname=\"$soname_spec\" else soname="$realname" fi if test -z "$dlname"; then dlname=$soname fi lib="$output_objdir/$realname" linknames= for link do func_append linknames " $link" done # Use standard objects if they are pic test -z "$pic_flag" && libobjs=`$ECHO "$libobjs" | $SP2NL | $SED "$lo2o" | $NL2SP` test "X$libobjs" = "X " && libobjs= delfiles= if test -n "$export_symbols" && test -n "$include_expsyms"; then $opt_dry_run || cp "$export_symbols" "$output_objdir/$libname.uexp" export_symbols="$output_objdir/$libname.uexp" func_append delfiles " $export_symbols" fi orig_export_symbols= case $host_os in cygwin* | mingw* | cegcc*) if test -n "$export_symbols" && test -z "$export_symbols_regex"; then # exporting using user supplied symfile if test "x`$SED 1q $export_symbols`" != xEXPORTS; then # and it's NOT already a .def file. Must figure out # which of the given symbols are data symbols and tag # them as such. So, trigger use of export_symbols_cmds. # export_symbols gets reassigned inside the "prepare # the list of exported symbols" if statement, so the # include_expsyms logic still works. orig_export_symbols="$export_symbols" export_symbols= always_export_symbols=yes fi fi ;; esac # Prepare the list of exported symbols if test -z "$export_symbols"; then if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then func_verbose "generating symbol list for \`$libname.la'" export_symbols="$output_objdir/$libname.exp" $opt_dry_run || $RM $export_symbols cmds=$export_symbols_cmds save_ifs="$IFS"; IFS='~' for cmd1 in $cmds; do IFS="$save_ifs" # Take the normal branch if the nm_file_list_spec branch # doesn't work or if tool conversion is not needed. 
case $nm_file_list_spec~$to_tool_file_cmd in *~func_convert_file_noop | *~func_convert_file_msys_to_w32 | ~*) try_normal_branch=yes eval cmd=\"$cmd1\" func_len " $cmd" len=$func_len_result ;; *) try_normal_branch=no ;; esac if test "$try_normal_branch" = yes \ && { test "$len" -lt "$max_cmd_len" \ || test "$max_cmd_len" -le -1; } then func_show_eval "$cmd" 'exit $?' skipped_export=false elif test -n "$nm_file_list_spec"; then func_basename "$output" output_la=$func_basename_result save_libobjs=$libobjs save_output=$output output=${output_objdir}/${output_la}.nm func_to_tool_file "$output" libobjs=$nm_file_list_spec$func_to_tool_file_result func_append delfiles " $output" func_verbose "creating $NM input file list: $output" for obj in $save_libobjs; do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" done > "$output" eval cmd=\"$cmd1\" func_show_eval "$cmd" 'exit $?' output=$save_output libobjs=$save_libobjs skipped_export=false else # The command line is too long to execute in one step. func_verbose "using reloadable object file for export list..." skipped_export=: # Break out early, otherwise skipped_export may be # set to false by a later but shorter cmd. break fi done IFS="$save_ifs" if test -n "$export_symbols_regex" && test "X$skipped_export" != "X:"; then func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' func_show_eval '$MV "${export_symbols}T" "$export_symbols"' fi fi fi if test -n "$export_symbols" && test -n "$include_expsyms"; then tmp_export_symbols="$export_symbols" test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' fi if test "X$skipped_export" != "X:" && test -n "$orig_export_symbols"; then # The given exports_symbols file has to be filtered, so filter it. 
func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" # FIXME: $output_objdir/$libname.filter potentially contains lots of # 's' commands which not all seds can handle. GNU sed should be fine # though. Also, the filter scales superlinearly with the number of # global variables. join(1) would be nice here, but unfortunately # isn't a blessed tool. $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter func_append delfiles " $export_symbols $output_objdir/$libname.filter" export_symbols=$output_objdir/$libname.def $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols fi tmp_deplibs= for test_deplib in $deplibs; do case " $convenience " in *" $test_deplib "*) ;; *) func_append tmp_deplibs " $test_deplib" ;; esac done deplibs="$tmp_deplibs" if test -n "$convenience"; then if test -n "$whole_archive_flag_spec" && test "$compiler_needs_object" = yes && test -z "$libobjs"; then # extract the archives, so we have objects to list. # TODO: could optimize this to just extract one archive. whole_archive_flag_spec= fi if test -n "$whole_archive_flag_spec"; then save_libobjs=$libobjs eval libobjs=\"\$libobjs $whole_archive_flag_spec\" test "X$libobjs" = "X " && libobjs= else gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_extract_archives $gentop $convenience func_append libobjs " $func_extract_archives_result" test "X$libobjs" = "X " && libobjs= fi fi if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then eval flag=\"$thread_safe_flag_spec\" func_append linker_flags " $flag" fi # Make a backup of the uninstalled library when relinking if test "$opt_mode" = relink; then $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}U && $MV $realname ${realname}U)' || exit $? fi # Do each of the archive commands. 
if test "$module" = yes && test -n "$module_cmds" ; then if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then eval test_cmds=\"$module_expsym_cmds\" cmds=$module_expsym_cmds else eval test_cmds=\"$module_cmds\" cmds=$module_cmds fi else if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then eval test_cmds=\"$archive_expsym_cmds\" cmds=$archive_expsym_cmds else eval test_cmds=\"$archive_cmds\" cmds=$archive_cmds fi fi if test "X$skipped_export" != "X:" && func_len " $test_cmds" && len=$func_len_result && test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then : else # The command line is too long to link in one step, link piecewise # or, if using GNU ld and skipped_export is not :, use a linker # script. # Save the value of $output and $libobjs because we want to # use them later. If we have whole_archive_flag_spec, we # want to use save_libobjs as it was before # whole_archive_flag_spec was expanded, because we can't # assume the linker understands whole_archive_flag_spec. # This may have to be revisited, in case too many # convenience libraries get linked in and end up exceeding # the spec. if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then save_libobjs=$libobjs fi save_output=$output func_basename "$output" output_la=$func_basename_result # Clear the reloadable object creation command queue and # initialize k to one. 
test_cmds= concat_cmds= objlist= last_robj= k=1 if test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "$with_gnu_ld" = yes; then output=${output_objdir}/${output_la}.lnkscript func_verbose "creating GNU ld script: $output" echo 'INPUT (' > $output for obj in $save_libobjs do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" >> $output done echo ')' >> $output func_append delfiles " $output" func_to_tool_file "$output" output=$func_to_tool_file_result elif test -n "$save_libobjs" && test "X$skipped_export" != "X:" && test "X$file_list_spec" != X; then output=${output_objdir}/${output_la}.lnk func_verbose "creating linker input file list: $output" : > $output set x $save_libobjs shift firstobj= if test "$compiler_needs_object" = yes; then firstobj="$1 " shift fi for obj do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" >> $output done func_append delfiles " $output" func_to_tool_file "$output" output=$firstobj\"$file_list_spec$func_to_tool_file_result\" else if test -n "$save_libobjs"; then func_verbose "creating reloadable object files..." output=$output_objdir/$output_la-${k}.$objext eval test_cmds=\"$reload_cmds\" func_len " $test_cmds" len0=$func_len_result len=$len0 # Loop over the list of objects to be linked. for obj in $save_libobjs do func_len " $obj" func_arith $len + $func_len_result len=$func_arith_result if test "X$objlist" = X || test "$len" -lt "$max_cmd_len"; then func_append objlist " $obj" else # The command $test_cmds is almost too long, add a # command to the queue. if test "$k" -eq 1 ; then # The first file doesn't have a previous command to add. reload_objs=$objlist eval concat_cmds=\"$reload_cmds\" else # All subsequent reloadable object files will link in # the last one created. 
reload_objs="$objlist $last_robj" eval concat_cmds=\"\$concat_cmds~$reload_cmds~\$RM $last_robj\" fi last_robj=$output_objdir/$output_la-${k}.$objext func_arith $k + 1 k=$func_arith_result output=$output_objdir/$output_la-${k}.$objext objlist=" $obj" func_len " $last_robj" func_arith $len0 + $func_len_result len=$func_arith_result fi done # Handle the remaining objects by creating one last # reloadable object file. All subsequent reloadable object # files will link in the last one created. test -z "$concat_cmds" || concat_cmds=$concat_cmds~ reload_objs="$objlist $last_robj" eval concat_cmds=\"\${concat_cmds}$reload_cmds\" if test -n "$last_robj"; then eval concat_cmds=\"\${concat_cmds}~\$RM $last_robj\" fi func_append delfiles " $output" else output= fi if ${skipped_export-false}; then func_verbose "generating symbol list for \`$libname.la'" export_symbols="$output_objdir/$libname.exp" $opt_dry_run || $RM $export_symbols libobjs=$output # Append the command to create the export file. test -z "$concat_cmds" || concat_cmds=$concat_cmds~ eval concat_cmds=\"\$concat_cmds$export_symbols_cmds\" if test -n "$last_robj"; then eval concat_cmds=\"\$concat_cmds~\$RM $last_robj\" fi fi test -n "$save_libobjs" && func_verbose "creating a temporary reloadable object file: $output" # Loop through the commands generated above and execute them. save_ifs="$IFS"; IFS='~' for cmd in $concat_cmds; do IFS="$save_ifs" $opt_silent || { func_quote_for_expand "$cmd" eval "func_echo $func_quote_for_expand_result" } $opt_dry_run || eval "$cmd" || { lt_exit=$? 
# Restore the uninstalled library and exit if test "$opt_mode" = relink; then ( cd "$output_objdir" && \ $RM "${realname}T" && \ $MV "${realname}U" "$realname" ) fi exit $lt_exit } done IFS="$save_ifs" if test -n "$export_symbols_regex" && ${skipped_export-false}; then func_show_eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' func_show_eval '$MV "${export_symbols}T" "$export_symbols"' fi fi if ${skipped_export-false}; then if test -n "$export_symbols" && test -n "$include_expsyms"; then tmp_export_symbols="$export_symbols" test -n "$orig_export_symbols" && tmp_export_symbols="$orig_export_symbols" $opt_dry_run || eval '$ECHO "$include_expsyms" | $SP2NL >> "$tmp_export_symbols"' fi if test -n "$orig_export_symbols"; then # The given exports_symbols file has to be filtered, so filter it. func_verbose "filter symbol list for \`$libname.la' to tag DATA exports" # FIXME: $output_objdir/$libname.filter potentially contains lots of # 's' commands which not all seds can handle. GNU sed should be fine # though. Also, the filter scales superlinearly with the number of # global variables. join(1) would be nice here, but unfortunately # isn't a blessed tool. $opt_dry_run || $SED -e '/[ ,]DATA/!d;s,\(.*\)\([ \,].*\),s|^\1$|\1\2|,' < $export_symbols > $output_objdir/$libname.filter func_append delfiles " $export_symbols $output_objdir/$libname.filter" export_symbols=$output_objdir/$libname.def $opt_dry_run || $SED -f $output_objdir/$libname.filter < $orig_export_symbols > $export_symbols fi fi libobjs=$output # Restore the value of output. output=$save_output if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then eval libobjs=\"\$libobjs $whole_archive_flag_spec\" test "X$libobjs" = "X " && libobjs= fi # Expand the library linking commands again to reset the # value of $libobjs for piecewise linking. # Do each of the archive commands. 
if test "$module" = yes && test -n "$module_cmds" ; then if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then cmds=$module_expsym_cmds else cmds=$module_cmds fi else if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then cmds=$archive_expsym_cmds else cmds=$archive_cmds fi fi fi if test -n "$delfiles"; then # Append the command to remove temporary files to $cmds. eval cmds=\"\$cmds~\$RM $delfiles\" fi # Add any objects from preloaded convenience libraries if test -n "$dlprefiles"; then gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_extract_archives $gentop $dlprefiles func_append libobjs " $func_extract_archives_result" test "X$libobjs" = "X " && libobjs= fi save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $opt_silent || { func_quote_for_expand "$cmd" eval "func_echo $func_quote_for_expand_result" } $opt_dry_run || eval "$cmd" || { lt_exit=$? # Restore the uninstalled library and exit if test "$opt_mode" = relink; then ( cd "$output_objdir" && \ $RM "${realname}T" && \ $MV "${realname}U" "$realname" ) fi exit $lt_exit } done IFS="$save_ifs" # Restore the uninstalled library and exit if test "$opt_mode" = relink; then $opt_dry_run || eval '(cd $output_objdir && $RM ${realname}T && $MV $realname ${realname}T && $MV ${realname}U $realname)' || exit $? if test -n "$convenience"; then if test -z "$whole_archive_flag_spec"; then func_show_eval '${RM}r "$gentop"' fi fi exit $EXIT_SUCCESS fi # Create links to the real library. for linkname in $linknames; do if test "$realname" != "$linkname"; then func_show_eval '(cd "$output_objdir" && $RM "$linkname" && $LN_S "$realname" "$linkname")' 'exit $?' fi done # If -module or -export-dynamic was specified, set the dlname. if test "$module" = yes || test "$export_dynamic" = yes; then # On all known operating systems, these are identical. 
dlname="$soname" fi fi ;; obj) if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then func_warning "\`-dlopen' is ignored for objects" fi case " $deplibs" in *\ -l* | *\ -L*) func_warning "\`-l' and \`-L' are ignored for objects" ;; esac test -n "$rpath" && \ func_warning "\`-rpath' is ignored for objects" test -n "$xrpath" && \ func_warning "\`-R' is ignored for objects" test -n "$vinfo" && \ func_warning "\`-version-info' is ignored for objects" test -n "$release" && \ func_warning "\`-release' is ignored for objects" case $output in *.lo) test -n "$objs$old_deplibs" && \ func_fatal_error "cannot build library object \`$output' from non-libtool objects" libobj=$output func_lo2o "$libobj" obj=$func_lo2o_result ;; *) libobj= obj="$output" ;; esac # Delete the old objects. $opt_dry_run || $RM $obj $libobj # Objects from convenience libraries. This assumes # single-version convenience libraries. Whenever we create # different ones for PIC/non-PIC, we'll have to duplicate # the extraction. reload_conv_objs= gentop= # reload_cmds runs $LD directly, so let us get rid of # -Wl from whole_archive_flag_spec and hope we can get by with # turning comma into space.. wl= if test -n "$convenience"; then if test -n "$whole_archive_flag_spec"; then eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" reload_conv_objs=$reload_objs\ `$ECHO "$tmp_whole_archive_flags" | $SED 's|,| |g'` else gentop="$output_objdir/${obj}x" func_append generated " $gentop" func_extract_archives $gentop $convenience reload_conv_objs="$reload_objs $func_extract_archives_result" fi fi # If we're not building shared, we need to use non_pic_objs test "$build_libtool_libs" != yes && libobjs="$non_pic_objects" # Create the old-style object. reload_objs="$objs$old_deplibs "`$ECHO "$libobjs" | $SP2NL | $SED "/\.${libext}$/d; /\.lib$/d; $lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test output="$obj" func_execute_cmds "$reload_cmds" 'exit $?' 
# Exit if we aren't doing a library object file. if test -z "$libobj"; then if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi exit $EXIT_SUCCESS fi if test "$build_libtool_libs" != yes; then if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi # Create an invalid libtool object if no PIC, so that we don't # accidentally link it into a program. # $show "echo timestamp > $libobj" # $opt_dry_run || eval "echo timestamp > $libobj" || exit $? exit $EXIT_SUCCESS fi if test -n "$pic_flag" || test "$pic_mode" != default; then # Only do commands if we really have different PIC objects. reload_objs="$libobjs $reload_conv_objs" output="$libobj" func_execute_cmds "$reload_cmds" 'exit $?' fi if test -n "$gentop"; then func_show_eval '${RM}r "$gentop"' fi exit $EXIT_SUCCESS ;; prog) case $host in *cygwin*) func_stripname '' '.exe' "$output" output=$func_stripname_result.exe;; esac test -n "$vinfo" && \ func_warning "\`-version-info' is ignored for programs" test -n "$release" && \ func_warning "\`-release' is ignored for programs" test "$preload" = yes \ && test "$dlopen_support" = unknown \ && test "$dlopen_self" = unknown \ && test "$dlopen_self_static" = unknown && \ func_warning "\`LT_INIT([dlopen])' not used. Assuming no dlopen support." case $host in *-*-rhapsody* | *-*-darwin1.[012]) # On Rhapsody replace the C library with the System framework compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's/ -lc / System.ltframework /'` finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's/ -lc / System.ltframework /'` ;; esac case $host in *-*-darwin*) # Don't allow lazy linking, it breaks C++ global constructors # But is supposedly fixed on 10.4 or later (yay!). 
if test "$tagname" = CXX ; then case ${MACOSX_DEPLOYMENT_TARGET-10.0} in 10.[0123]) func_append compile_command " ${wl}-bind_at_load" func_append finalize_command " ${wl}-bind_at_load" ;; esac fi # Time to change all our "foo.ltframework" stuff back to "-framework foo" compile_deplibs=`$ECHO " $compile_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` finalize_deplibs=`$ECHO " $finalize_deplibs" | $SED 's% \([^ $]*\).ltframework% -framework \1%g'` ;; esac # move library search paths that coincide with paths to not yet # installed libraries to the beginning of the library search list new_libs= for path in $notinst_path; do case " $new_libs " in *" -L$path/$objdir "*) ;; *) case " $compile_deplibs " in *" -L$path/$objdir "*) func_append new_libs " -L$path/$objdir" ;; esac ;; esac done for deplib in $compile_deplibs; do case $deplib in -L*) case " $new_libs " in *" $deplib "*) ;; *) func_append new_libs " $deplib" ;; esac ;; *) func_append new_libs " $deplib" ;; esac done compile_deplibs="$new_libs" func_append compile_command " $compile_deplibs" func_append finalize_command " $finalize_deplibs" if test -n "$rpath$xrpath"; then # If the user specified any rpath flags, then add them. for libdir in $rpath $xrpath; do # This is the magic to use -rpath. case "$finalize_rpath " in *" $libdir "*) ;; *) func_append finalize_rpath " $libdir" ;; esac done fi # Now hardcode the library paths rpath= hardcode_libdirs= for libdir in $compile_rpath $finalize_rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. 
case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append rpath " $flag" fi elif test -n "$runpath_var"; then case "$perm_rpath " in *" $libdir "*) ;; *) func_append perm_rpath " $libdir" ;; esac fi case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-cegcc*) testbindir=`${ECHO} "$libdir" | ${SED} -e 's*/lib$*/bin*'` case :$dllsearchpath: in *":$libdir:"*) ;; ::) dllsearchpath=$libdir;; *) func_append dllsearchpath ":$libdir";; esac case :$dllsearchpath: in *":$testbindir:"*) ;; ::) dllsearchpath=$testbindir;; *) func_append dllsearchpath ":$testbindir";; esac ;; esac done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir="$hardcode_libdirs" eval rpath=\" $hardcode_libdir_flag_spec\" fi compile_rpath="$rpath" rpath= hardcode_libdirs= for libdir in $finalize_rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) func_append hardcode_libdirs "$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" func_append rpath " $flag" fi elif test -n "$runpath_var"; then case "$finalize_perm_rpath " in *" $libdir "*) ;; *) func_append finalize_perm_rpath " $libdir" ;; esac fi done # Substitute the hardcoded libdirs into the rpath. 
if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir="$hardcode_libdirs" eval rpath=\" $hardcode_libdir_flag_spec\" fi finalize_rpath="$rpath" if test -n "$libobjs" && test "$build_old_libs" = yes; then # Transform all the library objects into standard objects. compile_command=`$ECHO "$compile_command" | $SP2NL | $SED "$lo2o" | $NL2SP` finalize_command=`$ECHO "$finalize_command" | $SP2NL | $SED "$lo2o" | $NL2SP` fi func_generate_dlsyms "$outputname" "@PROGRAM@" "no" # template prelinking step if test -n "$prelink_cmds"; then func_execute_cmds "$prelink_cmds" 'exit $?' fi wrappers_required=yes case $host in *cegcc* | *mingw32ce*) # Disable wrappers for cegcc and mingw32ce hosts, we are cross compiling anyway. wrappers_required=no ;; *cygwin* | *mingw* ) if test "$build_libtool_libs" != yes; then wrappers_required=no fi ;; *) if test "$need_relink" = no || test "$build_libtool_libs" != yes; then wrappers_required=no fi ;; esac if test "$wrappers_required" = no; then # Replace the output file specification. compile_command=`$ECHO "$compile_command" | $SED 's%@OUTPUT@%'"$output"'%g'` link_command="$compile_command$compile_rpath" # We have no uninstalled library dependencies, so finalize right now. exit_status=0 func_show_eval "$link_command" 'exit_status=$?' if test -n "$postlink_cmds"; then func_to_tool_file "$output" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' fi # Delete the generated files. 
if test -f "$output_objdir/${outputname}S.${objext}"; then func_show_eval '$RM "$output_objdir/${outputname}S.${objext}"' fi exit $exit_status fi if test -n "$compile_shlibpath$finalize_shlibpath"; then compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" fi if test -n "$finalize_shlibpath"; then finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" fi compile_var= finalize_var= if test -n "$runpath_var"; then if test -n "$perm_rpath"; then # We should set the runpath_var. rpath= for dir in $perm_rpath; do func_append rpath "$dir:" done compile_var="$runpath_var=\"$rpath\$$runpath_var\" " fi if test -n "$finalize_perm_rpath"; then # We should set the runpath_var. rpath= for dir in $finalize_perm_rpath; do func_append rpath "$dir:" done finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " fi fi if test "$no_install" = yes; then # We don't need to create a wrapper script. link_command="$compile_var$compile_command$compile_rpath" # Replace the output file specification. link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output"'%g'` # Delete the old output file. $opt_dry_run || $RM $output # Link the executable and exit func_show_eval "$link_command" 'exit $?' if test -n "$postlink_cmds"; then func_to_tool_file "$output" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' 
fi exit $EXIT_SUCCESS fi if test "$hardcode_action" = relink; then # Fast installation is not supported link_command="$compile_var$compile_command$compile_rpath" relink_command="$finalize_var$finalize_command$finalize_rpath" func_warning "this platform does not like uninstalled shared libraries" func_warning "\`$output' will be relinked during installation" else if test "$fast_install" != no; then link_command="$finalize_var$compile_command$finalize_rpath" if test "$fast_install" = yes; then relink_command=`$ECHO "$compile_var$compile_command$compile_rpath" | $SED 's%@OUTPUT@%\$progdir/\$file%g'` else # fast_install is set to needless relink_command= fi else link_command="$compile_var$compile_command$compile_rpath" relink_command="$finalize_var$finalize_command$finalize_rpath" fi fi # Replace the output file specification. link_command=`$ECHO "$link_command" | $SED 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` # Delete the old output files. $opt_dry_run || $RM $output $output_objdir/$outputname $output_objdir/lt-$outputname func_show_eval "$link_command" 'exit $?' if test -n "$postlink_cmds"; then func_to_tool_file "$output_objdir/$outputname" postlink_cmds=`func_echo_all "$postlink_cmds" | $SED -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g' -e 's%@TOOL_OUTPUT@%'"$func_to_tool_file_result"'%g'` func_execute_cmds "$postlink_cmds" 'exit $?' fi # Now create the wrapper script. func_verbose "creating $output" # Quote the relink command for shipping. 
if test -n "$relink_command"; then # Preserve any variables that may affect compiler behavior for var in $variables_saved_for_relink; do if eval test -z \"\${$var+set}\"; then relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else func_quote_for_eval "$var_value" relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" fi done relink_command="(cd `pwd`; $relink_command)" relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` fi # Only actually do things if not in dry run mode. $opt_dry_run || { # win32 will think the script is a binary if it has # a .exe suffix, so we strip it off here. case $output in *.exe) func_stripname '' '.exe' "$output" output=$func_stripname_result ;; esac # test for cygwin because mv fails w/o .exe extensions case $host in *cygwin*) exeext=.exe func_stripname '' '.exe' "$outputname" outputname=$func_stripname_result ;; *) exeext= ;; esac case $host in *cygwin* | *mingw* ) func_dirname_and_basename "$output" "" "." output_name=$func_basename_result output_path=$func_dirname_result cwrappersource="$output_path/$objdir/lt-$output_name.c" cwrapper="$output_path/$output_name.exe" $RM $cwrappersource $cwrapper trap "$RM $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 func_emit_cwrapperexe_src > $cwrappersource # The wrapper executable is built using the $host compiler, # because it contains $host paths and files. If cross- # compiling, it, like the target executable, must be # executed on the $host or under an emulation environment. 
$opt_dry_run || { $LTCC $LTCFLAGS -o $cwrapper $cwrappersource $STRIP $cwrapper } # Now, create the wrapper script for func_source use: func_ltwrapper_scriptname $cwrapper $RM $func_ltwrapper_scriptname_result trap "$RM $func_ltwrapper_scriptname_result; exit $EXIT_FAILURE" 1 2 15 $opt_dry_run || { # note: this script will not be executed, so do not chmod. if test "x$build" = "x$host" ; then $cwrapper --lt-dump-script > $func_ltwrapper_scriptname_result else func_emit_wrapper no > $func_ltwrapper_scriptname_result fi } ;; * ) $RM $output trap "$RM $output; exit $EXIT_FAILURE" 1 2 15 func_emit_wrapper no > $output chmod +x $output ;; esac } exit $EXIT_SUCCESS ;; esac # See if we need to build an old-fashioned archive. for oldlib in $oldlibs; do if test "$build_libtool_libs" = convenience; then oldobjs="$libobjs_save $symfileobj" addlibs="$convenience" build_libtool_libs=no else if test "$build_libtool_libs" = module; then oldobjs="$libobjs_save" build_libtool_libs=no else oldobjs="$old_deplibs $non_pic_objects" if test "$preload" = yes && test -f "$symfileobj"; then func_append oldobjs " $symfileobj" fi fi addlibs="$old_convenience" fi if test -n "$addlibs"; then gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_extract_archives $gentop $addlibs func_append oldobjs " $func_extract_archives_result" fi # Do each command in the archive commands. if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then cmds=$old_archive_from_new_cmds else # Add any objects from preloaded convenience libraries if test -n "$dlprefiles"; then gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_extract_archives $gentop $dlprefiles func_append oldobjs " $func_extract_archives_result" fi # POSIX demands no paths to be encoded in archives. 
We have # to avoid creating archives with duplicate basenames if we # might have to extract them afterwards, e.g., when creating a # static archive out of a convenience library, or when linking # the entirety of a libtool archive into another (currently # not supported by libtool). if (for obj in $oldobjs do func_basename "$obj" $ECHO "$func_basename_result" done | sort | sort -uc >/dev/null 2>&1); then : else echo "copying selected object files to avoid basename conflicts..." gentop="$output_objdir/${outputname}x" func_append generated " $gentop" func_mkdir_p "$gentop" save_oldobjs=$oldobjs oldobjs= counter=1 for obj in $save_oldobjs do func_basename "$obj" objbase="$func_basename_result" case " $oldobjs " in " ") oldobjs=$obj ;; *[\ /]"$objbase "*) while :; do # Make sure we don't pick an alternate name that also # overlaps. newobj=lt$counter-$objbase func_arith $counter + 1 counter=$func_arith_result case " $oldobjs " in *[\ /]"$newobj "*) ;; *) if test ! -f "$gentop/$newobj"; then break; fi ;; esac done func_show_eval "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" func_append oldobjs " $gentop/$newobj" ;; *) func_append oldobjs " $obj" ;; esac done fi func_to_tool_file "$oldlib" func_convert_file_msys_to_w32 tool_oldlib=$func_to_tool_file_result eval cmds=\"$old_archive_cmds\" func_len " $cmds" len=$func_len_result if test "$len" -lt "$max_cmd_len" || test "$max_cmd_len" -le -1; then cmds=$old_archive_cmds elif test -n "$archiver_list_spec"; then func_verbose "using command file archive linking..." for obj in $oldobjs do func_to_tool_file "$obj" $ECHO "$func_to_tool_file_result" done > $output_objdir/$libname.libcmd func_to_tool_file "$output_objdir/$libname.libcmd" oldobjs=" $archiver_list_spec$func_to_tool_file_result" cmds=$old_archive_cmds else # the command line is too long to link in one step, link in parts func_verbose "using piecewise archive linking..." 
save_RANLIB=$RANLIB RANLIB=: objlist= concat_cmds= save_oldobjs=$oldobjs oldobjs= # Is there a better way of finding the last object in the list? for obj in $save_oldobjs do last_oldobj=$obj done eval test_cmds=\"$old_archive_cmds\" func_len " $test_cmds" len0=$func_len_result len=$len0 for obj in $save_oldobjs do func_len " $obj" func_arith $len + $func_len_result len=$func_arith_result func_append objlist " $obj" if test "$len" -lt "$max_cmd_len"; then : else # the above command should be used before it gets too long oldobjs=$objlist if test "$obj" = "$last_oldobj" ; then RANLIB=$save_RANLIB fi test -z "$concat_cmds" || concat_cmds=$concat_cmds~ eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\" objlist= len=$len0 fi done RANLIB=$save_RANLIB oldobjs=$objlist if test "X$oldobjs" = "X" ; then eval cmds=\"\$concat_cmds\" else eval cmds=\"\$concat_cmds~\$old_archive_cmds\" fi fi fi func_execute_cmds "$cmds" 'exit $?' done test -n "$generated" && \ func_show_eval "${RM}r$generated" # Now create the libtool archive. case $output in *.la) old_library= test "$build_old_libs" = yes && old_library="$libname.$libext" func_verbose "creating $output" # Preserve any variables that may affect compiler behavior for var in $variables_saved_for_relink; do if eval test -z \"\${$var+set}\"; then relink_command="{ test -z \"\${$var+set}\" || $lt_unset $var || { $var=; export $var; }; }; $relink_command" elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else func_quote_for_eval "$var_value" relink_command="$var=$func_quote_for_eval_result; export $var; $relink_command" fi done # Quote the link command for shipping. relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" relink_command=`$ECHO "$relink_command" | $SED "$sed_quote_subst"` if test "$hardcode_automatic" = yes ; then relink_command= fi # Only create the output if not a dry run. 
$opt_dry_run || { for installed in no yes; do if test "$installed" = yes; then if test -z "$install_libdir"; then break fi output="$output_objdir/$outputname"i # Replace all uninstalled libtool libraries with the installed ones newdependency_libs= for deplib in $dependency_libs; do case $deplib in *.la) func_basename "$deplib" name="$func_basename_result" func_resolve_sysroot "$deplib" eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $func_resolve_sysroot_result` test -z "$libdir" && \ func_fatal_error "\`$deplib' is not a valid libtool archive" func_append newdependency_libs " ${lt_sysroot:+=}$libdir/$name" ;; -L*) func_stripname -L '' "$deplib" func_replace_sysroot "$func_stripname_result" func_append newdependency_libs " -L$func_replace_sysroot_result" ;; -R*) func_stripname -R '' "$deplib" func_replace_sysroot "$func_stripname_result" func_append newdependency_libs " -R$func_replace_sysroot_result" ;; *) func_append newdependency_libs " $deplib" ;; esac done dependency_libs="$newdependency_libs" newdlfiles= for lib in $dlfiles; do case $lib in *.la) func_basename "$lib" name="$func_basename_result" eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` test -z "$libdir" && \ func_fatal_error "\`$lib' is not a valid libtool archive" func_append newdlfiles " ${lt_sysroot:+=}$libdir/$name" ;; *) func_append newdlfiles " $lib" ;; esac done dlfiles="$newdlfiles" newdlprefiles= for lib in $dlprefiles; do case $lib in *.la) # Only pass preopened files to the pseudo-archive (for # eventual linking with the app. 
that links it) if we # didn't already link the preopened objects directly into # the library: func_basename "$lib" name="$func_basename_result" eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` test -z "$libdir" && \ func_fatal_error "\`$lib' is not a valid libtool archive" func_append newdlprefiles " ${lt_sysroot:+=}$libdir/$name" ;; esac done dlprefiles="$newdlprefiles" else newdlfiles= for lib in $dlfiles; do case $lib in [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; *) abs=`pwd`"/$lib" ;; esac func_append newdlfiles " $abs" done dlfiles="$newdlfiles" newdlprefiles= for lib in $dlprefiles; do case $lib in [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; *) abs=`pwd`"/$lib" ;; esac func_append newdlprefiles " $abs" done dlprefiles="$newdlprefiles" fi $RM $output # place dlname in correct position for cygwin # In fact, it would be nice if we could use this code for all target # systems that can't hard-code library paths into their executables # and that have no shared library path variable independent of PATH, # but it turns out we can't easily determine that from inspecting # libtool variables, so we have to hard-code the OSs to which it # applies here; at the moment, that means platforms that use the PE # object format with DLL files. See the long comment at the top of # tests/bindir.at for full details. tdlname=$dlname case $host,$output,$installed,$module,$dlname in *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll | *cegcc*,*lai,yes,no,*.dll) # If a -bindir argument was supplied, place the dll there. if test "x$bindir" != x ; then func_relative_path "$install_libdir" "$bindir" tdlname=$func_relative_path_result$dlname else # Otherwise fall back on heuristic. tdlname=../bin/$dlname fi ;; esac $ECHO > $output "\ # $outputname - a libtool library file # Generated by $PROGRAM (GNU $PACKAGE$TIMESTAMP) $VERSION # # Please DO NOT delete this file! # It is necessary for linking the library. # The name that we can dlopen(3). dlname='$tdlname' # Names of this library. 
library_names='$library_names' # The name of the static archive. old_library='$old_library' # Linker flags that can not go in dependency_libs. inherited_linker_flags='$new_inherited_linker_flags' # Libraries that this one depends upon. dependency_libs='$dependency_libs' # Names of additional weak libraries provided by this library weak_library_names='$weak_libs' # Version information for $libname. current=$current age=$age revision=$revision # Is this an already installed library? installed=$installed # Should we warn about portability when linking against -modules? shouldnotlink=$module # Files to dlopen/dlpreopen dlopen='$dlfiles' dlpreopen='$dlprefiles' # Directory that this library needs to be installed in: libdir='$install_libdir'" if test "$installed" = no && test "$need_relink" = yes; then $ECHO >> $output "\ relink_command=\"$relink_command\"" fi done } # Do a symbolic link so that the libtool archive can be found in # LD_LIBRARY_PATH before the program is installed. func_show_eval '( cd "$output_objdir" && $RM "$outputname" && $LN_S "../$outputname" "$outputname" )' 'exit $?' ;; esac exit $EXIT_SUCCESS } { test "$opt_mode" = link || test "$opt_mode" = relink; } && func_mode_link ${1+"$@"} # func_mode_uninstall arg... func_mode_uninstall () { $opt_debug RM="$nonopt" files= rmforce= exit_status=0 # This variable tells wrapper scripts just to set variables rather # than running their programs. libtool_install_magic="$magic" for arg do case $arg in -f) func_append RM " $arg"; rmforce=yes ;; -*) func_append RM " $arg" ;; *) func_append files " $arg" ;; esac done test -z "$RM" && \ func_fatal_help "you must specify an RM program" rmdirs= for file in $files; do func_dirname "$file" "" "." 
dir="$func_dirname_result" if test "X$dir" = X.; then odir="$objdir" else odir="$dir/$objdir" fi func_basename "$file" name="$func_basename_result" test "$opt_mode" = uninstall && odir="$dir" # Remember odir for removal later, being careful to avoid duplicates if test "$opt_mode" = clean; then case " $rmdirs " in *" $odir "*) ;; *) func_append rmdirs " $odir" ;; esac fi # Don't error if the file doesn't exist and rm -f was used. if { test -L "$file"; } >/dev/null 2>&1 || { test -h "$file"; } >/dev/null 2>&1 || test -f "$file"; then : elif test -d "$file"; then exit_status=1 continue elif test "$rmforce" = yes; then continue fi rmfiles="$file" case $name in *.la) # Possibly a libtool archive, so verify it. if func_lalib_p "$file"; then func_source $dir/$name # Delete the libtool libraries and symlinks. for n in $library_names; do func_append rmfiles " $odir/$n" done test -n "$old_library" && func_append rmfiles " $odir/$old_library" case "$opt_mode" in clean) case " $library_names " in *" $dlname "*) ;; *) test -n "$dlname" && func_append rmfiles " $odir/$dlname" ;; esac test -n "$libdir" && func_append rmfiles " $odir/$name $odir/${name}i" ;; uninstall) if test -n "$library_names"; then # Do each command in the postuninstall commands. func_execute_cmds "$postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' fi if test -n "$old_library"; then # Do each command in the old_postuninstall commands. func_execute_cmds "$old_postuninstall_cmds" 'test "$rmforce" = yes || exit_status=1' fi # FIXME: should reinstall the best remaining shared library. ;; esac fi ;; *.lo) # Possibly a libtool object, so verify it. if func_lalib_p "$file"; then # Read the .lo file func_source $dir/$name # Add PIC object to the list of files to remove. if test -n "$pic_object" && test "$pic_object" != none; then func_append rmfiles " $dir/$pic_object" fi # Add non-PIC object to the list of files to remove. 
if test -n "$non_pic_object" && test "$non_pic_object" != none; then func_append rmfiles " $dir/$non_pic_object" fi fi ;; *) if test "$opt_mode" = clean ; then noexename=$name case $file in *.exe) func_stripname '' '.exe' "$file" file=$func_stripname_result func_stripname '' '.exe' "$name" noexename=$func_stripname_result # $file with .exe has already been added to rmfiles, # add $file without .exe func_append rmfiles " $file" ;; esac # Do a test to see if this is a libtool program. if func_ltwrapper_p "$file"; then if func_ltwrapper_executable_p "$file"; then func_ltwrapper_scriptname "$file" relink_command= func_source $func_ltwrapper_scriptname_result func_append rmfiles " $func_ltwrapper_scriptname_result" else relink_command= func_source $dir/$noexename fi # note $name still contains .exe if it was in $file originally # as does the version of $file that was added into $rmfiles func_append rmfiles " $odir/$name $odir/${name}S.${objext}" if test "$fast_install" = yes && test -n "$relink_command"; then func_append rmfiles " $odir/lt-$name" fi if test "X$noexename" != "X$name" ; then func_append rmfiles " $odir/lt-${noexename}.c" fi fi fi ;; esac func_show_eval "$RM $rmfiles" 'exit_status=1' done # Try to remove the ${objdir}s in the directories where we deleted files for dir in $rmdirs; do if test -d "$dir"; then func_show_eval "rmdir $dir >/dev/null 2>&1" fi done exit $exit_status } { test "$opt_mode" = uninstall || test "$opt_mode" = clean; } && func_mode_uninstall ${1+"$@"} test -z "$opt_mode" && { help="$generic_help" func_fatal_help "you must specify a MODE" } test -z "$exec_cmd" && \ func_fatal_help "invalid operation mode \`$opt_mode'" if test -n "$exec_cmd"; then eval exec "$exec_cmd" exit $EXIT_FAILURE fi exit $exit_status # The TAGs below are defined such that we never get into a situation # in which we disable both kinds of libraries. 
Given conflicting # choices, we go for a static library, that is the most portable, # since we can't tell whether shared libraries were disabled because # the user asked for that or because the platform doesn't support # them. This is particularly important on AIX, because we don't # support having both static and shared libraries enabled at the same # time on that platform, so we default to a shared-only configuration. # If a disable-shared tag is given, we'll fallback to a static-only # configuration. But we'll never go from static-only to shared-only. # ### BEGIN LIBTOOL TAG CONFIG: disable-shared build_libtool_libs=no build_old_libs=yes # ### END LIBTOOL TAG CONFIG: disable-shared # ### BEGIN LIBTOOL TAG CONFIG: disable-static build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` # ### END LIBTOOL TAG CONFIG: disable-static # Local Variables: # mode:shell-script # sh-indentation:2 # End: # vi:sw=2 ecm-6.4.4/ecm-params.h.sparc640000644023561000001540000000162612106741273012702 00000000000000/* those parameters were obtained on gcc54.fsffrance.org with ecm-6.4.1-rc3 gmp-5.0.2, and gcc 4.3.2 -O2 -pedantic -m64 -mptr64 -mcpu=ultrasparc (sparc64-unknown-linux-gnu) */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,1,1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2} #define MPZMOD_THRESHOLD 104 #define REDC_THRESHOLD 341 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 11, 13, 13, 13, 15, 14, 15, 16, 17, 16, 17, 16, 19, 19, 19, 20, 21, 22} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 262144 #define PREREVERTDIVISION_NTT_THRESHOLD 262144 #define POLYINVERT_NTT_THRESHOLD 262144 #define POLYEVALT_NTT_THRESHOLD 262144 #define MPZSPV_NORMALISE_STRIDE 64 
ecm-6.4.4/ecm-ecm.h0000644023561000001540000002035012106741273010675 00000000000000/* ecm-ecm.h - private header file for GMP-ECM. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Paul Zimmermann, Alexander Kruppa and Cyril Bouvier. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef _ECM_ECM_H #define _ECM_ECM_H 1 #include "config.h" #include #define ASSERT_ALWAYS(expr) assert (expr) #ifdef WANT_ASSERT #define ASSERT(expr) assert (expr) #else #define ASSERT(expr) do {} while (0) #endif #include "ecm.h" /* Structure for candidate usage. This is much more powerful than using a simple mpz_t to hold the candidate. This structure also houses the expression (in raw form), and will modify the expression as factors are found (if in looping modes). Also, since we are warehousing all of the data associated with the candidate, we also store whether the candidate is PRP here (so testing will cease), along with the length of the candidate. 
As each factor is found, the candidate will also have the factor removed from it */ typedef struct { #if defined (CANDI_DEBUG) unsigned long magic; /* used for debugging purposes while writing this code */ #endif char *cpExpr; /* if non-NULL, then this is a "simpler" expression than the decimal output of n */ mpz_t n; /* the cofactor candidate currently being used to find factors from */ unsigned ndigits; /* the number of digits (decimal) in n */ unsigned nexprlen; /* strlen of expression, 0 if there is NO expression */ int isPrp; /* usually 0, but turns 1 if factor found, and the cofactor is PRP, OR if the original candidate was PRP and the user asked to prp check */ } mpcandi_t; typedef struct { int Valid; /* Is ONLY set to 1 if there is a proper -go switch. Otherwise is 0 and if 0, then PM1, PP1 and ECM all ignore it */ char *cpOrigExpr; /* if non-NULL, then this is a "simpler" expression than the decimal output of n */ mpcandi_t Candi; /* The value after expression checked */ int containsN; /* 0 for simple number or expression. 
1 if the expression "contains" N as that expression will have to be built for each candidate */ } mpgocandi_t; /* auxi.c */ unsigned int nb_digits (const mpz_t); int probab_prime_p (mpz_t, int); int read_number (mpcandi_t*, FILE*, int); /* Various logging levels */ /* OUTPUT_ALWAYS means print always, regardless of verbose value */ #define OUTPUT_ALWAYS 0 /* OUTPUT_NORMAL means print during normal program execution */ #define OUTPUT_NORMAL 1 /* OUTPUT_VERBOSE means print if the user requested more verbosity */ #define OUTPUT_VERBOSE 2 /* OUTPUT_RESVERBOSE is for printing residues (after stage 1 etc) */ #define OUTPUT_RESVERBOSE 3 /* OUTPUT_DEVVERBOSE is for printing internal parameters (for developers) */ #define OUTPUT_DEVVERBOSE 4 /* OUTPUT_TRACE is for printing trace data, produces lots of output */ #define OUTPUT_TRACE 5 /* OUTPUT_ERROR is for printing error messages */ #define OUTPUT_ERROR -1 #define MAX_NUMBER_PRINT_LEN 1000 #define NTT_SIZE_THRESHOLD 30 /* auxlib.c */ int test_verbose (int); void set_verbose (int); int inc_verbose (); /* Return codes */ /* Bit coded values: 1: error (for example out of memory) 2: proper factor found, 4: factor is prime, 8: cofactor is prime or 1 */ #define ECM_EXIT_ERROR 1 #define ECM_COMP_FAC_COMP_COFAC 2 #define ECM_PRIME_FAC_COMP_COFAC (2+4) #define ECM_INPUT_NUMBER_FOUND 8 #define ECM_COMP_FAC_PRIME_COFAC (2+8) #define ECM_PRIME_FAC_PRIME_COFAC (2+4+8) /* getprime.c */ double getprime (); void getprime_clear (); void getprime_seek (double); #define WANT_FREE_PRIME_TABLE(p) (p < 0.0) #define FREE_PRIME_TABLE -1.0 /* b1_ainc.c */ double calc_B1_AutoIncrement(double cur_B1, double incB1val, int calcInc); /* memory.c */ #ifdef MEMORY_DEBUG void __gmp_default_free (void *, size_t); void *__gmp_default_allocate (size_t); void *__gmp_default_reallocate (void *, size_t, size_t); void tests_memory_start (void); void tests_memory_end (void); void tests_memory_reset (void); void tests_free (void *, size_t); void tests_memory_status 
(void); void tests_memory_set_location (char *, unsigned int); #endif /* trial.c */ int trial_factor (mpcandi_t *n, double maxfact, int deep); /* resume.c */ int read_resumefile_line (int *, mpz_t, mpcandi_t *, mpz_t, mpz_t, mpz_t, double *, char *, char *, char *, char *, FILE *); int write_resumefile_line (char *, int, double, mpz_t, mpz_t, mpz_t, mpcandi_t *, mpz_t, const char *); /* main.c */ int kbnc_z (double *k, unsigned long *b, unsigned long *n, signed long *c, mpz_t z); int kbnc_str (double *k, unsigned long *b, unsigned long *n, signed long *c, char *z, mpz_t num); /* batch.c */ void compute_s (mpz_t, unsigned long); int write_s_in_file (char *, mpz_t); void read_s_from_file (mpz_t, char *); /* eval.c */ int eval (mpcandi_t *n, FILE *fd, int bPrp); int eval_str (mpcandi_t *n, char *cp, int primetest, char **EndChar); /* EndChar can be NULL */ void init_expr (void); void free_expr (void); /* candi.c */ void mpcandi_t_init (mpcandi_t *n); /* damn, a C++ class sure would have been nice :( */ void mpcandi_t_free (mpcandi_t *n); int mpcandi_t_copy (mpcandi_t *to, mpcandi_t *from); int mpcandi_t_add_candidate (mpcandi_t *n, mpz_t c, const char *cpExpr, int bPrp); int mpcandi_t_addfoundfactor (mpcandi_t *n, mpz_t f, int displaywarning); int mpcandi_t_addfoundfactor_d (mpcandi_t *n, double f); /* candi.c Group Order candidate functions. 
*/ void mpgocandi_t_init(mpgocandi_t *go); void mpgocandi_t_free(mpgocandi_t *go); int mpgocandi_fixup_with_N(mpgocandi_t *go, mpcandi_t *n); /* random.c */ unsigned long get_random_ul (void); /* random2.c */ void pp1_random_seed (mpz_t, mpz_t, gmp_randstate_t); void pm1_random_seed (mpz_t, mpz_t, gmp_randstate_t); /* default number of probable prime tests */ #define PROBAB_PRIME_TESTS 1 /* maximal stage 1 bound = 2^53 - 1, the next prime being 2^53 + 5 */ #define MAX_B1 9007199254740991.0 /* The checksum for savefile is the product of all mandatory fields, modulo the greatest prime below 2^32 */ #define CHKSUMMOD 4294967291U #ifdef MEMORY_DEBUG #define FREE(ptr,size) tests_free(ptr,size) #define MEMORY_TAG tests_memory_set_location(__FILE__,__LINE__) #define MEMORY_UNTAG tests_memory_set_location("",0) #define MPZ_INIT(x) {MEMORY_TAG;mpz_init(x);MEMORY_UNTAG;} #define MPZ_INIT2(x,n) {MEMORY_TAG;mpz_init2(x,n);MEMORY_UNTAG;} #else #define FREE(ptr,size) free(ptr) #define MEMORY_TAG do{}while(0) #define MEMORY_UNTAG do{}while(0) #define MPZ_INIT(x) mpz_init(x) #define MPZ_INIT2(x,n) mpz_init2(x,n) #endif #define ABS(x) ((x) >= 0 ? 
(x) : -(x)) /* could go in auxi.c as a function */ #ifdef HAVE_SETPRIORITY # include # ifdef HAVE_SYS_RESOURCE_H # include # endif # define NICE10 setpriority (PRIO_PROCESS, 0, 10) # define NICE20 setpriority (PRIO_PROCESS, 0, 20) #elif defined(HAVE_NICE) # ifdef HAVE_UNISTD_H # include # endif # define NICE10 nice (10) # define NICE20 nice (20) #elif defined(HAVE_WINDOWS_H) # include # define NICE10 do { \ SetPriorityClass (GetCurrentProcess (), BELOW_NORMAL_PRIORITY_CLASS); \ SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_BELOW_NORMAL); \ } while (0) # define NICE20 do { \ SetPriorityClass (GetCurrentProcess (), IDLE_PRIORITY_CLASS); \ SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_IDLE); \ } while (0) #else # warning "Can't find a way to change priority" # define NICE10 do {} while (0) # define NICE20 do {} while (0) #endif #endif /* _ECM_ECM_H */ ecm-6.4.4/ecm-params.h.alpha-ev560000644023561000001540000000107412106741273013265 00000000000000/* parameters kindly provided by Torbjorn Granlund, and produced with ecm-6.3-rc3 on alphaev56-unknown-freebsd6.4 */ #define MPZMOD_THRESHOLD 58 #define REDC_THRESHOLD 493 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 4096 #define PREREVERTDIVISION_NTT_THRESHOLD 64 #define POLYINVERT_NTT_THRESHOLD 512 #define POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 128 ecm-6.4.4/ecm.10000644023561000001540000003670712113421230010043 00000000000000'\" t .\" Title: ECM .\" Author: [see the "AUTHORS" section] .\" Generator: DocBook XSL Stylesheets v1.76.1 .\" Date: 02/27/2013 .\" Manual: April 22, 2003 .\" Source: April 22, 2003 .\" Language: English .\" .TH "ECM" "1" "02/27/2013" "April 22, 2003" "April 22, 2003" .\" ----------------------------------------------------------------- .\" * Define some portability 
stuff .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" http://bugs.debian.org/507673 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" ecm \- integer factorization using ECM, P\-1 or P+1 .SH "SYNOPSIS" .HP \w'\fBecm\fR\ 'u \fBecm\fR [\fBoptions\fR] \fIB1\fR [\fIB2min\fR\-\fIB2max\fR | \fIB2\fR] .br .SH "DESCRIPTION" .PP ecm is an integer factoring program using the Elliptic Curve Method (ECM), the P\-1 method, or the P+1 method\&. The following sections describe parameters relevant to these algorithms\&. .SH "STEP 1 AND STEP 2 BOUND PARAMETERS" .PP \fB\fIB1\fR\fR .RS 4 \fIB1\fR is the step 1 bound\&. It is a mandatory parameter\&. It can be given either in integer format (for example 3000000) or in floating\-point format (3000000\&.0 or 3e6)\&. The largest possible \fIB1\fR value is 9007199254740996 for P\-1, and ULONG_MAX or 9007199254740996 (whichever is smaller) for ECM and P+1\&. All primes 2 <= p <= \fIB1\fR are processed in step 1\&. .RE .PP \fB\fIB2\fR\fR .RS 4 \fIB2\fR is the step 2 bound\&. It is optional: if omitted, a default value is computed from \fIB1\fR, which should be close to optimal\&. Like \fIB1\fR, it can be given either in integer or in floating\-point format\&. 
The largest possible value of \fIB2\fR is approximately 9e23, but depends on the number of blocks \fIk\fR if you specify the \fB\-k\fR option\&. All primes \fIB1\fR <= p <= \fIB2\fR are processed in step 2\&. If \fIB2\fR < \fIB1\fR, no step 2 is performed\&. .RE .PP \fB\fIB2min\fR\fR\fB\-\fR\fB\fIB2max\fR\fR .RS 4 alternatively one may use the \fIB2min\fR\-\fIB2max\fR form, which means that all primes \fIB2min\fR <= p <= \fIB2max\fR should be processed\&. Thus specifying \fIB2\fR only corresponds to \fIB1\fR\-\fIB2\fR\&. The values of \fIB2min\fR and \fIB2max\fR may be arbitrarily large, but their difference must not exceed approximately 9e23, subject to the number of blocks \fIk\fR\&. .RE .SH "FACTORING METHOD" .PP \fB\-pm1\fR .RS 4 Perform P\-1 instead of the default method (ECM)\&. .RE .PP \fB\-pp1\fR .RS 4 Perform P+1 instead of the default method (ECM)\&. .RE .SH "GROUP AND INITIAL POINT PARAMETERS" .PP \fB\-x0 \fR\fB\fIx\fR\fR .RS 4 [ECM, P\-1, P+1] Use \fIx\fR (arbitrary\-precision integer or rational) as initial point\&. For example, \fB\-x0 1/3\fR is valid\&. If not given, \fIx\fR is generated from the sigma value for ECM, or at random for P\-1 and P+1\&. .RE .PP \fB\-sigma \fR\fB\fIs\fR\fR .RS 4 [ECM] Use \fIs\fR (arbitrary\-precision integer) as curve generator\&. If omitted, \fIs\fR is generated at random\&. .RE .PP \fB\-A \fR\fB\fIa\fR\fR .RS 4 [ECM] Use \fIa\fR (arbitrary\-precision integer) as curve parameter\&. If omitted, it is generated from the sigma value\&. .RE .PP \fB\-go \fR\fB\fIval\fR\fR .RS 4 [ECM, P\-1, P+1] Multiply the initial point by \fIval\fR, which can be any valid expression, possibly containing the special character N as place holder for the current input number\&. Example: .sp .if n \{\ .RS 4 .\} .nf ecm \-pp1 \-go "N^2\-1" 1e6 < composite2000 .fi .if n \{\ .RE .\} .sp .RE .SH "STEP 2 PARAMETERS" .PP \fB\-k \fR\fB\fIk\fR\fR .RS 4 [ECM, P\-1, P+1] Perform \fIk\fR blocks in step 2\&.
For a given \fIB2\fR value, increasing \fIk\fR decreases the memory usage of step 2, at the expense of more CPU time\&. .RE .PP \fB\-treefile \fR\fB\fIfile\fR\fR .RS 4 Stores some tables of data in disk files to reduce the amount of memory occupied in step 2, at the expense of disk I/O\&. Data will be written to files \fIfile\fR\&.1, \fIfile\fR\&.2 etc\&. Does not work with fast stage 2 for P+1 and P\-1\&. .RE .PP \fB\-power \fR\fB\fIn\fR\fR .RS 4 [ECM, P\-1] Use x^\fIn\fR for Brent\-Suyama\*(Aqs extension (\fB\-power 1\fR disables Brent\-Suyama\*(Aqs extension)\&. The default polynomial is chosen depending on the method and B2\&. For P\-1 and P+1, disables the fast stage 2\&. For P\-1, \fIn\fR must be even\&. .RE .PP \fB\-dickson \fR\fB\fIn\fR\fR .RS 4 [ECM, P\-1] Use degree\-\fIn\fR Dickson\*(Aqs polynomial for Brent\-Suyama\*(Aqs extension\&. For P\-1 and P+1, disables the fast stage 2\&. Like for \fB\-power\fR, \fIn\fR must be even for P\-1\&. .RE .PP \fB\-maxmem \fR\fB\fIn\fR\fR .RS 4 Use at most \fIn\fR megabytes of memory in stage 2\&. .RE .PP \fB\-ntt\fR, \fB\-no\-ntt\fR .RS 4 Enable or disable the Number\-Theoretic Transform code for polynomial arithmetic in stage 2\&. With NTT, dF is chosen to be a power of 2, and is limited by the number of suitable primes that fit in a machine word (which is a limitation only on 32 bit systems)\&. The \-no\-ntt variant uses more memory, but is faster than NTT with large input numbers\&. By default, NTT is used for P\-1, P+1 and for ECM on numbers of size at most 30 machine words\&. .RE .SH "OUTPUT" .PP \fB\-q\fR .RS 4 Quiet mode\&. Found factorizations are printed on standard output, with factors separated by white spaces, one line per input number (if no factor was found, the input number is simply copied)\&. .RE .PP \fB\-v\fR .RS 4 Verbose mode\&. More information is printed, more \fB\-v\fR options increase verbosity\&.
With one \fB\-v\fR, the kind of modular multiplication used, initial x0 value, step 2 parameters and progress, and expected curves and time to find factors of different sizes for ECM are printed\&. With \fB\-v \-v\fR, the A value for ECM and residues at the end of step 1 and step 2 are printed\&. More \fB\-v\fR print internal data for debugging\&. .RE .PP \fB\-timestamp\fR .RS 4 Print a time stamp whenever a new ECM curve or P+1 or P\-1 run is processed\&. .RE .SH "MODULAR ARITHMETIC OPTIONS" .PP Several algorithms are available for modular multiplication\&. The program tries to find the best one for each input; one can force a given method with the following options\&. .PP \fB\-mpzmod\fR .RS 4 Use GMP\*(Aqs mpz_mod function (sub\-quadratic for large inputs, but induces some overhead for small ones)\&. .RE .PP \fB\-modmuln\fR .RS 4 Use Montgomery\*(Aqs multiplication (quadratic version)\&. Usually best method for small input\&. .RE .PP \fB\-redc\fR .RS 4 Use Montgomery\*(Aqs multiplication (sub\-quadratic version)\&. Theoretically optimal for large input\&. .RE .PP \fB\-nobase2\fR .RS 4 Disable special base\-2 code (which is used when the input number is a large factor of 2^n+1 or 2^n\-1, see \fB\-v\fR)\&. .RE .PP \fB\-base2\fR \fIn\fR .RS 4 Force use of special base\-2 code, input number must divide 2^\fIn\fR+1 if \fIn\fR > 0, or 2^|\fIn\fR|\-1 if \fIn\fR < 0\&. .RE .SH "FILE I/O" .PP The following options enable one to perform step 1 and step 2 separately, either on different machines, at different times, or using different software (in particular, George Woltman\*(Aqs Prime95/mprime program can produce step 1 output suitable for resuming with GMP\-ECM)\&. It can also be useful to split step 2 into several runs, using the \fIB2min\-B2max\fR option\&. .PP \fB\-inp \fR\fB\fIfile\fR\fR .RS 4 Take input from file \fIfile\fR instead of from standard input\&. .RE .PP \fB\-save \fR\fB\fIfile\fR\fR .RS 4 Save result of step 1 in \fIfile\fR\&. 
If \fIfile\fR exists, an error is raised\&. Example: to perform only step 1 with \fIB1\fR=1000000 on the composite number in the file "c155" and save its result in file "foo", use .sp .if n \{\ .RS 4 .\} .nf ecm \-save foo 1e6 1 < c155 .fi .if n \{\ .RE .\} .sp .RE .PP \fB\-savea \fR\fB\fIfile\fR\fR .RS 4 Like \fB\-save\fR, but appends to existing files\&. .RE .PP \fB\-resume \fR\fB\fIfile\fR\fR .RS 4 Resume residues from \fIfile\fR, reads from standard input if \fIfile\fR is "\-"\&. Example: to perform step 2 following the above step 1 computation, use .sp .if n \{\ .RS 4 .\} .nf ecm \-resume foo 1e6 .fi .if n \{\ .RE .\} .sp .RE .PP \fB\-chkpnt \fR\fB\fIfile\fR\fR .RS 4 Periodically write the current residue in stage 1 to \fIfile\fR\&. In case of a power failure, etc\&., the computation can be continued with the \fB\-resume\fR option\&. .sp .if n \{\ .RS 4 .\} .nf ecm \-chkpnt foo \-pm1 1e10 < largenumber\&.txt .fi .if n \{\ .RE .\} .sp .RE .SH "LOOP MODE" .PP The \(lqloop mode\(rq (option \fB\-c \fR\fB\fIn\fR\fR) enables one to run several curves on each input number\&. The following options control its behavior\&. .PP \fB\-c \fR\fB\fIn\fR\fR .RS 4 Perform \fIn\fR runs on each input number (default is one)\&. This option is mainly useful for P+1 (for example with \fIn\fR=3) or for ECM, where \fIn\fR could be set to the expected number of curves to find a d\-digit factor with a given step 1 bound\&. This option is incompatible with \fB\-resume, \-sigma, \-x0\fR\&. Giving \fB\-c 0\fR produces an infinite loop until a factor is found\&. .RE .PP \fB\-one\fR .RS 4 In loop mode, stop when a factor is found; the default is to continue until the cofactor is prime or the specified number of runs are done\&. .RE .PP \fB\-b\fR .RS 4 Breadth\-first processing: in loop mode, run one curve for each input number, then a second curve for each one, and so on\&. This is the default mode with \fB\-inp\fR\&.
.RE .PP \fB\-d\fR .RS 4 Depth\-first processing: in loop mode, run \fIn\fR curves for the first number, then \fIn\fR curves for the second one and so on\&. This is the default mode with standard input\&. .RE .PP \fB\-ve \fR\fB\fIn\fR\fR .RS 4 In loop mode, in the second and following runs, output only expressions that have at most \fIn\fR characters\&. Default is \fB\-ve 0\fR\&. .RE .PP \fB\-i \fR\fB\fIn\fR\fR .RS 4 In loop mode, increment \fIB1\fR by \fIn\fR after each curve\&. .RE .PP \fB\-I \fR\fB\fIn\fR\fR .RS 4 In loop mode, multiply \fIB1\fR by a factor depending on \fIn\fR after each curve\&. Default is one, which should be optimal on one machine, while \fB\-I 10\fR could be used when trying to factor the same number simultaneously on 10 identical machines\&. .RE .SH "SHELL COMMAND EXECUTION" .PP These options allow for executing shell commands to supplement functionality to GMP\-ECM\&. .PP \fB\-prpcmd \fR\fB\fIcmd\fR\fR .RS 4 Execute command \fIcmd\fR to test primality of factors and cofactors instead of GMP\-ECM\*(Aqs own functions\&. The number to test is passed via stdin\&. An exit code of 0 is interpreted as \(lqprobably prime\(rq, a non\-zero exit code as \(lqcomposite\(rq\&. .RE .PP \fB\-faccmd \fR\fB\fIcmd\fR\fR .RS 4 Executes command \fIcmd\fR whenever a factor is found by P\-1, P+1 or ECM\&. The input number, factor and cofactor are passed via stdin, each on a line\&. This could be used e\&.g\&. to mail new factors automatically: .sp .if n \{\ .RS 4 .\} .nf ecm \-faccmd \*(Aqmail \-s \(lq$HOSTNAME found a factor\(rq me@myaddress\&.com\*(Aq 11e6 < cunningham\&.in .fi .if n \{\ .RE .\} .sp .RE .PP \fB\-idlecmd \fR\fB\fIcmd\fR\fR .RS 4 Executes command \fIcmd\fR before each ECM curve, P\-1 or P+1 attempt on a number is started\&. If the exit status of \fIcmd\fR is non\-zero, GMP\-ECM terminates immediately, otherwise it continues normally\&.
GMP\-ECM is stopped while \fIcmd\fR runs, offering a way for letting GMP\-ECM sleep for example while the system is otherwise busy\&. .RE .SH "MISCELLANEOUS" .PP \fB\-n\fR .RS 4 Run the program in \(lqnice\(rq mode (below normal priority)\&. .RE .PP \fB\-nn\fR .RS 4 Run the program in \(lqvery nice\(rq mode (idle priority)\&. .RE .PP \fB\-B2scale \fR\fB\fIf\fR\fR .RS 4 Multiply the default step 2 bound \fIB2\fR by the floating\-point value \fIf\fR\&. Example: \fB\-B2scale 0\&.5\fR divides the default \fIB2\fR by 2\&. .RE .PP \fB\-stage1time \fR\fB\fIn\fR\fR .RS 4 Add \fIn\fR seconds to stage 1 time\&. This is useful to get correct expected time with \fI\-v\fR if part of stage 1 was done in another run\&. .RE .PP \fB\-cofdec\fR .RS 4 Force cofactor output in decimal (even if expressions are used)\&. .RE .PP \fB\-h\fR, \fB\-\-help\fR .RS 4 Display a short description of ecm usage, parameters and command line options\&. .RE .PP \fB\-printconfig\fR .RS 4 Prints configuration parameters used for the compilation and exits\&. .RE .SH "INPUT SYNTAX" .PP The input numbers can have several forms: .PP Raw decimal numbers like 123456789\&. .PP Comments can be placed in the file: everything after \(lq//\(rq is ignored, up to the end of line\&. .PP Line continuation\&. If a line ends with a backslash character \(lq\e\(rq, it is considered to continue on the next line\&. .PP Common arithmetic expressions can be used\&. Example: \fI3*5+2^10\fR\&. .PP Factorial: example \fI53!\fR\&. .PP Multi\-factorial: example \fI15!3\fR means 15*12*9*6*3\&. .PP Primorial: example \fI11#\fR means 2*3*5*7*11\&. .PP Reduced primorial: example \fI17#5\fR means 5*7*11*13*17\&. .PP Functions: currently, the only available function is \fIPhi(x,n)\fR\&. .SH "EXIT STATUS" .PP The exit status reflects the result of the last ECM curve or P\-1/P+1 attempt the program performed\&. 
Individual bits signify particular events, specifically: .PP Bit 0 .RS 4 0 if normal program termination, 1 if error occurred .RE .PP Bit 1 .RS 4 0 if no proper factor was found, 1 otherwise .RE .PP Bit 2 .RS 4 0 if factor is composite, 1 if factor is a probable prime .RE .PP Bit 3 .RS 4 0 if cofactor is composite, 1 if cofactor is a probable prime .RE .PP Thus, the following exit status values may occur: .PP 0 .RS 4 Normal program termination, no factor found .RE .PP 1 .RS 4 Error .RE .PP 2 .RS 4 Composite factor found, cofactor is composite .RE .PP 6 .RS 4 Probable prime factor found, cofactor is composite .RE .PP 8 .RS 4 Input number found .RE .PP 10 .RS 4 Composite factor found, cofactor is a probable prime .RE .PP 14 .RS 4 Probable prime factor found, cofactor is a probable prime .RE .SH "BUGS" .PP Report bugs to , after checking for bug fixes or new versions\&. .SH "AUTHORS" .PP Pierrick Gaudry contributed efficient assembly code for combined mul/redc; .PP Jim Fougeron contributed the expression parser and several command\-line options; .PP Laurent Fousse contributed the middle product code, the autoconf/automake tools, and is the maintainer of the Debian package; .PP Alexander Kruppa <(lastname)al@loria\&.fr> contributed estimates for probability of success for ECM, the new P+1 and P\-1 stage 2 (with P\&. L\&. Montgomery), new AMD64 asm mulredc code, and some other things; .PP Dave Newman contributed the Kronecker\-Schoenhage and NTT multiplication code; .PP Jason S\&. Papadopoulos contributed a speedup of the NTT code .PP Paul Zimmermann is the author of the first version of the program and chief maintainer of GMP\-ECM\&. .PP Note: email addresses have been obscured, the required substitutions should be obvious\&. ecm-6.4.4/AUTHORS0000644023561000001540000000304212106741273010265 00000000000000In the following email addresses, please replace "at" by "@" and "dot" by ".". For example, should read . 
Cyril Bouvier contributed the batch mode and GPU for stage 1 Pierrick Gaudry contributed efficient assembly code for combined mul/redc. Brian Gladman contributed the Visual C build files Jim Fougeron contributed the expression parser, the primality testing tools, and several command-line options. Laurent Fousse contributed the middle product code, the autoconf/automake tools, and author of the Debian package. Alexander Kruppa (substitute appropriately) joined Paul Zimmermann at release 5, contributed the Toom-Cook multiplication code, the special code for Fermat numbers, and many other nice things. Dave Newman contributed the Kronecker-Schönhage multiplication code, and the NTT code. Jason S. Papadopoulos contributed optimizations to the NTT code. Paul Zimmermann author of the first version of the program. Several people also helped by suggesting improvements, or testing beta-versions: Allan Steel, Karim Belabas, Torbjörn Granlund, Japke Rosink, Bruce Dodson. If you want to contribute to GMP-ECM, you are welcome; the development version is available on . ecm-6.4.4/NEWS0000644023561000001540000002605312113416752007722 00000000000000Changes between GMP-ECM 6.4.3 and GMP-ECM 6.4.4: * Fixed PowerPC64 assembly code with --enable-shared (thanks Leif Leonhardy) * Fix to deal with change of semantics of internal GMP functions in GMP 5.1 * Fixed small memory leak in non-NTT P-1 stage 2 * Fixed segfaults with large non-NTT P+-1 stage 2 * Removed defunct -t command line option Changes between GMP-ECM 6.4.2 and GMP-ECM 6.4.3: * Fixed bug reported by user "lorgix" on mersenneforum (http://www.mersenneforum.org/showpost.php?p=286385&postcount=280) * Use 64-bit value for random seed under Windows Changes between GMP-ECM 6.4.1 and GMP-ECM 6.4.2: * Corrected the copyright headers * Reduced memory usage in stage 1 with -batch={1,2} mode. * Fixed bug in modular reduction (could occur only for numbers larger than 386 digits on 64-bit computers and 193 digits on 32-bit computers). 
* Speedup in stage 2 with the NTT default mode Changes between ecm-6.4 and ecm-6.4.1: * GMP-ECM is now distributed under the GPL version 3 or later for the binary, and under the LGPL version 3 or later for the library * Fixed a speed regression with respect to ecm-6.3 http://lists.gforge.inria.fr/pipermail/ecm-discuss/2012-February/004103.html * Fixed a bug with the -treefile option which had been present for a long time * Several fixes for the Visual Studio 2010 build * New experimental option -batch=2, and speedup for -batch (i.e., -batch=1) * New tuning mechanism, now --enable-asm-redc is always recommended * New configure option --enable-mulredc-svoboda, for input numbers whose low limbs is congruent to -1 * New tuning parameters for Intel Core i5 * New ecmbench utility Changes between ecm-6.3 and ecm-6.4: * Fixed configure problem with SSE2 (http://trac.sagemath.org/sage_trac/ticket/10252) * Fixed configure bug on 32-bit PowerPC (tried to use 64-bit assembly) https://gforge.inria.fr/tracker/index.php?func=detail&aid=10646 * Fixed dependencies from build directory https://gforge.inria.fr/tracker/index.php?func=detail&aid=10648 * Patch from David Cleaver to allow B1 >= 2^32 on machines where "unsigned long" has 32 bits only * Patch from David Cleaver to use GWNUM 26.6 on Windows x64 with MingW64/Msys * Improved conversion from mpz_t to residue number system in NTT code * Better asm code for AMD cpus * Use of GMP's mpn_mullo_n and mpn_redc_2 when available * New option -batch with faster Stage 1 (but smaller success probability) * Added Visual Studio 2010 build Changes between ecm-6.2.3 and ecm-6.3: * New assembly code for 64-bit PowerPC (thanks to Philip McLaughlin) * Allow several processes to write to the same -save file * More routines in new P+-1 stage 2 use multi-threading in OpenMP build * Fixed incompatibility with GMP 5.0.0 * Fixed several bugs, and now check return value from malloc() calls * Fixed linking of GMP which prevented successful builds under 
Darwin (and presumably other systems) * Allow use of x86_64 asm code under MinGW Changes between ecm-6.2.2 and ecm-6.2.3: * Fixed incompatibility with GMP 4.3.0 when testing version in configure * SSE2 asm code for Visual C added in stage 2 NTT code * Small improvement to x86_64 mulredc asm code, slight speedup on Core 2 * Fixed incorrect carry propagation in subquadratic REDC code which could lead to incorrect arithmetic in rare cases * Fixed memory leak with -v parameter when factor was found in ECM stage 1 * Fixed bug which caused only one ECM curve to be run in spite of -c parameter if input line did not end in newline * Assembler mulredc code enabled by default on x86_64 Changes between ecm-6.2.1 and ecm-6.2.2: * Updated build project files for Visual C by Brian Gladman, also adds missing NTT_GFP_TWIDDLE_DI[FT]_BREAKOVER defines in VC parameter file * Fixed uninitialised parameter to P-1 probability computation * In tune.c : fixed generation of NTT_GFP_TWIDDLE_DI[FT]_BREAKOVER values, avoid calling cputime() excessively often when timing short functions, fixed access to uninitialised memory * Fixed serious split infinitive in configure script (thanks Paul Leyland) * Removed unnecessary carry propagation in x86_64 mulredc code, slight speedup (thanks Philip McLaughlin) * Fixed non-portable PIC code in x86_64/redc.asm * Fixed problem with pattern matching host type names in configure.in * Converted binary constants in spv.c and ntt_gfp.c to hexadecimal, some assembler do not support binary constants Changes between ecm-6.2 and ecm-6.2.1: * Default B2 for new P-1 and P+1 stage 2 increased * Probabilities for finding factors with P-1 are now printed with -v * Fixed compilation problem on IA64, EV56, and ARM * Made threshold between recursive and iterative NTT tunable Changes between ecm-6.1.3 and ecm-6.2: * New stage 2 for P-1 and P+1, described in Montgomery and Kruppa, Improved Stage 2 to P+-1 Factoring Algorithms, in A. J. van der Poorten and A. 
Stein (Eds.), ANTS-VIII 2008, LNCS 5011, pp. 180-195. * Parallelization in the new P+-1 stage 2 (with --enable-openmp). * Optimizations to the NTT code by Jason S. Papadopoulos * Improved mulredc assembly code for Athlon64/Opteron * Improved modular reduction in the mpzmod range * Bugfix in P+1 stage 2 which caused incorrect initialisation if Brent-Suyama polynomial had degree > 1 and i0 was negative (occurs only with non-standard parameters) * Bugfix in generation of Lucas chains for P+1 and ECM, causing some stage 1 primes close to 2^32 to be processed incorrectly on 32 bit systems * Added build project for VC++ by Brian Gladman * File ecm.h changed from GPL to LGPL: the fact it was under GPL was an involuntary mistake, which has the consequence that applications linking with libecm for version < 6.2 should be under GPL too. * Fixed a regression introduced in 6.1.1: the default arithmetic (NTT) for stage 2 was slower for large inputs. Now defaults to -no-ntt for input numbers >30 machine words. Changes between ecm-6.1.2 and ecm-6.1.3: * fixed incorrect computation of memory use in stage 2, especially for machines that use Kronecker-Schoenhage multiplication even for large degrees, such as Core 2. * fixed -B2scale option whose value hadn't been passed to the factoring routines * fixed default B2min for P-1, which could be truncated on 32 bit machines, causing stage 2 to take a little longer than necessary * fixed bug for modular multiplication modulo Fermat numbers 2^2^n+1, where a result of 2^2^n would be truncated to 0. Changes between ecm-6.1.1 and ecm-6.1.2: * changed copyright header from sp.h, to recognize the FSF for parts inspired or taken from gmp-impl.h. 
Changes between ecm-6.0.1 and ecm-6.1: * new assembly code contributed by Pierrick Gaudry for combined mul/redc * new Number Theoretic Transform code contributed by Dave Newman for step 2 * new signal handling and corresponding save files for step 1 * now prints peak memory allocation with -v * improved and simplified tuning * command-line options: added -idlecmd -no-ntt -prpcmd -stage1time -maxmem, removed -prp* * new configure options --enable-asm-redc, --with-gwnum * new exit status codes of ecm program * new interface to George Woltman's GWNUM library (see INSTALL) * chosen stage 2 bound is now printed right away * fixed minor memory leak in mpmod.c/isbase2 * fixed invalid Found input number N reported on some numbers * fixed serious P+1 bug on 64-bit architectures with B1 > 2^32 * fixed wrong detection of divisors of 2^n+1 or 2^n-1 * fixed memory leaks Changes between ecm-6.0 and ecm-6.0.1: * now checks for availability of snprintf() during configure * fixed linking problems with tune and tune2 on PowerPC G5 * fixed segfault in rho.c * fixed main()'s B2 value being overwritten by callees * allow both \r and \n for newline (for Apple computers) * made files compile under Visual C * fixed bug in listz.c that could leave undefined data * fixed the -B2scale option * fixed small error in printed B2' value (with -v) * added Windows section to INSTALL * small corrections to ecm.xml and ecm.1 * added curve counter in loop mode again * fixed segfault when a non-number was in place of B2 on command line * worked around problem with MinGW/Wine scanf() (value 1 too high for %n) * free rhotable memory at end of stage 2 * replaced GSL's dilog_series() in rho.c due to licensing (GPL vs. LGPL) Changes between ecm-5.0.3 and ecm-6.0: * use of the autotools (configure/make) * there is now a documentation in "man" format (ecm.1) * added a set of -prp* command line switches which will use an external program to perform prp testing of candidate and factors. 
When the numbers get large, GMP becomes very non-optimal in PRP testing. An external program, such as OpenPFGW, can be much faster than the GMP. (New code from Phil Carmody) * new parser for symbolic input * added hex number input into the expression parser. This was needed due to save files from Prime95 being output in hex. ecm was not resuming these. * added some porting code so ecm builds under VC6. VC6 builds an ecm that is about 1% to 3% faster than the MinGW build (which is about 1% to 3% faster than a Cygwin build) * quiet mode (-q) now prints on stdout all factors found on the same line: f1 f2 ... fk ccc where ccc is the remaining composite (contributed by Laurent Fousse). Example: $ echo 438573459834757 | ./ecm -sigma 6 -q 1e2 2166151 202466707 * special code for Fermat numbers, which improves both steps, example for F12: GMP-ECM 5.0.3 [powered by GMP 4.1.4] [ECM] Input number has 1187 digits Using B1=100000, B2=31565866, polynomial x^2, sigma=4155936925 Step 1 took 27157ms Step 2 took 20830ms GMP-ECM 5.2.0 [powered by GMP 4.1.4] [ECM] Input number has 1187 digits Using B1=100000, B2=31565866, polynomial x^2, sigma=535125396 Step 1 took 24264ms Step 2 took 6605ms * speed improvement in step 2, especially for large B2 (here with the c155 in the GMP-ECM sources), using Kronecker/Schonhage multiplication: GMP-ECM 5.0.3 [powered by GMP 4.1.4] [ECM] Using B1=3000000, B2=4016636514, polynomial Dickson(12), sigma=2383768044 Step 1 took 62305ms Step 2 took 45322ms GMP-ECM 5.2.0 [powered by GMP 4.1.4] [ECM] Using B1=3000000, B2=4016636514, polynomial Dickson(12), sigma=3595368442 Step 1 took 61824ms Step 2 took 32989ms * Brent-Suyama's extension now works for P+1 too * trial division is available (option -t n) * new options -n (low priority) and -nn (idle priority) * the looping mode (-c n) now continues to try to factor the composite when a factor is found (use -one to stop) * new option -ve n to display only inputs of <= n characters (looping mode) * new 
option -treefile which stores product tree of F on disk to save memory * generation of roots use double sieve idea, increases B2 for given k, dF * generation of roots for ECM rewritten to reduce number of extgcds Changes between ecm-5.0 and ecm-5.0.1: * fixed bug when B1 or B2min is too large, and an overflow occurs in step 2, making the computations incorrect. The new limit for B1 or B2min is now around 2^53, and an error occurs when this limit is passed. * fixed problem with save lines incorrectly written when both sigma and A are given with -save. * fixed efficiency problem with -pm1: for B1 > 1e6, use -redc or -mpz_mod by default. ecm-6.4.4/batch.c0000644023561000001540000002720112106741273010445 00000000000000/* batch.c - Implement batch mode for step 1 of ECM Copyright 2011, 2012 Cyril Bouvier, Paul Zimmermann and David Cleaver. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ /* ECM stage 1 in batch mode, for initial point (x:z) with small coordinates, such that x and z fit into a mp_limb_t. For example we can start with (x=2:y=1) with the curve by^2 = x^3 + ax^2 + x with a = 4d-2 and b=16d+2, then we have to multiply by d=(a+2)/4 in the duplicates. 
With the change of variable x=b*X, y=b*Y, this curve becomes: Y^2 = X^3 + a/b*X^2 + 1/b^2*X. */ #include "ecm-impl.h" #define MAX_HEIGHT 32 #if ULONG_MAX == 4294967295 #define MAX_B1_BATCH 2977044736UL #else /* nth_prime(2^(MAX_HEIGHT-1)) */ #define MAX_B1_BATCH 50685770167UL #endif void compute_s (mpz_t s, unsigned long B1) { mpz_t acc[MAX_HEIGHT]; /* To accumulate products of prime powers */ unsigned int i, j; unsigned long pi = 2, pp, maxpp; ASSERT_ALWAYS (B1 < MAX_B1_BATCH); for (j = 0; j < MAX_HEIGHT; j++) mpz_init (acc[j]); /* sets acc[j] to 0 */ i = 0; while (pi <= B1) { pp = pi; maxpp = B1 / pi; while (pp <= maxpp) pp *= pi; if ((i & 1) == 0) mpz_set_ui (acc[0], pp); else mpz_mul_ui (acc[0], acc[0], pp); j = 0; /* We have accumulated i+1 products so far. If bits 0..j of i are all set, then i+1 is a multiple of 2^(j+1). */ while ((i & (1 << j)) != 0) { /* we use acc[MAX_HEIGHT-1] as 0-sentinel below, thus we need j+1 < MAX_HEIGHT-1 */ ASSERT (j + 1 < MAX_HEIGHT - 1); if ((i & (1 << (j + 1))) == 0) /* i+1 is not multiple of 2^(j+2), thus add[j+1] is "empty" */ mpz_swap (acc[j+1], acc[j]); /* avoid a copy with mpz_set */ else mpz_mul (acc[j+1], acc[j+1], acc[j]); /* accumulate in acc[j+1] */ mpz_set_ui (acc[j], 1); j++; } i++; pi = getprime (pi); } for (mpz_set (s, acc[0]), j = 1; mpz_cmp_ui (acc[j], 0) != 0; j++) mpz_mul (s, s, acc[j]); getprime_clear (); /* free the prime tables, and reinitialize */ for (i = 0; i < MAX_HEIGHT; i++) mpz_clear (acc[i]); } /* Return the number of bytes written */ int write_s_in_file (char *fn, mpz_t s) { FILE *file; int ret = 0; #ifdef DEBUG if (fn == NULL) { fprintf (stderr, "write_s_in_file: fn == NULL\n"); exit (EXIT_FAILURE); } #endif file = fopen (fn, "w"); if (file == NULL) { fprintf (stderr, "Could not open file %s for writing\n", fn); return 0; } ret = mpz_out_raw (file, s); fclose (file); return ret; } void read_s_from_file (mpz_t s, char *fn) { FILE *file; int ret = 0; #ifdef DEBUG if (fn == NULL) { fprintf (stderr, 
"read_s_from_file: fn == NULL\n"); exit (EXIT_FAILURE); } #endif file = fopen (fn, "r"); if (file == NULL) { fprintf (stderr, "Could not open file %s for reading\n", fn); exit (EXIT_FAILURE); } ret = mpz_inp_raw (s, file); if (ret == 0) { fprintf (stderr, "read_s_from_file: 0 bytes read from %s\n", fn); exit (EXIT_FAILURE); } fclose (file); } #ifndef GPUECM #if 0 /* this function is useful in debug mode to print non-normalized residues */ static void mpresn_print (mpres_t x, mpmod_t n) { mp_size_t m, xn; xn = SIZ(x); m = ABSIZ(x); MPN_NORMALIZE(PTR(x), m); SIZ(x) = xn >= 0 ? m : -m; gmp_printf ("%Zd\n", x); SIZ(x) = xn; } #endif /* (x1:z1) <- 2(x1:z1) (x2:z2) <- (x1:z1) + (x2:z2) assume (x2:z2) - (x1:z1) = (2:1) Uses 4 modular multiplies and 4 modular squarings. Inputs are x1, z1, x2, z2, d, n. Use two auxiliary variables: t, w (it seems using one only is not possible if all mpresn_mul and mpresn_sqr calls don't overlap input and output). In the batch 1 mode, we pass d_prime such that the actual d is d_prime/beta. Since beta is a square, if d_prime is a square (on 64-bit machines), so is d. In mpresn_mul_1, we multiply by d_prime = beta*d and divide by beta. 
*/ static void dup_add_batch1 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2, mpres_t t, mpres_t w, mp_limb_t d_prime, mpmod_t n) { /* active: x1 z1 x2 z2 */ mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */ /* active: w z1 x2 z2 */ mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */ /* active: w z1 x1 x2 */ mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */ /* active: w z1 x1 z2 */ mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */ /* active: w z1 x2 z2 */ mpresn_sqr (t, z1, n); /* t = (x1-z1)^2 */ /* active: w t x2 z2 */ mpresn_sqr (z1, w, n); /* z1 = (x1+z1)^2 */ /* active: z1 t x2 z2 */ mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */ /* active: x1 z1 t x2 z2 */ mpresn_sub (w, z1, t, n); /* w = (x1+z1)^2 - (x1-z1)^2 */ /* active: x1 w t x2 z2 */ mpresn_mul_1 (z1, w, d_prime, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */ /* active: x1 z1 w t x2 z2 */ mpresn_add (t, t, z1, n); /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */ /* active: x1 w t x2 z2 */ mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */ /* active: x1 z1 x2 z2 */ mpresn_addsub (w, z2, x2, z2, n); /* active: x1 z1 w z2 */ mpresn_sqr (x2, w, n); /* active: x1 z1 x2 z2 */ mpresn_sqr (w, z2, n); /* active: x1 z1 x2 w */ mpresn_add (z2, w, w, n); } static void dup_add_batch2 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2, mpres_t t, mpres_t w, mpres_t d, mpmod_t n) { /* active: x1 z1 x2 z2 */ mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */ /* active: w z1 x2 z2 */ mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */ /* active: w z1 x1 x2 */ mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */ /* active: w z1 x1 z2 */ mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */ /* active: w z1 x2 z2 */ mpresn_sqr (t, z1, n); /* t = (x1-z1)^2 */ /* active: w t x2 z2 */ mpresn_sqr (z1, w, n); /* z1 = (x1+z1)^2 */ /* active: z1 t x2 z2 */ mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */ /* active: x1 z1 t 
x2 z2 */ mpresn_sub (w, z1, t, n); /* w = (x1+z1)^2 - (x1-z1)^2 */ /* active: x1 w t x2 z2 */ mpresn_mul (z1, w, d, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */ /* active: x1 z1 w t x2 z2 */ mpresn_add (t, t, z1, n); /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */ /* active: x1 w t x2 z2 */ mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */ /* active: x1 z1 x2 z2 */ mpresn_addsub (w, z2, x2, z2, n); /* active: x1 z1 w z2 */ mpresn_sqr (x2, w, n); /* active: x1 z1 x2 z2 */ mpresn_sqr (w, z2, n); /* active: x1 z1 x2 w */ mpresn_add (z2, w, w, n); } /* Input: x is initial point A is curve parameter in Montgomery's form: g*y^2*z = x^3 + a*x^2*z + x*z^2 n is the number to factor B1 is the stage 1 bound Output: If a factor is found, it is returned in x. Otherwise, x contains the x-coordinate of the point computed in stage 1 (with z coordinate normalized to 1). B1done is set to B1 if stage 1 completed normally, or to the largest prime processed if interrupted, but never to a smaller value than B1done was upon function entry. 
Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise ECM_NO_FACTOR_FOUND */ /* For now we don't take into account go stop_asap and chkfilename */ int ecm_stage1_batch (mpz_t f, mpres_t x, mpres_t A, mpmod_t n, double B1, double *B1done, int batch, mpz_t s) { mp_limb_t d_1; mpz_t d_2; mpres_t x1, z1, x2, z2; unsigned long i; mpres_t t, u; int ret = ECM_NO_FACTOR_FOUND; MEMORY_TAG; mpres_init (x1, n); MEMORY_TAG; mpres_init (z1, n); MEMORY_TAG; mpres_init (x2, n); MEMORY_TAG; mpres_init (z2, n); MEMORY_TAG; mpres_init (t, n); MEMORY_TAG; mpres_init (u, n); if (batch == 2) { MEMORY_TAG; mpres_init (d_2, n); } MEMORY_UNTAG; /* initialize P */ mpres_set (x1, x, n); mpres_set_ui (z1, 1, n); /* P1 <- 1P */ /* Compute d=(A+2)/4 from A and d'=B*d thus d' = 2^(GMP_NUMB_BITS-2)*(A+2) */ if (batch == 1) { mpres_get_z (u, A, n); mpz_add_ui (u, u, 2); mpz_mul_2exp (u, u, GMP_NUMB_BITS - 2); mpres_set_z_for_gcd (u, u, n); /* reduces u mod n */ if (mpz_size (u) > 1) { mpres_get_z (u, A, n); outputf (OUTPUT_ERROR, "Error, d'=B*(A+2)/4 should fit in a mp_limb_t, A=%Zd\n", u); return ECM_ERROR; } d_1 = mpz_getlimbn (u, 0); } else { /* b = (A0+2)*B/4, where B=2^(k*GMP_NUMB_BITS) for MODMULN or REDC, B=2^GMP_NUMB_BITS for batch1, and B=1 otherwise */ mpres_add_ui (d_2, A, 2, n); mpres_div_2exp (d_2, d_2, 2, n); } /* Compute 2P : no need to duplicate P, the coordinates are simple. 
*/ mpres_set_ui (x2, 9, n); if (batch == 1) /* here d = d_1 / GMP_NUMB_BITS */ { /* warning: mpres_set_ui takes an unsigned long which has only 32 bits on Windows, while d_1 might have 64 bits */ ASSERT_ALWAYS (mpz_size (u) == 1 && mpz_getlimbn (u, 0) == d_1); mpres_set_z (z2, u, n); mpres_div_2exp (z2, z2, GMP_NUMB_BITS, n); } else mpres_set (z2, d_2, n); mpres_mul_2exp (z2, z2, 6, n); mpres_add_ui (z2, z2, 8, n); /* P2 <- 2P = (9 : : 64d+8) */ /* invariant: if j represents the upper bits of s, then P1 = j*P and P2=(j+1)*P */ mpresn_pad (x1, n); mpresn_pad (z1, n); mpresn_pad (x2, n); mpresn_pad (z2, n); /* now perform the double-and-add ladder */ if (batch == 1) { for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;) { if (mpz_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */ /* P2 <- P1+P2 P1 <- 2*P1 */ dup_add_batch1 (x1, z1, x2, z2, t, u, d_1, n); else /* (j,j+1) -> (2j+1,2j+2) */ /* P1 <- P1+P2 P2 <- 2*P2 */ dup_add_batch1 (x2, z2, x1, z1, t, u, d_1, n); } } else /* batch = 2 */ { mpresn_pad (d_2, n); for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;) { if (mpz_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */ /* P2 <- P1+P2 P1 <- 2*P1 */ dup_add_batch2 (x1, z1, x2, z2, t, u, d_2, n); else /* (j,j+1) -> (2j+1,2j+2) */ /* P1 <- P1+P2 P2 <- 2*P2 */ dup_add_batch2 (x2, z2, x1, z1, t, u, d_2, n); } } *B1done=B1; mpresn_unpad (x1); mpresn_unpad (z1); if (!mpres_invert (u, z1, n)) /* Factor found? 
*/ { mpres_gcd (f, z1, n); ret = ECM_FACTOR_FOUND_STEP1; } mpres_mul (x, x1, u, n); mpz_clear (x1); mpz_clear (z1); mpz_clear (x2); mpz_clear (z2); mpz_clear (t); mpz_clear (u); if (batch == 2) { mpz_clear (d_2); } return ret; } #endif ecm-6.4.4/ecm-params.h.core20000644023561000001540000000316612106741273012433 00000000000000/* produced on pasta.loria.fr (Intel(R) Core(TM)2 CPU 6700 @ 2.66GHz) */ #ifndef HAVE_MPIR /* tuning parameters for GMP, tuned for GMP 5.0.4 */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,2,0,2,0,2,1,1,1,1,2,2,1,2,2} #define MPZMOD_THRESHOLD 21 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 8, 8, 9, 10, 12, 11, 12, 13, 12, 12, 14, 16, 16, 16, 18, 18, 18} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 8 #define POLYINVERT_NTT_THRESHOLD 128 #define POLYEVALT_NTT_THRESHOLD 256 #define MPZSPV_NORMALISE_STRIDE 128 #else /* tuning parameters for MPIR, tuned for MPIR 2.5.1 */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,0,1,1,2,2,1,1,1,1,1,1,2,1,2} #define MPZMOD_THRESHOLD 21 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 0, 6, 6, 7, 8, 9, 9, 11, 10, 10, 11, 12, 13, 14, 14, 11, 13, 18, 18, 14, 20, 16, 18, 18, 20} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 256 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 128 #define POLYEVALT_NTT_THRESHOLD 256 #define MPZSPV_NORMALISE_STRIDE 32 #endif 
ecm-6.4.4/pm1fs2.c0000644023561000001540000043420512113200327010466 00000000000000/* Implementation of fast stage 2 for P-1 and P+1 as described in "Improved Stage 2 to $P\pm{}1$ Factoring Algorithms" by Peter L. Montgomery and Alexander Kruppa, ANTS 2008 (8th Algorithmic Number Theory Symposium). Copyright 2007, 2008, 2009, 2010, 2011, 2012 Alexander Kruppa, Paul Zimmermann. NTT functions are based on code Copyright 2005 Dave Newman. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config.h" #include #include #include #include "ecm-impl.h" #include "sp.h" #include #ifdef HAVE_ALLOCA_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef _OPENMP #include #endif /* TODO: - move functions into their proper files (i.e. NTT functions etc.) - later: allow storing NTT vectors on disk */ /* Define TEST_ZERO_RESULT to test if any result of the multipoint evaluation is equal to zero. 
If the modulus is composite, this happening might indicate a problem in the evalutaion code */ #define TEST_ZERO_RESULT const int pari = 0; const unsigned long Pvalues[] = { 3UL, 5UL, 9UL, 15UL, 21UL, 17UL, 27UL, 33UL, 45UL, 51UL, 63UL, 75UL, 105UL, 99UL, 135UL, 165UL, 195UL, 189UL, 231UL, 255UL, 315UL, 345UL, 357UL, 375UL, 405UL, 435UL, 525UL, 585UL, 615UL, 735UL, 765UL, 825UL, 945UL, 1155UL, 1065UL, 1365UL, 1305UL, 1335UL, 1575UL, 1785UL, 1995UL, 2145UL, 2205UL, 2415UL, 2625UL, 2805UL, 3045UL, 3465UL, 3675UL, 4095UL, 4305UL, 4515UL, 4725UL, 4785UL, 5355UL, 5775UL, 5985UL, 5865UL, 6825UL, 7245UL, 8085UL, 8925UL, 9555UL, 10395UL, 10725UL, 11025UL, 12285UL, 12705UL, 15015UL, 14175UL, 15225UL, 16065UL, 17325UL, 19635UL, 21945UL, 23205UL, 24255UL, 25935UL, 26775UL, 28875UL, 31395UL, 33495UL, 35805UL, 36465UL, 38115UL, 39585UL, 40425UL, 45045UL, 45885UL, 49665UL, 51765UL, 58905UL, 65835UL, 69615UL, 75075UL, 77805UL, 82005UL, 84315UL, 86625UL, 88935UL, 94185UL, 98175UL, 105105UL, 109725UL, 116025UL, 118755UL, 121275UL, 135135UL, 137445UL, 137655UL, 144375UL, 153615UL, 165165UL, 167475UL, 176715UL, 179025UL, 185955UL, 197505UL, 208845UL, 215985UL, 225225UL, 255255UL, 250635UL, 285285UL, 277095UL, 294525UL, 315315UL, 345345UL, 373065UL, 368445UL, 405405UL, 435435UL, 451605UL, 465465UL, 454545UL, 504735UL, 525525UL, 555555UL, 569415UL, 596505UL, 645645UL, 647955UL, 672945UL, 687225UL, 765765UL, 770385UL, 805035UL, 855855UL, 858585UL, 915915UL, 945945UL, 962115UL, 1036035UL, 1066065UL, 1119195UL, 1156155UL, 1276275UL, 1306305UL, 1354815UL, 1426425UL, 1456455UL, 1514205UL, 1576575UL, 1666665UL, 1726725UL, 1786785UL, 1789515UL, 1865325UL, 1996995UL, 1983135UL, 2177175UL, 2297295UL, 2327325UL, 2417415UL, 2567565UL, 2611455UL, 2807805UL, 2847075UL, 2878785UL, 3048045UL, 3161235UL, 3258255UL, 3357585UL, 3401475UL, 3533145UL, 3828825UL, 3918915UL, 3985905UL, 4279275UL, 4849845UL, 4789785UL, 4967655UL, 5180175UL, 5360355UL, 5870865UL, 5990985UL, 6561555UL, 6531525UL, 6891885UL, 
7402395UL, 7912905UL, 8273265UL, 8580495UL, 8843835UL, 9444435UL, 10015005UL, 10465455UL, 10705695UL, 10885875UL, 11696685UL, 12267255UL, 12507495UL, 12785955UL, 13498485UL, 14549535UL, 14849835UL, 15570555UL, 16111095UL, 16291275UL, 17612595UL, 18123105UL, 18633615UL, 19684665UL, 20255235UL, 20825805UL, 22207185UL, 22717695UL, 24249225UL, 24819795UL, 25741485UL, 26531505UL, 28333305UL, 29354325UL, 30045015UL, 31396365UL, 32807775UL, 33948915UL, 33528495UL, 34879845UL, 37011975UL, 37522485UL, 39564525UL, 41096055UL, 43648605UL, 44219175UL, 45930885UL, 47222175UL, 48333285UL, 50075025UL, 51816765UL, 52777725UL, 55390335UL, 55547415UL, 59053995UL, 60063465UL, 61906845UL, 64579515UL, 66621555UL, 67492425UL, 70105035UL, 73258185UL, 74939865UL, 77224455UL, 79594515UL, 81876795UL, 84999915UL, 88062975UL, 91005915UL, 94189095UL, 98423325UL, 101846745UL, 111546435UL, 111035925UL, 115120005UL, 121246125UL, 124098975UL, 130945815UL, 140645505UL, 150345195UL, 150225075UL, 155450295UL, 158333175UL, 170255085UL, 179444265UL, 190285095UL, 198843645UL, 203408205UL, 206831625UL, 217222005UL, 229474245UL, 240705465UL, 252447195UL, 254999745UL, 269023755UL, 282146865UL, 287672385UL, 294076965UL, 306110805UL, 318302985UL, 334639305UL, 344338995UL, 354038685UL, 363738375UL, 373438065UL, 387221835UL, 400254855UL, 421936515UL, 431636205UL, 451035585UL, 453888435UL, 470434965UL, 480134655UL, 510765255UL, 522506985UL, 557732175UL, 570855285UL, 596530935UL, 610224615UL, 627912285UL, 654729075UL, 703227525UL, 722116395UL, 751725975UL, 780825045UL, 790524735UL, 821665845UL, 851275425UL, 863017155UL, 909984075UL, 936020085UL, 984518535UL, 1017041025UL, 1052416365UL #if (ULONG_MAX > 4294967295) ,1086110025UL, 1110614505UL, 1147371225UL, 1191785595UL, 1213887675UL, 1265809545UL, 1282356075UL, 1331995665UL, 1391905515UL, 1450103655UL, 1479202725UL, 1547100555UL, 1555088535UL, 1673196525UL, 1712565855UL, 1767130365UL, 1830673845UL, 1883166285UL, 1954487535UL, 2001964965UL, 2119382265UL, 
2187280095UL, 2255177925UL, 2342475135UL, 2390973585UL, 2421213795UL, 2555868315UL, 2672264595UL, 2788660875UL, 2856558705UL, 2953555605UL, 3050552505UL, 3234846615UL, 3457939485UL, 3516137625UL, 3681032355UL, 3758629875UL, 3904125225UL, 4127218095UL, 4360010655UL, 4573403835UL, 4796496705UL, 4844995155UL, 5019589575UL, 5203883685UL, 5262081825UL, 5465775315UL, 5766465705UL, 5898837945UL, 6164152995UL, 6358146795UL, 6411780375UL, 6804332535UL, 6980458485UL, 7172920755UL, 7473611145UL, 7716103395UL, 7968295335UL, 8182259085UL, 8342499165UL, 8812168365UL, 9023519505UL, 9704539845UL, 9927632715UL, 10373818455UL, 10439434005UL, 10820004195UL, 11043097065UL, 11489282805UL, 11877270405UL, 12381654285UL, 12604747155UL, 13080031965UL, 13274025765UL, 13642613985UL, 14389490115UL, 14583483915UL, 15058768725UL, 15611651055UL, 16174233075UL, 16397325945UL, 17289697425UL, 17735883165UL, 18143270145UL, 18381678315UL, 19074440385UL, 19559424885UL, 20636090475UL, 20941375455UL, 21800053275UL, 22643926305UL, 23148310185UL, 24205576395UL, 24546777255UL, 25544133615UL, 26389538175UL, 26863291455UL, 27813861075UL, 29113619535UL, 29494189725UL, 30520074585UL, 30684969315UL, 31790733975UL, 33575476935UL, 34467848415UL, 35202742575UL, 36427185795UL, 38037334335UL, 39240095895UL, 40365259935UL, 42053005995UL, 43168470345UL, 44953213305UL, 45845584785UL, 48522699225UL, 50307442185UL, 51869092275UL, 53653835235UL, 54546206715UL, 56680138515UL, 58784971245UL, 59386352025UL, 61908271425UL, 63431122755UL, 65700850215UL, 67931778915UL, 70162707615UL, 72616729185UL, 74120181135UL, 75740029365UL, 78417143805UL, 80871165375UL, 82840202445UL, 86448487125UL, 88466022645UL, 91133437395UL, 92918180355UL, 100280245065UL, 100726430805UL, 102811864155UL, 106749938295UL, 109000266375UL, 113219631525UL, 119689324755UL, 121027881975UL, 127943760945UL, 132628711215UL, 134859639915UL, 141775518885UL, 148691397855UL, 150922326555UL, 155607276825UL, 161320394235UL, 164977177365UL, 171446870595UL, 
177470378085UL, 183270792705UL #endif }; /* All the prime factors that can appear in eulerphi(P) */ const unsigned long phiPfactors[] = {2UL, 3UL, 5UL, 7UL, 11UL, 13UL, 17UL, 19UL}; /* Some useful PARI functions: sumset(a,b) = {local(i, j, l); l = listcreate (length(a) * length(b)); for (i = 1, length(a), for (j = 1, length(b), listput(l, a[i] + b[j]))); listsort (l, 1); l} V(i,X) = { if (i==0, return(2)); if (i==1, return(X)); if(i%2 == 0, return (V (i/2, X)^2-2)); return (V ((i+1)/2, X) * V ((i-1)/2, X) - X)} U(i,X) = { if (i==0, return(0)); if (i==1, return(1)); if(i%2 == 0, return (U (i/2, X) * V(i/2,X))); return (V ((i+1)/2, X) *U( (i-1)/2, X) + 1)} */ #ifndef _OPENMP static int omp_get_num_threads () {return 1;} static int omp_get_thread_num () {return 0;} #endif static void ntt_sqr_reciprocal (mpzv_t, const mpzv_t, mpzspv_t, const spv_size_t, const mpzspm_t); static void print_elapsed_time (int verbosity, long cpu_start, ATTRIBUTE_UNUSED long real_start) { #ifdef _OPENMP if (real_start != 0L) { outputf (verbosity, " took %lums (%lums real)\n", elltime (cpu_start, cputime()), elltime (real_start, realtime())); return; } #endif outputf (verbosity, " took %lums\n", elltime (cpu_start, cputime())); } static void print_CRT_primes (const int verbosity, const char *prefix, const mpzspm_t ntt_context) { double modbits = 0.; unsigned int i; if (test_verbose (verbosity)) { outputf (verbosity, "%s%lu", prefix, ntt_context->spm[0]->sp); modbits += log ((double) ntt_context->spm[0]->sp); for (i = 1; i < ntt_context->sp_num; i++) { outputf (verbosity, " * %lu", ntt_context->spm[i]->sp); modbits += log ((double) ntt_context->spm[i]->sp); } outputf (verbosity, ", has %d primes, %f bits\n", ntt_context->sp_num, modbits / log (2.)); } } /* Approximate amount of memory in bytes each coefficient in an NTT takes so that NTT can do transforms up to length lmax with modulus, or with 2*modulus if twice != 0 */ static size_t ntt_coeff_mem (const unsigned long lmax, const mpz_t 
modulus, const int twice) { mpz_t t; size_t n; mpz_init (t); mpz_mul (t, modulus, modulus); mpz_mul_ui (t, t, lmax); if (twice) mpz_mul_2exp (t, t, 1UL); /* +4: +1 for rounding up, +3 for extra words due to ECRT */ n = (mpz_sizeinbase (t, 2) - 1) / SP_NUMB_BITS + 4; mpz_clear (t); return n * sizeof (sp_t); } size_t pm1fs2_memory_use (const unsigned long lmax, const mpz_t modulus, const int use_ntt) { if (use_ntt) { /* We store lmax / 2 + 1 coefficients for the DCT-I of F and lmax coefficients for G in NTT ready format. Each coefficient in NTT-ready format occupies approx. ceil(log(lmax*modulus^2)/log(bits per sp_t)) + 3 words. */ size_t n; n = ntt_coeff_mem (lmax, modulus, 0) * (size_t) (3 * lmax / 2 + 1); outputf (OUTPUT_DEVVERBOSE, "pm1fs2_memory_use: Estimated memory use " "with lmax = %lu NTT is %lu bytes\n", lmax, n); return n; } else { /* F stores s_1/2 residues, h stores s_1 mpz_t structs (residues get cloned from F) g stores lmax residues, R stores lmax-s_1 residues, and tmp stores 3*lmax+list_mul_mem (lmax / 2) residues. Assume s_1 is close to lmax/2. Then we have lmax/4 + lmax/2 + lmax + lmax/2 + 3*lmax + list_mul_mem (lmax / 2) = (5+1/4)*lmax + list_mul_mem (lmax / 2) residues, plus s_1 mpz_t. */ size_t n; n = mpz_size (modulus) * sizeof (mp_limb_t) + sizeof (mpz_t); n *= 5 * lmax + lmax / 4 + list_mul_mem (lmax / 2); n += lmax / 2 * sizeof (mpz_t); /* Memory use due to temp space allocation in TMulKS appears to approximately triple the estimated memory use. 
This is hard to estimate precisely, so let's go with the fudge factor of 3 here */ n *= 3; outputf (OUTPUT_DEVVERBOSE, "pm1fs2_memory_use: Estimated memory use " "with lmax = %lu is %lu bytes\n", lmax, n); return n; } } /* return the possible lmax for given memory use and modulus */ unsigned long pm1fs2_maxlen (const size_t memory, const mpz_t modulus, const int use_ntt) { if (use_ntt) { size_t n, lmax = 1; n = ntt_coeff_mem (lmax, modulus, 0); lmax = 1UL << ceil_log2 (memory / n / 3); return lmax; } else { size_t lmax, n; n = mpz_size (modulus) * sizeof (mp_limb_t) + sizeof (mpz_t); /* Guess an initial value of lmax for list_mul_mem (lmax / 2) */ /* memory = n * 25/4 * lmax + lmax / 2 * sizeof (mpz_t); */ /* Fudge factor of 3 for TMulKS as above */ lmax = memory / (3 * 25 * n / 4 + 3 * sizeof (mpz_t) / 2); return lmax; } } size_t pp1fs2_memory_use (const unsigned long lmax, const mpz_t modulus, const int use_ntt, const int twopass) { size_t n, m; m = mpz_size (modulus) * sizeof (mp_limb_t) + sizeof (mpz_t); if (use_ntt) { /* In one pass mode, we store h_x_ntt and h_y_ntt, each of length lmax/2(+1), and g_x_ntt and g_y_ntt, each of length lmax, all in NTT ready format. In two pass mode, we store h_x_ntt, h_y_ntt and g_x_ntt as before, plus R which is lmax - s_1 mpz_t. We assume s_1 ~= lmax/2. */ n = ntt_coeff_mem (lmax, modulus, !twopass); if (twopass) return lmax * (2 * n + m / 2); else return lmax * 3 * n; } else { /* We allocate: F: s_1/2 coefficients fh_x, fh_y: s_1/2 coefficients h_x, h_y: s_1 mpz_t's (cloned from fh_x and fh_y) g_x, g_y: lmax coefficients R_x, R_y: lmax - s_1 coefficients tmp: 3UL * lmax + list_mul_mem (lmax / 2) Assuming s_1 ~ lmax/2, that's lmax/2 + 2*lmax/4 + 2*lmax + 2*lmax/2 * 3*lmax + list_mul_mem (lmax / 2) = 7 + list_mul_mem (lmax / 2) coefficients and lmax mpz_t. 
*/ n = m * (7 * lmax + list_mul_mem (lmax / 2)); n += lmax * sizeof (mpz_t); n = 5 * n / 2; /* A fudge factor again */ return n; } } unsigned long pp1fs2_maxlen (const size_t memory, const mpz_t modulus, const int use_ntt, const int twopass) { size_t n, m; m = mpz_size (modulus) * sizeof (mp_limb_t) + sizeof (mpz_t); if (use_ntt) { n = ntt_coeff_mem (1, modulus, !twopass); if (twopass) n = memory / (2 * n + m / 2); else n = memory / (3 * n); return 1UL << (ceil_log2 (n / 2)); /* Rounded down to power of 2 */ } else { return memory / 5 / (m * 8 + sizeof (mpz_t)) * 2; } } /* Test if for given P, nr, B2min and B2 we can choose an m_1 so that the stage 2 interval [B2min, B2] is covered. The effective B2min and B2 are stored in effB2min and effB2 */ static int test_P (const mpz_t B2min, const mpz_t B2, mpz_t m_1, const unsigned long P, const unsigned long nr, mpz_t effB2min, mpz_t effB2) { mpz_t m; /* We need B2min >= 2 * max(S_1 + S_2) + (2*m_1 - 1)*P + 1, or B2min - 2 * max(S_1 + S_2) - 1 >= (2*m_1)*P - P, or (B2min - 2*max(S_1 + S_2) + P - 1)/(2P) >= m_1 Choose m_1 accordingly */ mpz_init (m); sets_max (m, P); mpz_mul_2exp (m, m, 1UL); /* m = 2*max(S_1 + S_2) */ mpz_sub (m_1, B2min, m); mpz_sub_ui (m_1, m_1, 1UL); /* m_1 = B2min - 2*max(S_1 + S_2) - 1 */ mpz_add_ui (m_1, m_1, P); mpz_fdiv_q_2exp (m_1, m_1, 1UL); mpz_fdiv_q_ui (m_1, m_1, P); /* 2UL*P may overflow */ /* Compute effB2min = 2 * max(S_1 + S_2) + (2*(m_1 - 1) + 1)*P + 1 */ mpz_mul_2exp (effB2min, m_1, 1UL); mpz_sub_ui (effB2min, effB2min, 1UL); mpz_mul_ui (effB2min, effB2min, P); mpz_add (effB2min, effB2min, m); mpz_add_ui (effB2min, effB2min, 1UL); ASSERT_ALWAYS (mpz_cmp (effB2min, B2min) <= 0); /* Compute the smallest value coprime to P at the high end of the stage 2 interval that will not be covered: 2*(min(S_1 + S_2)) + (2*(m_1 + nr) + 1)*P. 
We assume min(S_1 + S_2) = -max(S_1 + S_2) */ mpz_add_ui (effB2, m_1, nr); mpz_mul_2exp (effB2, effB2, 1UL); mpz_add_ui (effB2, effB2, 1UL); mpz_mul_ui (effB2, effB2, P); mpz_sub (effB2, effB2, m); /* The effective B2 values is that value, minus 1 */ mpz_sub_ui (effB2, effB2, 1UL); mpz_clear (m); return (mpz_cmp (B2, effB2) <= 0); } static void factor_phiP (int *exponents, const unsigned long phiP) { const int nrprimes = sizeof (phiPfactors) / sizeof (unsigned long); unsigned long cofactor = phiP; int i; ASSERT_ALWAYS (phiP > 0UL); for (i = 0; i < nrprimes; i++) for (exponents[i] = 0; cofactor % phiPfactors[i] == 0UL; exponents[i]++) cofactor /= phiPfactors[i]; ASSERT_ALWAYS (cofactor == 1UL); } static unsigned long pow_ul (const unsigned long b, const unsigned int e) { unsigned long r = 1UL; unsigned int i; for (i = 0; i < e; i++) r *= b; return r; } static unsigned long absdiff_ul (unsigned long a, unsigned long b) { return (a > b) ? a - b : b - a; } /* Choose s_1 so that s_1 * s_2 = phiP, s_1 is positive and even, s_2 >= min_s2 and s_2 is minimal and abs(s_1 - l) is minimal under those conditions. If use_ntt == 1, we require s_1 < l. Returns 0 if no such choice is possible */ static unsigned long choose_s_1 (const unsigned long phiP, const unsigned long min_s2, const unsigned long l, const int use_ntt) { const int nrprimes = sizeof (phiPfactors) / sizeof (unsigned long); /* Using [nrprimes] here makes the compiler complain about variable-sized arrays */ int phiPexponents[sizeof (phiPfactors) / sizeof (unsigned long)], exponents[sizeof (phiPfactors) / sizeof (unsigned long)]; unsigned long s_1 = 0UL, s_2 = 0UL, trys_1; int i; ASSERT_ALWAYS (phiP > 0 && phiP % 2 == 0); /* We want only even s_1. We divide one 2 out of phiP here... */ factor_phiP (phiPexponents, phiP / 2); for (i = 0; i < nrprimes; i++) exponents[i] = 0; do { trys_1 = 2; /* ... 
and add a 2 here */ for (i = 0; i < nrprimes; i++) trys_1 *= pow_ul (phiPfactors[i], exponents[i]); #if 0 printf ("choose_s_1: Trying trys_1 = %lu\n", trys_1); #endif /* See if it satisfies all the required conditions and is an improvement over the previous choice */ if (phiP / trys_1 >= min_s2 && (s_2 == 0UL || phiP / trys_1 < s_2) && absdiff_ul (trys_1, l) < absdiff_ul (s_1, l) && (use_ntt == 0 || trys_1 < l)) { #if 0 printf ("choose_s_1: New best s_1 for phiP = %lu, min_s2 = %lu, " "l = %lu : %lu\n", phiP, min_s2, l, trys_1); #endif s_1 = trys_1; } for (i = 0; i < nrprimes; i++) { if (++(exponents[i]) <= phiPexponents[i]) break; exponents[i] = 0; } } while (i < nrprimes); return s_1; } /* Approximate cost of stage 2. Cost with and without ntt are not comparable. We have l > s_1 and s_1 * s_2 = eulerphi(P), hence s_2*l > eulerphi(P) and so cost (s_2, l) > eulerphi(P) for all P */ static unsigned long est_cost (const unsigned long s_2, const unsigned long l, const int use_ntt, const int method) { if (method == ECM_PM1) { /* The time for building f, h and DCT-I of h seems to be about 7/6 of the time of computing g, h*g and gcd with NTT, and 3/2 of the time of computing g, h*g and gcd without NTT */ if (use_ntt) return (7 * l) / 6 + s_2 * l; else return (3 * l) / 2 + s_2 * l; } else if (method == ECM_PP1) { /* Building f is the same, building h and its forward transform is twice about as expensive as for P-1. Each multi-point evaluation is twice as expensive as for P-1. FIXME: The estimate for NTT assumes the "one-pass" variant, in "two-pass" the multipoint evaluations are slower, so the optimum shifts towards smaller s_2 some more */ if (use_ntt) return (4 * l) / 5 + s_2 * l; else return (3 * l) / 4 + s_2 * l; } else abort (); /* Invalid value for method */ } /* Choose P so that a stage 2 range from B2min to B2 can be covered with multipoint evaluations, each using a convolution of length at most lmax. 
The parameters for stage 2 are stored in finalparams, the final effective B2min and B2 values in final_B2min and final_B2, respecively. Each of these may be NULL, in which case the value is not stored. It is permissible to let B2min and final_B2min, or B2 and final_B2 point at the same mpz_t. */ long choose_P (const mpz_t B2min, const mpz_t B2, const unsigned long lmax, const unsigned long min_s2, faststage2_param_t *finalparams, mpz_t final_B2min, mpz_t final_B2, const int use_ntt, const int method) { /* Let S_1 + S_2 == (Z/PZ)* (mod P). Let F(x) = \prod_{k_1 \in S_1} (x - b_1^{2 k_1}). If we evaluate F(b_1^{2 k_2 + (2m + 1)P}) for all k_2 \in S_2 with m_1 <= m < m_1+nr, we test all exponents 2 k_2 + (2m + 1)P - 2 k_1. The largest value coprime to P at the low end of the stage 2 interval *not* covered will be 2*max(S_2) + (2*(m_1-1) + 1)*P - 2*min(S_1). The smallest value at the high end not covered will be 2*min(S_2) + (2*(m_1 + nr) + 1)*P - 2*max(S_1). Assume S_1 and S_2 are symmetric around 0, so that max(S_1) = -min(S_1). Then the largest ... is: 2*(max(S_1) + max(S_2)) + (2*m_1 - 1)*P The smallest ... is: -2*(max(S_1) + max(S_2)) + (2*m_1 + 2*nr + 1)*P The effective B2min = 2*(max(S_1) + max(S_2)) + (2*m_1 - 1)*P + 1 The effective B2max = -2*(max(S_1) + max(S_2)) + (2*m_1 + 2*nr + 1)*P - 1 Then the difference effB2max - effB2min = -4*(max(S_1) + max(S_2)) + 2P*(nr + 1) - 2 We obviously require B2max - B2min <= 2*nr*P Since nr < lmax, B2max - B2min <= 2*lmax*P or P >= ceil((B2max - B2min)/(2*lmax)) Hence we are looking for an odd P with s_1 * s_2 = eulerphi(P) so that s_1 ~= lmax / 2 and the whole stage 2 interval is covered. s_2 should be small, as long as s_1 is small enough. 
*/ mpz_t B2l, m_1, effB2min, tryeffB2, effB2, lmin; /* The best parameters found so far, P == 0 means that no suitable P has been found yet: */ unsigned long P = 0, s_1 = 0, s_2 = 0, l = 0, cost = 0; unsigned int i; const unsigned int Pvalues_len = sizeof (Pvalues) / sizeof (unsigned long); int r; outputf (OUTPUT_TRACE, "choose_P(B2min = %Zd, B2 = %Zd, lmax = %lu, min_s2 = %ld, " "use_ntt = %d, method = %d\n", B2min, B2, lmax, min_s2, use_ntt, method); if (mpz_cmp (B2, B2min) < 0) return 0L; /* If we use the NTT, we allow only power-of-two transform lengths. In that case, the code below assumes that lmax is a power of two. If that is not the case, print error and return. */ if (use_ntt && (lmax & (lmax - 1UL)) != 0) { outputf (OUTPUT_ERROR, "choose_P: Error, lmax = %lu is not a power of two\n", lmax); return ECM_ERROR; } mpz_init (effB2); mpz_init (tryeffB2); mpz_init (effB2min); mpz_init (B2l); mpz_init (m_1); mpz_init (lmin); mpz_sub (B2l, B2, B2min); mpz_add_ui (B2l, B2l, 1UL); /* +1 due to closed interval */ /* For each candidate P, check if [B2min, B2] can be covered at all, and if so, what the best parameters (minimizing the cost, maximizing effB2) are. If they are better than the best parameters for the best P so far, remember them. */ for (i = 0 ; i < Pvalues_len; i++) { unsigned long tryP, tryphiP, trys_1, trys_2, tryl, trycost; tryP = Pvalues[i]; tryphiP = eulerphi (tryP); outputf (OUTPUT_TRACE, "choose_P: trying P = %lu, eulerphi(P) = %lu\n", tryP, tryphiP); /* If we have a good P already and this tryphiP >= cost, then there's no hope for this tryP, since cost(s_2, l) > eulerphi(P) */ if (P != 0 && tryphiP >= cost) { outputf (OUTPUT_TRACE, "choose_P: tryphiP > cost = %lu, this P is too large\n", cost); continue; } /* We have nr < l and effB2-effB2min <= 2*nr*P. 
Hence we need l >= B2l/P/2 */ mpz_cdiv_q_ui (lmin, B2l, tryP); mpz_cdiv_q_2exp (lmin, lmin, 1UL); outputf (OUTPUT_TRACE, "choose_P: lmin = %Zd for P = %lu\n", lmin, tryP); if (mpz_cmp_ui (lmin, lmax) > 0) { outputf (OUTPUT_TRACE, "choose_P: lmin > lmax, this P is too small\n"); continue; } /* Try all possible transform lengths and store parameters in P, s_1, s_2, l if they are better than the previously best ones */ /* Keep reducing tryl to find best parameters. For NTT, we only have power of 2 lengths so far, so we can simply divide by 2. For non-NTT, we have arbitrary transform lengths so we can decrease in smaller steps... let's say by, umm, 25% each time? */ for (tryl = lmax; mpz_cmp_ui (lmin, tryl) <= 0; tryl = (use_ntt) ? tryl / 2 : 3 * tryl / 4) { trys_1 = choose_s_1 (tryphiP, min_s2, tryl / 2, use_ntt); if (trys_1 == 0) { outputf (OUTPUT_TRACE, "choose_P: could not choose s_1 for P = %lu, l = %lu\n", tryP, tryl); continue; } ASSERT (tryphiP % trys_1 == 0UL); trys_2 = tryphiP / trys_1; outputf (OUTPUT_TRACE, "choose_P: chose s_1 = %lu, k = s_2 = %lu " "for P = %lu, l = %lu\n", trys_1, trys_2, tryP, tryl); if (test_P (B2min, B2, m_1, tryP, tryl - trys_1, effB2min, tryeffB2)) { outputf (OUTPUT_TRACE, "choose_P: P = %lu, l = %lu, s_1 = %lu, k = s_2 = %lu " "works, m_1 = %Zd, effB2min = %Zd, effB2 = %zZd\n", tryP, tryl, trys_1, trys_2, m_1, effB2min, tryeffB2); /* We use these parameters if we 1. didn't have any suitable ones yet, or 2. these cover [B2min, B2] and are cheaper than the best ones so far, or 3. they are as expensive but reach greater effB2. 
*/ trycost = est_cost (trys_2, tryl, use_ntt, method); ASSERT (tryphiP < trycost); if (P == 0 || trycost < cost || (trycost == cost && mpz_cmp (tryeffB2, effB2) > 0)) { outputf (OUTPUT_TRACE, "choose_P: and is the new optimum (cost = %lu)\n", trycost); P = tryP; s_1 = trys_1; s_2 = trys_2; l = tryl; cost = trycost; mpz_set (effB2, tryeffB2); } } } } if (P != 0) /* If we found a suitable P */ { /* Compute m_1, effB2min, effB2 again */ r = test_P (B2min, B2, m_1, P, l - s_1, effB2min, effB2); ASSERT_ALWAYS(r != 0); if (finalparams != NULL) { finalparams->P = P; finalparams->s_1 = s_1; finalparams->s_2 = s_2; finalparams->l = l; mpz_set (finalparams->m_1, m_1); } if (final_B2min != NULL) mpz_set (final_B2min, effB2min); if (final_B2 != NULL) mpz_set (final_B2, effB2); } mpz_clear (effB2); mpz_clear (tryeffB2); mpz_clear (effB2min); mpz_clear (B2l); mpz_clear (m_1); mpz_clear (lmin); return (P != 0) ? (long) P : ECM_ERROR; } static void list_output_poly (listz_t l, unsigned long len, int monic, int symmetric, char *prefix, char *suffix, int verbosity) { unsigned long i; if (prefix != NULL) outputf (verbosity, prefix); if (len == 0) { if (monic) outputf (verbosity, "1\n", len, len); else outputf (verbosity, "0\n", len); return; } if (monic) { if (symmetric) outputf (verbosity, "(x^%lu + x^-%lu) + ", len, len); else outputf (verbosity, "x^%lu + ", len); } for (i = len - 1; i > 0; i--) if (symmetric) outputf (verbosity, "%Zd * (x^%lu + x^-%lu) + ", l[i], i, i); else outputf (verbosity, "%Zd * x^%lu + ", l[i], i); outputf (verbosity, "%Zd", l[0]); if (suffix != NULL) outputf (verbosity, suffix); } /* Multiply P[i] by r^{k(deg-i)}, for 0 <= i <= deg. Needs 3 entries in tmp. */ /* I.e., let P(x) = x^deg + \sum_{i=0}^{deg - 1} P[i] * x^i. The output is R(x) = x^deg + \sum_{i=0}^{deg - 1} R[i] * x^i = r^(k deg) P(r^{-k} x). */ /* The input and output polynomials are monic and have the leading monomial implicit, i.e. not actually stored in the array of coefficients. 
*/ /* Returns 0 if a modular inversion failed (in which case R is left unchanged), 1 otherwise */ static int ATTRIBUTE_UNUSED list_scale_rev (listz_t R, listz_t S, mpz_t r, long k, unsigned long deg, mpz_t modulus, listz_t tmp, ATTRIBUTE_UNUSED const unsigned long tmplen) { unsigned long i; ASSERT (tmplen >= 3); mpz_powm_ui (tmp[0], r, (unsigned long) labs (k), modulus); if (k < 0) { if (!mpz_invert (tmp[0], tmp[0], modulus)) /* FIXME: get rid of this! */ return 0; } /* Here, tmp[0] = r^k */ mpz_set (tmp[1], tmp[0]); /* mpz_set (R[deg], S[deg]); Leading monomial is not stored! */ for (i = 1; i + 1 <= deg; i++) { /* Here, tmp[1] = r^(ki) */ mpz_mul (tmp[2], S[deg-i], tmp[1]); mpz_mod (R[deg-i], tmp[2], modulus); mpz_mul (tmp[2], tmp[1], tmp[0]); /* FIXME, avoid unnecessary mul */ mpz_mod (tmp[1], tmp[2], modulus); /* at end of loop */ } if (i <= deg) { mpz_mul (tmp[2], S[deg-i], tmp[1]); mpz_mod (R[deg-i], tmp[2], modulus); } return 1; } /* Same, but does squaring which makes things easier */ static void list_sqr_reciprocal (listz_t R, listz_t S, const unsigned long l, mpz_t modulus, listz_t tmp, ATTRIBUTE_UNUSED const unsigned long tmplen) { unsigned long i; listz_t Srev, r1 = tmp, r2 = tmp + 2 * l - 1, t = tmp + 4 * l - 2; if (l == 0UL) return; /* FIXME: This modifies the input arguments. 
*/ /* We have to divide S[0] by 2 */ ASSERT (tmplen >= 4 * l - 2 + list_mul_mem (l)); #if 0 gmp_printf ("/* list_sqr_reciprocal */ S(x) = %Zd", S[0]); for (i = 1; i < l1; i++) gmp_printf (" + %Zd * (x^%lu + 1/x^%lu)", S[i], i, i); gmp_printf ("\n"); #endif if (mpz_odd_p (S[0])) { ASSERT_ALWAYS (mpz_odd_p (modulus)); mpz_add (S[0], S[0], modulus); } mpz_tdiv_q_2exp (S[0], S[0], 1UL); list_mul (r1, S, l, 0, S, l, 0, t); /* r1 = f0*g0/4 + (f0*g1 + f1*g0)/2 * x + f1*g1 * x^2 */ #if 0 for (i = 0; i < 2UL * l - 1UL; i++) gmp_printf ("list_sqr_reciprocal: r1[%lu] = %Zd\n", i, r1[i]); #endif Srev = (listz_t) malloc (l * sizeof (mpz_t)); ASSERT_ALWAYS (Srev != NULL); for (i = 0UL; i < l; i++) (*Srev)[i] = (*S)[l - 1UL - i]; list_mul (r2, S, l, 0, Srev, l, 0, t); /* r2 is symmetric, r2[i] = r2[2*l - 2 - i]. Check this */ #if 0 for (i = 0; 0 && i < 2UL * l - 1UL; i++) gmp_printf ("list_sqr_reciprocal: r2[%lu] = %Zd\n", i, r2[i]); #endif #ifdef WANT_ASSERT for (i = 0UL; i < l; i++) ASSERT (mpz_cmp (r2[i], r2[2UL * l - 2UL - i]) == 0); #endif free (Srev); /* r2 = g1*f0/2 + (g0*f0/4 + g1*f1) * x + g0*f1/2 * x^2 */ #if 0 for (i = 0; i < 2UL * l - 1UL; i++) gmp_printf ("list_sqr_reciprocal: r2[%lu] = %Zd\n", i, r2[i]); #endif mpz_mul_2exp (r1[0], r1[0], 1UL); /* r1 = f0*g0/2 + (f0*g1 + f1*g0)/2 * x + f1*g1 * x^2 */ for (i = 0UL; i < l; i++) { mpz_mul_2exp (r2[l - i - 1UL], r2[l - i - 1UL], 1UL); mpz_add (R[i], r1[i], r2[l - i - 1UL]); } /* r1 = 3/4*f0*g0 + g1*f1 + (f0*g1 + 2*f1*g0)/2 * x + f1*g1 * x^2 */ /* r1 = f0*g0 + 2*g1*f1 + (f0*g1 + f1*g0) * x + f1*g1 * x^2 */ for (i = l; i < 2UL * l - 1UL; i++) mpz_set (R[i], r1[i]); if (R != S) mpz_mul_2exp (S[0], S[0], 1UL); #if 0 for (i = 0; i < 2UL * l; i++) gmp_printf ("list_sqr_reciprocal: R[%lu] = %Zd\n", i, R[i]); #endif } ATTRIBUTE_UNUSED static void list_recip_eval1 (mpz_t R, const listz_t S, const unsigned long l) { unsigned long i; mpz_set_ui (R, 0UL); for (i = 1; i < l; i++) mpz_add (R, R, S[i]); mpz_mul_2exp (R, R, 1UL); if (l 
> 0UL) mpz_add (R, R, S[0]); } /* Multiply two reciprocal polynomials of degree 2*l1-2 and 2*l2-2, resp., with coefficients in standard basis S_1(x) = S1[0] + sum_{1 \leq i \leq l1 - 1} S1[i] (x^i + x^{-i}) S_2(x) = S2[0] + sum_{1 \leq i \leq l2 - 1} S2[i] (x^i + x^{-i}) to the reciprocal polynomial of degree 2*(l1 + l2) - 4 R(x) = R[0] + sum_{1 \leq i \leq l1 + l2 - 2} R[i] (x^i + x^{-i}) = S_1(x) * S_2(x) R == S1 == S2 is permissible, however if S1 == S2, l1 must be equal to l2 (i.e. the multiplication must be a squaring) */ /* FIXME: This modifies the input arguments. */ /* We have to divide S1[0] and S2[0] by 2 */ static void list_mul_reciprocal (listz_t R, listz_t S1, unsigned long l1, listz_t S2, unsigned long l2, mpz_t modulus, listz_t tmp, ATTRIBUTE_UNUSED const unsigned long tmplen) { unsigned long i; const unsigned long lmax = MAX(l1, l2); listz_t r1 = tmp, r2 = tmp + 2*lmax - 1, rev = tmp + 4*lmax - 2, t = tmp + 6*lmax - 3; #ifdef WANT_ASSERT mpz_t sum1, sum2, prod; #endif ASSERT (S1 < tmp || S1 >= tmp + tmplen); ASSERT (S2 < tmp || S2 >= tmp + tmplen); ASSERT (R < tmp || R >= tmp + tmplen); if (l1 == 0UL || l2 == 0UL) return; if (S1 == S2) { ASSERT_ALWAYS (l1 == l2); list_sqr_reciprocal (R, S1, l1, modulus, tmp, tmplen); return; } ASSERT (tmplen >= 6*lmax - 3 + list_mul_mem (lmax)); #ifdef WANT_ASSERT mpz_init (sum1); mpz_init (sum2); mpz_init (prod); list_recip_eval1 (sum1, S1, l1); list_recip_eval1 (sum2, S2, l2); mpz_mul (prod, sum1, sum2); mpz_mod (prod, prod, modulus); #endif /* Make S1 the longer of the two, i.e. 
l1 >= l2 */ if (l2 > l1) { listz_t St = S1; unsigned long lt = l1; S1 = S2; S2 = St; l1 = l2; l2 = lt; } #if 0 gmp_printf ("/* list_mul_reciprocal */ S1(x) = %Zd", S1[0]); for (i = 1; i < l1; i++) gmp_printf (" + %Zd * (x^%lu + 1/x^%lu)", S1[i], i, i); gmp_printf ("\n"); gmp_printf ("/* list_mul_reciprocal */ S2(x) = %Zd", S2[0]); for (i = 1; i < l1; i++) gmp_printf (" + %Zd * (x^%lu + 1/x^%lu)", S2[i], i, i); gmp_printf ("\n"); #endif /* Divide S1[0] and S2[0] by 2 */ if (mpz_odd_p (S1[0])) { ASSERT_ALWAYS (mpz_odd_p (modulus)); mpz_add (S1[0], S1[0], modulus); } mpz_tdiv_q_2exp (S1[0], S1[0], 1UL); if (mpz_odd_p (S2[0])) { ASSERT_ALWAYS (mpz_odd_p (modulus)); mpz_add (S2[0], S2[0], modulus); } mpz_tdiv_q_2exp (S2[0], S2[0], 1UL); /* Pad rev with zeros */ for (i = l2; i < lmax; i++) mpz_set_ui (rev[i], 0UL); for (i = 0UL; i < l2; i++) mpz_set (rev[i], S2[l2 - 1UL - i]); list_mul (r1, S1, lmax, 0, rev, lmax, 0, t); /* r1 = \tilde{f}(x) \rev(\tilde{g}(x)) and has degree l1 + l2 - 2, i.e. l1 + l2 - 1 entries. */ #if 0 for (i = 0; i < 2 * lmax - 1; i++) gmp_printf ("list_mul_reciprocal: r1[%lu] = %Zd\n", i, r1[i]); #endif for (i = 0UL; i < l2; i++) mpz_set(rev[i], S2[i]); list_mul (r2, S1, lmax, 0, rev, lmax, 0, t); /* \tilde{f}(x) \tilde{g}(x) */ #if 0 for (i = 0; i < 2 * lmax - 1; i++) gmp_printf ("list_mul_reciprocal: r2[%lu] = %Zd\n", i, r2[i]); #endif /* Add f_0*g_0 by doubling the f_0*g_0 term in r2 */ mpz_mul_2exp (r2[0], r2[0], 1UL); /* Add \flloor x^{-d_g} \tilde{f}(x) \rev(\tilde{g}(x)) \rfloor. d_g = l2 - 1. */ for (i = 0; i < l1; i++) mpz_add (r2[i], r2[i], r1[i + l2 - 1]); /* Add \floor x^{-d_f} rev(\tilde{f}(x) \rev(\tilde{g}(x))) \rfloor. d_f = l1 - 1. rev(r2)[i] = r2[l1 + l2 - 2 - i]. We want rev(r2)[l1 - 1 ... l1 + l2 - 2], hence r2[l2 - 1 ... 
0] */ for (i = 0; i < l2; i++) mpz_add (r2[i], r2[i], r1[l2 - 1 - i]); #if 0 for (i = 0; i < l1 + l2 - 1; i++) gmp_printf ("list_mul_reciprocal: r2[%lu] = %Zd\n", i, r2[i]); #endif mpz_mul_2exp (S1[0], S1[0], 1UL); mpz_mul_2exp (S2[0], S2[0], 1UL); for (i = 0; i < l1 + l2 - 1; i++) mpz_set (R[i], r2[i]); #if 0 for (i = 0; i < l1 + l2 - 1; i++) gmp_printf ("list_mul_reciprocal: R[%lu] = %Zd\n", i, R[i]); #endif #ifdef WANT_ASSERT list_recip_eval1 (sum1, R, l1 + l2 - 1); mpz_mod (sum1, sum1, modulus); ASSERT (mpz_cmp (prod, sum1) == 0); mpz_clear (sum1); mpz_clear (sum2); mpz_clear (prod); #endif } /* Multiply a (possibly monic) polynomial A of length k * len with a (possibly monic) polynomial B of length len. R may be identical to A. */ static void ATTRIBUTE_UNUSED list_mul_blocks (listz_t R, const listz_t A, int monicA, const listz_t B, int monicB, const unsigned long len, const unsigned int k, listz_t tmp, ATTRIBUTE_UNUSED const unsigned long tmplen) { unsigned int j; if (k == 0 || len == 0) return; ASSERT (R != B); ASSERT (tmplen >= 3 * len + list_mul_mem (len)); /* Do first piece of A */ list_mul (tmp, A, len, (monicA && k == 1), B, len, monicB, tmp + 2 * len); list_set (R, tmp, len); /* May overwrite A[0 ... len-1] */ list_swap (tmp, tmp + len, len); /* Move high part to tmp[0 ... 
len-1] */ for (j = 1; j < k; j++) /* Process the remaining k-1 pieces of A */ { list_mul (tmp + len, A + j * len, len, (monicA && j + 1 == k), B, len, monicB, tmp + 3 * len); /* Add low part of this product and previous product's high part */ list_add (A + j * len, tmp, tmp + len, len); list_swap (tmp, tmp + 2 * len, len); /* Move this product's high part to beginning of tmp */ } list_set (A + j * len, tmp, len); /* Move the high part of last product */ } /* Computes V_k(S), where the Chebyshev polynomial V_k(X) is defined by V_k(X + 1/X) = X^k + 1/X^k */ static void V (mpres_t R, const mpres_t S, const long k, mpmod_t modulus) { mpres_t V0, Vi, Vi1; unsigned long j, uk; int po2; if (k == 0L) { mpres_set_ui (R, 2UL, modulus); return; } uk = labs (k); if (uk == 1UL) { mpres_set (R, S, modulus); return; } for (po2 = 0; uk % 2UL == 0UL; uk >>= 1, po2++); mpres_init (V0, modulus); mpres_set_ui (V0, 2UL, modulus); /* V0 = V_0(S) = 2 */ if (uk == 1UL) { mpres_set (R, S, modulus); while (po2-- > 0) { mpres_sqr (R, R, modulus); mpres_sub (R, R, V0, modulus); } mpres_clear (V0, modulus); return; } if (0) { mpz_t tz; mpz_init (tz); mpres_get_z (tz, S, modulus); gmp_printf ("Chebyshev_V(%ld, Mod(%Zd,N)) == ", k, tz); mpz_clear (tz); } for (j = 1UL; j <= uk / 2UL; j <<= 1); mpres_init (Vi, modulus); mpres_init (Vi1, modulus); /* i = 1. Vi = V_i(S), Vi1 = V_{i+1}(S) */ mpres_set (Vi, S, modulus); mpres_sqr (Vi1, S, modulus); mpres_sub (Vi1, Vi1, V0, modulus); j >>= 1; while (j > 1) { if ((uk & j) != 0UL) { /* i' = 2i + 1. V_{i'} = V_{2i + 1} = V_{i+1 + i} = V_{i+1} * V_{i} - V_1 V_{i'+1} = V_{2i + 2} = {V_{i+1}}^2 - V_0. */ mpres_mul (Vi, Vi, Vi1, modulus); mpres_sub (Vi, Vi, S, modulus); mpres_sqr (Vi1, Vi1, modulus); mpres_sub (Vi1, Vi1, V0, modulus); } else { /* i' = 2i. V_{i'} = V_{2i} = {V_i}^2 - V0. 
V_{i'+1} = V_{2i + 1} = V_{i+1 + i} = V_{i+1} * V_{i} - V_1 */ mpres_mul (Vi1, Vi, Vi1, modulus); mpres_sub (Vi1, Vi1, S, modulus); mpres_sqr (Vi, Vi, modulus); mpres_sub (Vi, Vi, V0, modulus); } j >>= 1; } /* Least significant bit of uk is always 1 */ mpres_mul (Vi, Vi, Vi1, modulus); mpres_sub (Vi, Vi, S, modulus); while (po2-- > 0) { mpres_sqr (Vi, Vi, modulus); mpres_sub (Vi, Vi, V0, modulus); } mpres_set (R, Vi, modulus); mpres_clear (Vi, modulus); mpres_clear (Vi1, modulus); mpres_clear (V0, modulus); if (0) { mpz_t tz; mpz_init (tz); mpres_get_z (tz, R, modulus); gmp_printf ("%Zd\n", tz); mpz_clear (tz); } } /* Computes U_k(S), where the Chebyshev polynomial U_k(X) is defined by U_k(X + 1/X) = (X^k - 1/X^k) / (X - 1/X) If R1 != NULL, stores U_{k+1}(S) there */ static void U (mpres_t R, mpres_t R1, const mpres_t S, const long k, mpmod_t modulus) { mpres_t V0, Vi, Vi1, Ui, Ui1, t; unsigned long j, uk; if (k == 0L) { mpres_set_ui (R, 0UL, modulus); /* U_0 = 0 */ if (R1 != NULL) mpres_set_ui (R1, 1UL, modulus); /* U_1 = 1 */ return; } uk = labs (k); if (uk == 1UL) { mpres_set_ui (R, 1UL, modulus); if (k == -1) mpres_neg (R, R, modulus); if (R1 != NULL) { if (k == -1) mpres_set_ui (R1, 0UL, modulus); else mpres_set (R1, S, modulus); /* U_2(S) = S */ } return; } if (0) { mpz_t tz; mpz_init (tz); mpres_get_z (tz, S, modulus); gmp_printf ("Chebyshev_U(%ld, Mod(%Zd,N)) == ", k, tz); mpz_clear (tz); } mpres_init (V0, modulus); mpres_init (Vi, modulus); mpres_init (Vi1, modulus); mpres_init (Ui, modulus); mpres_init (Ui1, modulus); mpres_init (t, modulus); for (j = 1UL; j <= uk / 2UL; j <<= 1); mpres_set_ui (Ui, 1UL, modulus); /* Ui = U_1(S) = 1 */ mpres_set (Ui1, S, modulus); /* Ui1 = U_2(S) = S */ mpres_add (V0, Ui, Ui, modulus); /* V0 = V_0(S) = 2 */ mpres_set (Vi, S, modulus); /* Vi = V_1(S) = S */ mpres_sqr (Vi1, Vi, modulus); mpres_sub (Vi1, Vi1, V0, modulus); /* Vi1 = V_2(S) = S^2 - 2 */ j >>= 1; /* i = 1 */ while (j != 0) { if ((uk & j) == 0UL) { mpres_mul 
(Vi1, Vi1, Vi, modulus); mpres_sub (Vi1, Vi1, S, modulus); /* V_{2i+1} = V_{i+1} V_i - V_1 */ /* U_{2i+1} = (U_{i+1} + U_i) (U_{i+1} - U_i) */ mpres_sub (t, Ui1, Ui, modulus); mpres_add (Ui1, Ui1, Ui, modulus); mpres_mul (Ui1, Ui1, t, modulus); mpres_mul (Ui, Ui, Vi, modulus); /* U_{2n} = U_n V_n */ mpres_sqr (Vi, Vi, modulus); mpres_sub (Vi, Vi, V0, modulus); /* V_{2n} = V_n^2 - 2 */ } else { /* U_{2i+1} = (U_{i+1} + U_i) (U_{i+1} - U_i) */ mpres_sub (t, Ui1, Ui, modulus); mpres_add (Ui, Ui, Ui1, modulus); mpres_mul (Ui, Ui, t, modulus); mpres_mul (Ui1, Ui1, Vi1, modulus); /* U_{2n+2} = U_{n+1} V_{n+1} */ mpres_mul (Vi, Vi, Vi1, modulus); mpres_sub (Vi, Vi, S, modulus); /* V_{2i+1} = V_{i+1} V_i - V_1 */ mpres_sqr (Vi1, Vi1, modulus); mpres_sub (Vi1, Vi1, V0, modulus); /* V_{2n+2} = V_{n+1}^2 - 2 */ } j >>= 1; } if (k > 0) mpres_set (R, Ui, modulus); else mpres_neg (R, Ui, modulus); if (R1 != NULL) { /* Here k != -1,0,1, so k+1 is negative iff k is */ if (k > 0) mpres_set (R1, Ui1, modulus); else mpres_neg (R1, Ui1, modulus); } mpres_clear (V0, modulus); mpres_clear (Vi, modulus); mpres_clear (Vi1, modulus); mpres_clear (Ui, modulus); mpres_clear (Ui1, modulus); mpres_clear (t, modulus); if (0) { mpz_t tz; mpz_init (tz); mpres_get_z (tz, R, modulus); gmp_printf ("%Zd\n", tz); mpz_clear (tz); } } /* Set R[i] = V_{i+k}(Q) * F[i] or U_{i+k}(Q) * F[i], for 0 <= i < len We compute V_{i+k+1}(Q) by V_{i+k}(Q)*V_1(Q) - V_{i+k-1}(Q). For U, we compute U_{i+k+1}(Q) by U_{i+k}(Q)*V_1(Q) - U_{i+k-1}(Q). The values of V_1(Q), V_{k-1}(Q) and V_k(Q) and V_k(Q) are in V1, Vk_1 and Vk, resp. The values of Vk_1 and Vk are clobbered. 
*/ static void scale_by_chebyshev (listz_t R, const listz_t F, const unsigned long len, mpmod_t modulus, const mpres_t V1, mpres_t Vk_1, mpres_t Vk) { mpres_t Vt; unsigned long i; mpres_init (Vt, modulus); for (i = 0; i < len; i++) { mpres_mul_z_to_z (R[i], Vk, F[i], modulus); mpres_mul (Vt, Vk, V1, modulus); mpres_sub (Vt, Vt, Vk_1, modulus); mpres_set (Vk_1, Vk, modulus); /* Could be a swap */ mpres_set (Vk, Vt, modulus); /* Could be a swap */ } mpres_clear (Vt, modulus); } /* For a given reciprocal polynomial F(x) = f_0 + sum_{i=1}^{deg} f_i V_i(x+1/x), compute F(\gamma x)F(\gamma^{-1} x), with Q = \gamma + 1 / \gamma If NTT is used, needs 4 * deg + 3 entries in tmp. If no NTT is used, needs 4 * deg + 2 + (memory use of list_sqr_reciprocal) */ static void list_scale_V (listz_t R, const listz_t F, const mpres_t Q, const unsigned long deg, mpmod_t modulus, listz_t tmp, const unsigned long tmplen, mpzspv_t dct, const mpzspm_t ntt_context) { mpres_t Vt; unsigned long i; const listz_t G = tmp, H = tmp + 2 * deg + 1, newtmp = tmp + 4 * deg + 2; const unsigned long newtmplen = tmplen - 4 * deg - 2; #ifdef WANT_ASSERT mpz_t leading; #endif if (deg == 0) { ASSERT(tmplen >= 1); mpz_mul (tmp[0], F[0], F[0]); mpz_mod (R[0], tmp[0], modulus->orig_modulus); return; } /* Make sure newtmplen does not underflow */ ASSERT_ALWAYS (tmplen >= 4 * deg + 2); #ifdef WANT_ASSERT mpz_init (leading); mpz_mul (leading, F[deg], F[deg]); mpz_mod (leading, leading, modulus->orig_modulus); #endif /* Generate V_1(Q)/2 ... V_{deg}(Q)/2, multiply by f_i to form coefficients of G(x). Square the symmetric G(x) polynomial. */ outputf (OUTPUT_TRACE, "list_scale_V: Q=%Zd, deg = %lu\n", Q, deg); list_output_poly (F, deg + 1, 0, 1, "/* list_scale_V */ F(x) = ", "\n", OUTPUT_TRACE); /* Compute G[i] = V_i(Q)/2 * F[i] for i = 0, ..., deg. 
For i=0, V_0(Q) = 2, so G[0] = F[0], which leaves deg entries to process */ mpz_set (G[0], F[0]); #if defined(_OPENMP) #pragma omp parallel if (deg > 1000) #endif { const int nr_chunks = omp_get_num_threads(); const int thread_nr = omp_get_thread_num(); mpmod_t modulus_local; unsigned long l, start_i; mpres_t Vi, Vi_1; l = (deg - 1) / nr_chunks + 1; /* l = ceil (deg / nr_chunks) */ start_i = thread_nr * l + 1; l = MIN(l, deg + 1 - start_i); mpmod_init_set (modulus_local, modulus); mpres_init (Vi_1, modulus_local); mpres_init (Vi, modulus_local); V (Vi, Q, start_i, modulus_local); mpres_div_2exp (Vi, Vi, 1, modulus_local); V (Vi_1, Q, start_i - 1UL, modulus_local); mpres_div_2exp (Vi_1, Vi_1, 1, modulus_local); scale_by_chebyshev (G + start_i, F + start_i, l, modulus_local, Q, Vi_1, Vi); mpres_clear (Vi_1, modulus_local); mpres_clear (Vi, modulus_local); mpmod_clear (modulus_local); } list_output_poly (G, deg + 1, 0, 1, "/* list_scale_V */ G(x) = ", "\n", OUTPUT_TRACE); /* Now square the G polynomial in G[0 .. deg], put result in G[0 .. 2*deg] */ /* Bugfix: ks_multiply() does not like negative coefficients. 
FIXME */ for (i = 0; i <= deg; i++) if (mpz_sgn (G[i]) < 0) { mpz_add (G[i], G[i], modulus->orig_modulus); /* FIXME: make sure the absolute size does not "run away" */ if (mpz_sgn (G[i]) < 0) { outputf (OUTPUT_ERROR, "list_scale_V: G[%lu] still negative\n", i); mpz_mod (G[i], G[i], modulus->orig_modulus); } } if (dct != NULL && ntt_context != NULL) ntt_sqr_reciprocal (G, G, dct, deg + 1, ntt_context); else list_sqr_reciprocal (G, G, deg + 1, modulus->orig_modulus, newtmp, newtmplen); list_output_poly (G, 2 * deg + 1, 0, 1, "/* list_scale_V */ G(x)^2 == ", "\n", OUTPUT_TRACE); /* Compute H[i-1] = U_i(Q)/2 * F[i] for i = 1, ..., deg */ #if defined(_OPENMP) #pragma omp parallel if (deg > 1000) #endif { const int nr_chunks = omp_get_num_threads(); const int thread_nr = omp_get_thread_num(); mpmod_t modulus_local; unsigned long l, start_i; mpres_t Ui, Ui_1; l = (deg - 1) / nr_chunks + 1; /* l = ceil(deg / nr_chunks) */ start_i = thread_nr * l + 1UL; l = MIN(l, deg + 1 - start_i); mpmod_init_set (modulus_local, modulus); mpres_init (Ui_1, modulus_local); mpres_init (Ui, modulus_local); U (Ui_1, Ui, Q, start_i - 1, modulus_local); mpres_div_2exp (Ui, Ui, 1, modulus_local); mpres_div_2exp (Ui_1, Ui_1, 1, modulus_local); scale_by_chebyshev (H - 1 + start_i, F + start_i, l, modulus_local, Q, Ui_1, Ui); mpres_clear (Ui_1, modulus_local); mpres_clear (Ui, modulus_local); mpmod_clear (modulus_local); } /* Convert H to standard basis */ /* We can do it in-place with H - 1 = H_U. */ for (i = deg; i >= 3; i--) { mpz_add (H[i - 3], H[i - 3], H[i - 1]); if (mpz_cmp (H[i - 3], modulus->orig_modulus) >= 0) mpz_sub (H[i - 3], H[i - 3], modulus->orig_modulus); } /* U_2(X+1/X) = (X^2 - 1/X^2)/(X-1/X) = X+1/X = V_1(X+1/X), so no addition occures here */ /* if (deg >= 2) mpz_set (H[1], H[1]); Again, a no-op. */ /* U_1(X+1/X) = 1, so this goes to coefficient of index 0 in std. basis */ /* mpz_set (H[0], H[0]); Another no-op. */ /* Now H[0 ... 
deg-1] contains the deg coefficients in standard basis of symmetric H(X) of degree 2*deg-2. */ list_output_poly (H, deg, 0, 1, "/* list_scale_V */ H(x) = ", "\n", OUTPUT_TRACE); /* Square the symmetric H polynomial of degree 2*deg-2 (i.e. with deg coefficents in standard basis in H[0 ... deg-1]) */ /* Bugfix: ks_multiply() does not like negative coefficients. */ for (i = 0; i <= deg; i++) if (mpz_sgn (H[i]) < 0) { mpz_add (H[i], H[i], modulus->orig_modulus); if (mpz_sgn (H[i]) < 0) { outputf (OUTPUT_ERROR, "list_scale_V: H[%lu] still negative\n", i); mpz_mod (H[i], H[i], modulus->orig_modulus); } } if (dct != NULL && ntt_context != NULL) ntt_sqr_reciprocal (H, H, dct, deg, ntt_context); else list_sqr_reciprocal (H, H, deg, modulus->orig_modulus, newtmp, newtmplen); /* Now there are the 2*deg-1 coefficients in standard basis of a symmetric polynomial of degree 4*deg - 4 in H[0 ... 2*deg-2] */ list_output_poly (H, 2*deg - 1, 0, 1, "/* list_scale_V */ H(x)^2 == ", "\n", OUTPUT_TRACE); /* Multiply by Q^2-4 */ mpres_init (Vt, modulus); mpres_sqr (Vt, Q, modulus); mpres_sub_ui (Vt, Vt, 4, modulus); #if defined(_OPENMP) #pragma omp parallel if (deg > 1000) { mpmod_t modulus_local; long i; /* OpenMP insists on signed loop iteration var :( */ mpmod_init_set (modulus_local, modulus); #pragma omp for for (i = 0; (unsigned long) i <= 2 * deg - 2; i++) mpres_mul_z_to_z (H[i], Vt, H[i], modulus_local); mpmod_clear (modulus_local); } #else for (i = 0; (unsigned long) i <= 2 * deg - 2; i++) mpres_mul_z_to_z (H[i], Vt, H[i], modulus); #endif list_output_poly (H, 2 * deg - 1, 0, 1, "/* list_scale_V */ " "H(x)^2*(Q^2-4) == ", "\n", OUTPUT_TRACE); /* Multiply by (X - 1/X)^2 = X^2 - 2 + 1/X^2 and subtract from G */ ASSERT (newtmplen > 0UL); if (deg == 1) { /* H(X) has degree 2*deg-2 = 0, so H(X) = h_0 H(X) * (X - 1/X)^2 = -2 h_0 + h_0 V_2(Y) */ mpz_mul_2exp (newtmp[0], H[0], 1UL); mpz_add (G[0], G[0], newtmp[0]); /* G[0] -= -2*H[0] */ mpz_sub (G[2], G[2], H[0]); } else if (deg == 2) { 
/* H(X) has degree 2*deg-2 = 2, , so H(X) = h_0 + h_1 (X+1/X) + h_2 (X^2+1/X^2) H(X) * (X - 1/X)^2 = -2*(h_0 - h_2) - h_1 * V_1(Y) + (h_0 - 2*h_2) * V_2(Y) + h_1 * V_3(Y) + h_2 * V_4(Y) */ mpz_sub (newtmp[0], H[0], H[2]); /* h_0 - h_2 */ mpz_mul_2exp (newtmp[0], newtmp[0], 1UL); /* 2*(h_0 - h_2) */ mpz_add (G[0], G[0], newtmp[0]); /* G[0] -= -2*(h_0 - h_2) */ mpz_add (G[1], G[1], H[1]); /* G[1] -= -h_1 */ mpz_sub (newtmp[0], newtmp[0], H[0]); /* h_0 - 2*h_2 */ mpz_sub (G[2], G[2], newtmp[0]); /* G[2] -= h_0 - 2*h_2 */ mpz_sub (G[3], G[3], H[1]); /* G[3] -= h_1 */ mpz_sub (G[4], G[4], H[2]); /* G[3] -= h_2 */ } else { /* Let H(X) = h_0 + \sum_{i=1}^{n} h_i V_i(Y), Y = X+1/X. Then (x - 1/x)^2 H(X) = -2(h_0 - h_2) + (- h_1 + h_3) V_1(Y) + \sum_{i=2}^{n-2} (h_{i-2} - 2h_i + h_{i+2}) V_i(Y) + (h_{n-3} - 2h_{n-1}) V_{n-1}(Y) + (h_{n-2} - 2h_n) V_n(Y) + h_{n-1} V_{n+1}(Y) + h_n V_{n+2}(Y) In our case, n = 2 * deg - 2 */ mpz_sub (newtmp[0], H[0], H[2]); mpz_mul_2exp (newtmp[0], newtmp[0], 1UL); /* t[0] = 2*(h_0 - h_2) */ mpz_add (G[0], G[0], newtmp[0]); /* G[0] -= -2*(h_0 - h_2) */ mpz_add (G[1], G[1], H[1]); mpz_sub (G[1], G[1], H[3]); /* G[1] -= -h_1 + h_3 */ for (i = 2; i <= 2 * deg - 4; i++) { mpz_mul_2exp (newtmp[0], H[i], 1); mpz_sub (newtmp[0], newtmp[0], H[i - 2]); mpz_sub (newtmp[0], newtmp[0], H[i + 2]); /* 2h_i-h_{i-2}-h_{i+2} */ mpz_add (G[i], G[i], newtmp[0]); /* G[i] -= -2h_i+h_{i-2}+h_{i+2} */ } for ( ; i <= 2 * deg - 2; i++) { mpz_mul_2exp (newtmp[0], H[i], 1UL); mpz_sub (newtmp[0], H[i - 2], newtmp[0]); /* h_{n-3} - 2h_{n-1} */ mpz_sub (G[i], G[i], newtmp[0]); } mpz_sub (G[i], G[i], H[i - 2]); mpz_sub (G[i + 1], G[i + 1], H[i - 1]); } for (i = 0; i <= 2 * deg; i++) mpz_mod (R[i], G[i], modulus->orig_modulus); if (test_verbose (OUTPUT_TRACE)) for (i = 0; i <= 2 * deg; i++) outputf (OUTPUT_TRACE, "list_scale_V: R[%lu] = %Zd\n", i, R[i]); #ifdef WANT_ASSERT mpz_mod (R[2 * deg], R[2 * deg], modulus->orig_modulus); ASSERT (mpz_cmp (leading, R[2 * deg]) == 0); 
mpz_clear (leading); #endif mpres_clear (Vt, modulus); } #ifdef WANT_ASSERT /* Check if l is an (anti-)symmetric, possibly monic, polynomial. Returns -1 if it is (anti-)symmetric, or the smallest index i where l[i] != l[len - 1 + monic - i]) If anti == 1, the list is checked for symmetry, if it is -1, for antisymmetry. This function is used only if assertions are enabled. */ static long int ATTRIBUTE_UNUSED list_is_symmetric (listz_t l, unsigned long len, int monic, int anti, mpz_t modulus, mpz_t tmp) { unsigned long i; ASSERT (monic == 0 || monic == 1); ASSERT (anti == 1 || anti == -1); if (monic && anti == 1 && mpz_cmp_ui (l[0], 1) != 0) return 0L; if (monic && anti == -1) { mpz_sub_ui (tmp, modulus, 1); if (mpz_cmp (tmp, l[0]) != 0) return 0L; } for (i = monic; i < len / 2; i++) { if (anti == -1) { /* Negate (mod modulus) */ if (mpz_sgn (l[i]) == 0) { if (mpz_sgn (l[len - 1 + monic - i]) != 0) return (long) i; } else { mpz_sub (tmp, modulus, l[i]); if (mpz_cmp (tmp, l[len - 1 + monic - i]) != 0) return (long) i; } } else if (mpz_cmp (l[i], l[len - 1 + monic - i]) != 0) return (long) i; } return -1L; } #endif /* Evaluate a polynomial of degree n-1 with all coefficients given in F[], or of degree n with an implicit leading 1 monomial not stored in F[], at x modulo modulus. Result goes in r. tmp needs 2 entries. */ ATTRIBUTE_UNUSED static void list_eval_poly (mpz_t r, const listz_t F, const mpz_t x, const unsigned long n, const int monic, const mpz_t modulus, listz_t tmp) { unsigned long i; mpz_set_ui (tmp[0], 1UL); mpz_set_ui (r, 0UL); for (i = 0UL; i < n; i++) { /* tmp[0] = x^i */ mpz_mul (tmp[1], F[i], tmp[0]); mpz_mod (tmp[1], tmp[1], modulus); mpz_add (r, r, tmp[1]); mpz_mul (tmp[1], tmp[0], x); mpz_mod (tmp[0], tmp[1], modulus); } if (monic) mpz_add (r, r, tmp[0]); mpz_mod (r, r, modulus); } /* Build a polynomial with roots r^2i, i in the sumset of the sets in "sets". The parameter Q = r + 1/r. This code uses the fact that the polynomials are symmetric. 
Requires that the first set in "sets" has cardinality 2,
   all sets must be symmetric around 0. The resulting polynomial of degree 
   2*d is F(x) = f_0 + \sum_{1 <= i <= d} f_i (x^i + 1/x^i). The coefficient
   f_i is stored in F[i], which therefore needs d+1 elements.

   Returns d, the number of the highest coefficient written to F.
   tmp (of tmplen entries), dct and ntt_context are passed through to 
   list_scale_V(); tmp is also used by list_mul_reciprocal().

   NOTE(review): while a set of odd cardinality c is processed, scratch
   copies are written starting at F[(2i+1)*(deg+1)], each of 2*deg+1 
   entries, i.e. up to F[(c-2)*(deg+1) + 2*deg], which lies beyond the
   c*deg+1 entries of that step's result. The caller presumably allocates
   F with slack for this - confirm against the allocation in the caller. */

static unsigned long
poly_from_sets_V (listz_t F, const mpres_t Q, sets_long_t *sets, 
		  listz_t tmp, const unsigned long tmplen, mpmod_t modulus,
		  mpzspv_t dct, const mpzspm_t ntt_context)
{
  unsigned long c, deg, i, nr;
  set_long_t *set = sets->sets;
  mpres_t Qt;
  
  ASSERT_ALWAYS (sets->nr > 0UL);
  ASSERT_ALWAYS (set->card == 2UL); /* Check that the cardinality of 
                                       first set is 2 */
  /* Check that first set is symmetric around 0 (we write card-1
     instead of 1 to avoid a compiler warning with clang 2.9) */
  ASSERT_ALWAYS (set->elem[0] == -set->elem[set->card - 1]);

  if (test_verbose (OUTPUT_TRACE))
    {
      mpz_t t;
      mpz_init (t);
      mpres_get_z (t, Q, modulus);
      outputf (OUTPUT_TRACE, "poly_from_sets_V (F, Q = %Zd, sets)\n", t);
      mpz_clear (t);
    }

  mpres_init (Qt, modulus);
  
  outputf (OUTPUT_DEVVERBOSE, " (processing set of size 2");

  /* Seed F with the degree-1 (in x + 1/x) polynomial of the first set.
     The second V() call reads and writes Qt, i.e. it is used in place. */
  V (Qt, Q, set->elem[0], modulus); /* First set in sets is {-k, k} */ 
  V (Qt, Qt, 2UL, modulus);         /* Qt = V_2k(Q) */
  
  mpres_neg (Qt, Qt, modulus);
  mpres_get_z (F[0], Qt, modulus);
  mpz_set_ui (F[1], 1UL);
  deg = 1UL;
  /* Here, F(x) = (x - r^{2k_1})(x - r^{-2k_1}) / x = 
                  (x^2 - x (r^{2k_1} + r^{-2k_1}) + 1) / x =
		  (x + 1/x) - V_{2k_1}(r + 1/r) */

  for (nr = sets->nr - 1UL; nr > 0UL; nr--)
    {
      /* Assuming the sets are sorted in order of ascending cardinality, 
         we process them back-to-front so the sets of cardinality 2 are 
         processed last, but skipping the first set which we processed 
         already. */
      
      /* Walk from the start of the list to set number nr; the list is
         only traversed forwards via sets_nextset() */
      set = sets_nextset (sets->sets); /* Skip first set */
      for (i = 1UL; i < nr; i++) /* Skip over remaining sets but one */
        set = sets_nextset (set);
        
      /* Process this set. We assume it is either of cardinality 2, or of 
	 odd cardinality */
      c = set->card;
      outputf (OUTPUT_DEVVERBOSE, " %lu", c);

      if (c == 2UL)
	{
	  /* Check it's symmetric (we write c-1 instead of 2 to avoid a
             compiler warning with clang 2.9) */
	  ASSERT_ALWAYS (set->elem[0] == -set->elem[c - 1]);
	  V (Qt, Q, set->elem[0], modulus);
	  V (Qt, Qt, 2UL, modulus);
	  /* In-place scaling: F is both input and output, degree doubles */
	  list_scale_V (F, F, Qt, deg, modulus, tmp, tmplen, dct, 
	                ntt_context);
	  deg *= 2UL;
	  ASSERT_ALWAYS (mpz_cmp_ui (F[deg], 1UL) == 0); /* Check it's monic */
	}
      else
	{
	  ASSERT_ALWAYS (c % 2UL == 1UL);
	  /* The middle element of a symmetric set of odd size must be 0 */
	  ASSERT_ALWAYS (set->elem[(c - 1UL) / 2UL] == 0UL);
	  /* Generate the F(Q^{2k_i} * X)*F(Q^{-2k_i} * X) polynomials.
	     Each is symmetric of degree 2*deg, so each has deg+1 coeffients
	     in standard basis. */
	  /* The i-th scaled copy is stored at F + (2i+1)*(deg+1), behind
	     the space the running product will occupy before it is
	     consumed */
	  for (i = 0UL; i < (c - 1UL) / 2UL; i++)
	    {
              /* Check it's symmetric */
	      ASSERT_ALWAYS (set->elem[i] == -set->elem[c - 1L - i]);
	      V (Qt, Q, set->elem[i], modulus);
	      V (Qt, Qt, 2UL, modulus);
	      ASSERT (mpz_cmp_ui (F[deg], 1UL) == 0); /* Check it's monic */
	      list_scale_V (F + (2UL * i + 1UL) * (deg + 1UL), F, Qt, deg, 
	                    modulus, tmp, tmplen, dct, ntt_context);
	      ASSERT (mpz_cmp_ui (F[(2UL * i + 1UL) * (deg + 1UL) + 2UL * deg], 
	              1UL) == 0); /* Check it's monic */
	    }
	  /* Multiply the polynomials */
	  for (i = 0UL; i < (c - 1UL) / 2UL; i++)
	    {
	      /* So far, we have the product 
		 F(X) * F(Q^{2k_j} * X) * F(Q^{-2k_j} * X), 1 <= j <= i,
		 at F. This product has degree 2 * deg + i * 4 * deg, that is
		 (2 * i + 1) * 2 * deg, which means (2 * i + 1) * deg + 1
		 coefficients in F[0 ... (i * 2 + 1) * deg]. */
	      ASSERT (mpz_cmp_ui (F[(2UL * i + 1UL) * deg], 1UL) == 0);
	      ASSERT (mpz_cmp_ui (F[(2UL * i + 1UL) * (deg + 1UL) + 2UL*deg], 
	                          1UL) == 0);
	      list_output_poly (F, (2UL * i + 1UL) * deg + 1, 0, 1, 
				"poly_from_sets_V: Multiplying ", "\n",
				OUTPUT_TRACE);
	      list_output_poly (F + (2UL * i + 1UL) * (deg + 1UL), 
	                        2UL * deg + 1UL, 0, 1, " and ", "\n", 
	                        OUTPUT_TRACE);
	      list_mul_reciprocal (F, 
		                   F, (2UL * i + 1UL) * deg + 1UL, 
			 	   F + (2UL * i + 1UL) * (deg + 1UL), 
				   2UL * deg + 1UL, modulus->orig_modulus,
				   tmp, tmplen);
	      list_mod (F, F, (2UL * i + 3UL) * deg + 1UL, 
	                modulus->orig_modulus);
	      list_output_poly (F, (2UL * i + 3UL) * deg + 1UL, 0, 1, 
                                " = ", "\n", OUTPUT_TRACE);
	      ASSERT (mpz_cmp_ui (F[(2UL * i + 3UL) * deg], 1UL) == 0);
	    }
	  /* The product of c factors of degree deg has degree c * deg */
	  deg *= c;
	}
    }

  mpres_clear (Qt, modulus);
  outputf (OUTPUT_DEVVERBOSE, ")");

  return deg;
}


static int
build_F_ntt (listz_t F, const mpres_t P_1, sets_long_t *S_1, 
	     const faststage2_param_t *params, mpmod_t modulus)
{
  mpzspm_t F_ntt_context;
  mpzspv_t F_ntt;
  unsigned long tmplen;
  listz_t tmp;
  long timestart, realstart;
  unsigned long i;

  timestart = cputime ();
  realstart = realtime ();
  
  /* Precompute the small primes, primitive roots and inverses etc. for 
     the NTT.
The code to multiply wants a 3*k-th root of unity, where k is the smallest power of 2 with k > s_1/2 */ F_ntt_context = mpzspm_init (3UL << ceil_log2 (params->s_1 / 2 + 1), modulus->orig_modulus); if (F_ntt_context == NULL) { outputf (OUTPUT_ERROR, "Could not initialise F_ntt_context, " "presumably out of memory\n"); return ECM_ERROR; } print_CRT_primes (OUTPUT_DEVVERBOSE, "CRT modulus for building F = ", F_ntt_context); outputf (OUTPUT_VERBOSE, "Computing F from factored S_1"); tmplen = params->s_1 + 100; tmp = init_list2 (tmplen, (unsigned int) abs (modulus->bits)); F_ntt = mpzspv_init (1UL << ceil_log2 (params->s_1 / 2 + 1), F_ntt_context); i = poly_from_sets_V (F, P_1, S_1, tmp, tmplen, modulus, F_ntt, F_ntt_context); ASSERT_ALWAYS(2 * i == params->s_1); ASSERT_ALWAYS(mpz_cmp_ui (F[i], 1UL) == 0); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); if (test_verbose (OUTPUT_TRACE)) { for (i = 0; i < params->s_1 / 2 + 1; i++) outputf (OUTPUT_TRACE, "f_%lu = %Zd; /* PARI */\n", i, F[i]); outputf (OUTPUT_TRACE, "f(x) = f_0"); for (i = 1; i < params->s_1 / 2 + 1; i++) outputf (OUTPUT_TRACE, "+ f_%lu * (x^%lu + x^(-%lu))", i, i, i); outputf (OUTPUT_TRACE, "/* PARI */ \n"); } clear_list (tmp, tmplen); tmp = NULL; mpzspv_clear (F_ntt, F_ntt_context); F_ntt = NULL; mpzspm_clear (F_ntt_context); F_ntt_context = NULL; return 0; } /* Compute g_i = x_0^{M-i} * r^{(M-i)^2} for 0 <= i < l. x_0 = b_1^{2*k_2 + (2*m_1 + 1) * P}. r = b_1^P. Stores the result in g[0 ... l] and/or in g_ntt[offset ... 
offset + l] */ static void pm1_sequence_g (listz_t g_mpz, mpzspv_t g_ntt, const mpres_t b_1, const unsigned long P, const long M_param, const unsigned long l_param, const mpz_t m_1, const long k_2, mpmod_t modulus_param, const mpzspm_t ntt_context) { mpres_t r[3], x_0, x_Mi; mpz_t t; unsigned long i; long timestart, realstart; long M = M_param; unsigned long l = l_param, offset = 0UL; mpmod_t modulus; int want_output = 1; outputf (OUTPUT_VERBOSE, "Computing g_i"); outputf (OUTPUT_DEVVERBOSE, "\npm1_sequence_g: P = %lu, M_param = %lu, " "l_param = %lu, m_1 = %Zd, k_2 = %lu\n", P, M_param, l_param, m_1, k_2); timestart = cputime (); realstart = realtime (); #ifdef _OPENMP #pragma omp parallel if (l > 100) private(r, x_0, x_Mi, t, i, M, l, offset, modulus, want_output) { /* When multi-threading, we adjust the parameters for each thread */ const int nr_chunks = omp_get_num_threads(); const int thread_nr = omp_get_thread_num(); l = (l_param - 1) / nr_chunks + 1; /* = ceil(l_param / nr_chunks) */ offset = thread_nr * l; outputf (OUTPUT_DEVVERBOSE, "pm1_sequence_g: thread %d has l = %lu, offset = %lu.\n", thread_nr, l, offset); ASSERT_ALWAYS (l_param >= offset); l = MIN(l, l_param - offset); M = M_param - (long) offset; /* Let only the master thread print stuff */ want_output = (thread_nr == 0); if (want_output) outputf (OUTPUT_VERBOSE, " using %d threads", nr_chunks); #endif /* Make a private copy of the mpmod_t struct */ mpmod_init_set (modulus, modulus_param); mpz_init (t); mpres_init (r[0], modulus); mpres_init (r[1], modulus); mpres_init (r[2], modulus); mpres_init (x_0, modulus); mpres_init (x_Mi, modulus); if (want_output) { if (test_verbose (OUTPUT_TRACE)) { mpres_get_z (t, b_1, modulus); outputf (OUTPUT_TRACE, "\n/* pm1_sequence_g */ N = %Zd; " "b_1 = Mod(%Zd, N); /* PARI */\n", modulus->orig_modulus, t); outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ P = %lu; M = %ld; " "m_1 = %Zd; /* PARI */\n", P, M, m_1); outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ r = b_1^P; 
/* PARI */\n"); outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ x_0 = " "b_1^(2*%ld + (2*m_1 + 1)*P); /* PARI */\n", k_2); } } /* We use (M-(i+1))^2 = (M-i)^2 + 2*(-M+i) + 1 */ mpz_set_ui (t, P); mpres_pow (r[0], b_1, t, modulus); /* r[0] = b_1^P = r */ if (test_verbose (OUTPUT_TRACE)) { mpres_get_z (t, r[0], modulus); outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ r == %Zd /* PARI C */\n", t); } /* FIXME: This is a huge mess, clean up some time */ mpz_set_si (t, M); mpz_neg (t, t); mpz_mul_2exp (t, t, 1UL); mpz_add_ui (t, t, 1UL); mpres_pow (r[1], r[0], t, modulus); /* r[1] = r^{2(-M+i)+1}, i = 0 */ mpz_set_si (t, M); mpz_mul (t, t, t); /* t = M^2 */ mpres_pow (r[2], r[0], t, modulus); /* r[2] = r^{(M-i)^2}, i = 0 */ mpres_sqr (r[0], r[0], modulus); /* r[0] = r^2 */ mpz_mul_2exp (t, m_1, 1UL); mpz_add_ui (t, t, 1UL); mpz_mul_ui (t, t, P); mpz_add_si (t, t, k_2); mpz_add_si (t, t, k_2); if (want_output) outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ 2*%ld + (2*%Zd + 1)*P == " "%Zd /* PARI C */\n", k_2, m_1, t); mpres_pow (x_0, b_1, t, modulus); /* x_0 = b_1^{2*k_2 + (2*m_1 + 1)*P} */ if (want_output && test_verbose (OUTPUT_TRACE)) { mpres_get_z (t, x_0, modulus); outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ x_0 == %Zd /* PARI C */\n", t); } mpz_set_si (t, M); mpres_pow (x_Mi, x_0, t, modulus); /* x_Mi = x_0^{M-i}, i = 0 */ mpres_invert (x_0, x_0, modulus); /* x_0 := x_0^{-1} now */ mpres_mul (r[1], r[1], x_0, modulus); /* r[1] = x_0^{-1} * r^{-2M+1} */ mpres_mul (r[2], r[2], x_Mi, modulus); /* r[2] = x_0^M * r^{M^2} */ mpres_get_z (t, r[2], modulus); outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ g_%lu = %Zd; /* PARI */\n", offset, t); if (g_mpz != NULL) mpz_set (g_mpz[offset], t); if (g_ntt != NULL) mpzspv_from_mpzv (g_ntt, offset, &t, 1UL, ntt_context); /* So here we have for i = 0 r[2] = x_0^(M-i) * r^{(M-i)^2} r[1] = x_0^{-1} * r^{2(-M+i)+1} r[0] = r^2 t = r[2] */ for (i = 1; i < l; i++) { if (g_mpz != NULL) { mpres_mul_z_to_z (g_mpz[offset + i], r[1], g_mpz[offset + i 
- 1], modulus); outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ g_%lu = %Zd;" " /* PARI */\n", offset + i, g_mpz[offset + i]); } if (g_ntt != NULL) { mpres_mul_z_to_z (t, r[1], t, modulus); if (g_mpz == NULL) /* Only one should be non-NULL... */ outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ g_%lu = %Zd;" " /* PARI */\n", offset + i, t); mpzspv_from_mpzv (g_ntt, offset + i, &t, 1UL, ntt_context); } mpres_mul (r[1], r[1], r[0], modulus); } mpres_clear (r[0], modulus); mpres_clear (r[1], modulus); mpres_clear (r[2], modulus); mpres_clear (x_0, modulus); mpres_clear (x_Mi, modulus); mpz_clear (t); mpmod_clear (modulus); /* Clear our private copy of modulus */ #ifdef _OPENMP } #endif print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); if (test_verbose (OUTPUT_TRACE)) { for (i = 0; i < l_param; i++) { outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ g_%lu == x_0^" "(M - %lu) * r^((M - %lu)^2) /* PARI C */\n", i, i, i); } outputf (OUTPUT_TRACE, "/* pm1_sequence_g */ g(x) = g_0"); for (i = 1; i < l; i++) outputf (OUTPUT_TRACE, " + g_%lu * x^%lu", i, i); outputf (OUTPUT_TRACE, " /* PARI */\n"); } } /* Compute h_j = r^(-j^2) * f_j for 0 <= j < d as described in section 9 of the paper. h == f is ok. */ static void pm1_sequence_h (listz_t h, mpzspv_t h_ntt, mpz_t *f, const mpres_t r, const unsigned long d, mpmod_t modulus_parm, const mpzspm_t ntt_context) { mpres_t invr; /* r^{-1}. Can be shared between threads */ long timestart, realstart; mpres_init (invr, modulus_parm); mpres_invert (invr, r, modulus_parm); /* invr = r^{-1}. 
FIXME: test for failure, even if theoretically impossible */ if (test_verbose (OUTPUT_TRACE)) { mpz_t t; mpz_init (t); mpres_get_z (t, r, modulus_parm); outputf (OUTPUT_TRACE, "\n/* pm1_sequence_h */ N = %Zd; " "r = Mod(%Zd, N); /* PARI */\n", modulus_parm->orig_modulus, t); mpz_clear (t); } outputf (OUTPUT_VERBOSE, "Computing h"); timestart = cputime (); realstart = realtime (); #ifdef _OPENMP #pragma omp parallel if (d > 100) #endif { mpres_t fd[3]; /* finite differences table for r^{-i^2}*/ mpz_t t; /* the h_j value as an mpz_t */ unsigned long j; unsigned long offset = 0UL, len = d; mpmod_t modulus; /* Adjust offset and length for this thread */ #ifdef _OPENMP { const int nr_chunks = omp_get_num_threads(); const int thread_nr = omp_get_thread_num(); unsigned long chunklen; if (thread_nr == 0) outputf (OUTPUT_VERBOSE, " using %d threads", nr_chunks); chunklen = (len - 1UL) / (unsigned long) nr_chunks + 1UL; offset = chunklen * (unsigned long) thread_nr; len = MIN(chunklen, len - offset); } #endif mpmod_init_set (modulus, modulus_parm); mpres_init (fd[0], modulus); mpres_init (fd[1], modulus); mpres_init (fd[2], modulus); mpz_init (t); /* We have (n + 1)^2 = n^2 + 2n + 1. For the finite differences we'll need r^{-2}, r^{-(2n+1)}, r^{-n^2}. Init for n = 0. */ /* r^{-2} in fd[0] is constant and could be shared. Computing it separately in each thread has the advantage of putting it in local memory. 
May not make much difference overall */ mpres_sqr (fd[0], invr, modulus); /* fd[0] = r^{-2} */ mpz_set_ui (t, offset); mpz_mul_2exp (t, t, 1UL); mpz_add_ui (t, t, 1UL); /* t = 2 * offset + 1 */ mpres_pow (fd[1], invr, t, modulus); /* fd[1] = r^{-(2*offset+1)} */ mpz_set_ui (t, offset); mpz_mul (t, t, t); /* t = offset^2 */ mpres_pow (fd[2], invr, t, modulus); /* fd[2] = r^{-offset^2} */ /* Generate the sequence */ for (j = offset; j < offset + len; j++) { mpres_mul_z_to_z (t, fd[2], f[j], modulus); outputf (OUTPUT_TRACE, "/* pm1_sequence_h */ h_%lu = %Zd; /* PARI */\n", j, t); if (h != NULL) mpz_set (h[j], t); if (h_ntt != NULL) mpzspv_from_mpzv (h_ntt, j, &t, 1UL, ntt_context); mpres_mul (fd[2], fd[2], fd[1], modulus); /* fd[2] = r^{-j^2} */ mpres_mul (fd[1], fd[1], fd[0], modulus); /* fd[1] = r^{-2*j-1} */ } mpres_clear (fd[2], modulus); mpres_clear (fd[1], modulus); mpres_clear (fd[0], modulus); mpz_clear (t); mpmod_clear (modulus); } mpres_clear (invr, modulus_parm); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); if (test_verbose (OUTPUT_TRACE)) { unsigned long j; for (j = 0; j < d; j++) outputf (OUTPUT_TRACE, "/* pm1_sequence_h */ h_%lu == " "f_%lu * r^(-%lu^2) /* PARI C */\n", j, j, j); outputf (OUTPUT_TRACE, "/* pm1_sequence_h */ h(x) = h_0"); for (j = 1; j < d; j++) outputf (OUTPUT_TRACE, " + h_%lu * (x^%lu + x^(-%lu))", j, j, j); outputf (OUTPUT_TRACE, " /* PARI */\n"); } } static int make_S_1_S_2 (sets_long_t **S_1, set_long_t **S_2, const faststage2_param_t *params) { unsigned long i; sets_long_t *facS_2; size_t facS_2_size; *S_1 = sets_get_factored_sorted (params->P); if (*S_1 == NULL) return ECM_ERROR; { mpz_t t1, t2; mpz_init (t1); mpz_init (t2); sets_sumset_minmax (t1, *S_1, 1); sets_max (t2, params->P); ASSERT_ALWAYS (mpz_cmp (t1, t2) == 0); mpz_clear (t1); mpz_clear (t2); } *S_2 = malloc (set_sizeof(params->s_2)); if (*S_2 == NULL) { free (*S_1); return ECM_ERROR; } /* Extract sets for S_2 and compute the set of sums */ sets_extract 
(NULL, &facS_2_size, *S_1, params->s_2); facS_2 = malloc (facS_2_size); if (facS_2 == NULL) { free (*S_1); free (*S_2); return ECM_ERROR; } sets_extract (facS_2, NULL, *S_1, params->s_2); sets_sumset (*S_2, facS_2); ASSERT_ALWAYS ((*S_2)->card == params->s_2); free (facS_2); quicksort_long ((*S_2)->elem, (*S_2)->card); /* Print the sets in devverbose mode */ if (test_verbose (OUTPUT_DEVVERBOSE)) { outputf (OUTPUT_DEVVERBOSE, "S_1 = "); sets_print (OUTPUT_DEVVERBOSE, *S_1); outputf (OUTPUT_DEVVERBOSE, "S_2 = {"); for (i = 0UL; i + 1UL < params->s_2; i++) outputf (OUTPUT_DEVVERBOSE, "%ld, ", (*S_2)->elem[i]); if (i < params->s_2) outputf (OUTPUT_DEVVERBOSE, "%ld", (*S_2)->elem[i]); outputf (OUTPUT_DEVVERBOSE, "}\n"); } return 0; } ATTRIBUTE_UNUSED static mpzspv_t * mpzspv_init_mt (spv_size_t len, mpzspm_t mpzspm) { int i; /* OpenMP wants the iteration variable a signed type */ mpzspv_t *x = (mpzspv_t *) malloc (mpzspm->sp_num * sizeof (spv_t *)); if (x == NULL) return NULL; for (i = 0; i < (int) mpzspm->sp_num; i++) x[i] = NULL; #ifdef _OPENMP #pragma omp parallel private(i) shared(x) { #pragma omp for #endif for (i = 0; i < (int) mpzspm->sp_num; i++) x[i] = (spv_t *) sp_aligned_malloc (len * sizeof (sp_t)); #ifdef _OPENMP } #endif for (i = 0; i < (int) mpzspm->sp_num; i++) if (x[i] == NULL) break; if (i != (int) mpzspm->sp_num) /* There is a NULL pointer */ { for (i = 0; i < (int) mpzspm->sp_num; i++) if (x[i] != NULL) sp_aligned_free(x[i]); return NULL; } #if 0 if (test_verbose (OUTPUT_DEVVERBOSE)) { spv_t * last = x[0]; printf ("mpzspv_init_mt: x[0] = %p\n", x[0]); for (i = 1; i < (int) mpzspm->sp_num; i++) printf ("mpzspv_init_mt: x[%d] = %p, distance = %ld\n", i, x[i], (long) (x[i] - x[i-1])); } #endif return x; } ATTRIBUTE_UNUSED static void ntt_print_vec (const char *msg, const spv_t spv, const spv_size_t l) { spv_size_t i; /* Warning: on some computers, for example gcc49.fsffrance.org, "unsigned long" might be shorter than "sp_t" */ gmp_printf ("%s [%Nd", 
msg, (mp_ptr) spv, 1); for (i = 1; i < l; i++) gmp_printf (", %Nd", (mp_ptr) spv + i, 1); printf ("]\n"); } /* Square the reciprocal Laurent polynomial S(x) of degree 2*n-2. S(x) = s_0 + \sum_{i=1}^{n-1} s_i (x^i + x^{-1}). S[i] contains the n coefficients s_i, 0 <= i <= n-1. R[i] will contain the 2n-1 coefficients r_i, 0 <= i <= 2*n-2, where R(x) = S(x)^2 = r_0 + \sum_{i=1}^{2n-2} r_i (x^i + x^{-1}). dft must have power of 2 length len >= 2n. The NTT primes must be == 1 (mod 3*len). */ #undef TRACE_ntt_sqr_reciprocal static void ntt_sqr_reciprocal (mpzv_t R, const mpzv_t S, mpzspv_t dft, const spv_size_t n, const mpzspm_t ntt_context) { #ifdef WANT_ASSERT mpz_t S_eval_1, R_eval_1; #endif if (n == 0) return; if (n == 1) { mpz_mul (R[0], S[0], S[0]); mpz_mod (R[0], R[0], ntt_context->modulus); return; } #ifdef WANT_ASSERT mpz_init (S_eval_1); list_recip_eval1 (S_eval_1, S, n); /* Compute (S(1))^2 */ mpz_mul (S_eval_1, S_eval_1, S_eval_1); mpz_mod (S_eval_1, S_eval_1, ntt_context->modulus); #endif #ifdef TRACE_ntt_sqr_reciprocal printf ("ntt_sqr_reciprocal: n %lu, length %lu\n", n, len); gmp_printf ("Input polynomial is %Zd", S[0]); { int j; for (j = 1; (spv_size_t) j < n; j++) gmp_printf (" + %Zd * (x^%lu + x^(-%lu))", S[j], j, j); } printf ("\n"); #endif /* Fill NTT elements [0 .. 
n-1] with coefficients */ mpzspv_from_mpzv (dft, (spv_size_t) 0, S, n, ntt_context); mpzspv_sqr_reciprocal (dft, n, ntt_context); #if defined(_OPENMP) #pragma omp parallel if (n > 50) #endif { spv_size_t i, offset = 0, chunklen = 2*n - 1; #if defined(_OPENMP) { const int nr_chunks = omp_get_num_threads(); const int thread_nr = omp_get_thread_num(); chunklen = (chunklen - 1) / (spv_size_t) nr_chunks + 1; offset = (spv_size_t) thread_nr * chunklen; if (2*n - 1 > offset) chunklen = MIN(chunklen, (2*n - 1) - offset); else chunklen = 0UL; } #endif mpzspv_to_mpzv (dft, offset, R + offset, chunklen, ntt_context); for (i = offset; i < offset + chunklen; i++) mpz_mod (R[i], R[i], ntt_context->modulus); } #ifdef TRACE_ntt_sqr_reciprocal gmp_printf ("ntt_sqr_reciprocal: Output polynomial is %Zd", R[0]); for (j = 1; (spv_size_t) j < 2*n - 1; j++) gmp_printf (" + %Zd * (x^%lu + x^(-%lu))", R[j], j, j); printf ("\n"); #endif #ifdef WANT_ASSERT mpz_init (R_eval_1); /* Compute (S^2)(1) and compare to (S(1))^2 */ list_recip_eval1 (R_eval_1, R, 2 * n - 1); mpz_mod (R_eval_1, R_eval_1, ntt_context->modulus); if (mpz_cmp (R_eval_1, S_eval_1) != 0) { gmp_fprintf (stderr, "ntt_sqr_reciprocal: (S(1))^2 = %Zd but " "(S^2)(1) = %Zd\n", S_eval_1, R_eval_1); #if 0 gmp_printf ("Output polynomial is %Zd", R[0]); for (j = 1; (spv_size_t) j < 2*n - 1; j++) gmp_printf (" + %Zd * (x^%lu + x^(-%lu))", R[j], j, j); printf ("\n"); #endif abort (); } mpz_clear (S_eval_1); mpz_clear (R_eval_1); #endif } /* Computes gcd(\prod_{0 <= i < len} (ntt[i + offset] + add[i]), N), the NTT residues are converted to integer residues (mod N) first. If add == NULL, add[i] is assumed to be 0. 
*/ static void ntt_gcd (mpz_t f, mpz_t *product, mpzspv_t ntt, const unsigned long ntt_offset, const listz_t add, const unsigned long len_param, const mpzspm_t ntt_context, mpmod_t modulus_param) { unsigned long i, j; const unsigned long Rlen = MPZSPV_NORMALISE_STRIDE; listz_t R; unsigned long len = len_param, thread_offset = 0; mpres_t tmpres, tmpprod, totalprod; mpmod_t modulus; long timestart, realstart; outputf (OUTPUT_VERBOSE, "Computing gcd of coefficients and N"); timestart = cputime (); realstart = realtime (); /* All the threads will multiply their partial products to this one. */ mpres_init (totalprod, modulus_param); mpres_set_ui (totalprod, 1UL, modulus_param); #ifdef _OPENMP #pragma omp parallel if (len > 100) private(i, j, R, len, thread_offset, tmpres, tmpprod, modulus) shared(totalprod) { const int nr_chunks = omp_get_num_threads(); const int thread_nr = omp_get_thread_num(); len = (len_param - 1) / nr_chunks + 1; thread_offset = thread_nr * len; ASSERT (len_param >= thread_offset); len = MIN(len, len_param - thread_offset); #pragma omp master { outputf (OUTPUT_VERBOSE, " using %d threads", nr_chunks); } #endif /* Make a private copy of the mpmod_t struct */ mpmod_init_set (modulus, modulus_param); MEMORY_TAG; R = init_list2 (Rlen, (mpz_size (modulus->orig_modulus) + 2) * GMP_NUMB_BITS); MEMORY_UNTAG; mpres_init (tmpres, modulus); mpres_init (tmpprod, modulus); mpres_set_ui (tmpprod, 1UL, modulus); for (i = 0; i < len; i += Rlen) { const unsigned long blocklen = MIN(len - i, Rlen); /* Convert blocklen residues from NTT to integer representatives and store them in R */ mpzspv_to_mpzv (ntt, ntt_offset + thread_offset + i, R, blocklen, ntt_context); /* Accumulate product in tmpprod */ for (j = 0; j < blocklen; j++) { outputf (OUTPUT_TRACE, "r_%lu = %Zd; /* PARI */\n", i, R[j]); if (add != NULL) mpz_add (R[j], R[j], add[i + thread_offset + j]); mpres_set_z_for_gcd (tmpres, R[j], modulus); #define TEST_ZERO_RESULT #ifdef TEST_ZERO_RESULT if 
(mpres_is_zero (tmpres, modulus)) outputf (OUTPUT_VERBOSE, "R_[%lu] = 0\n", i); #endif mpres_mul (tmpprod, tmpprod, tmpres, modulus); } } #ifdef _OPENMP #pragma omp critical { mpres_mul (totalprod, totalprod, tmpprod, modulus); } #else mpres_set (totalprod, tmpprod, modulus); #endif mpres_clear (tmpres, modulus); mpres_clear (tmpprod, modulus); mpmod_clear (modulus); clear_list (R, Rlen); #ifdef _OPENMP } #endif if (product != NULL) mpres_get_z (*product, totalprod, modulus_param); mpres_gcd (f, totalprod, modulus_param); mpres_clear (totalprod, modulus_param); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); } int pm1fs2 (mpz_t f, const mpres_t X, mpmod_t modulus, const faststage2_param_t *params) { unsigned long phiP, nr; unsigned long i, l, lenF, lenG, lenR, tmplen; sets_long_t *S_1; /* This is stored as a set of sets (arithmetic progressions of prime length */ set_long_t *S_2; /* This is stored as a regular set */ listz_t F; /* Polynomial F has roots X^{k_1} for k_1 \in S_1, so has degree s_1. It is symmetric, so has only s_1 / 2 + 1 distinct coefficients. The sequence h_j will be stored in the same memory and won't be a monic polynomial, so the leading 1 monomial of F will be stored explicitly. Hence we need s_1 / 2 + 1 entries. */ listz_t g, h, tmp, R; mpz_t mt; /* All-purpose temp mpz_t */ mpres_t mr; /* All-purpose temp mpres_t */ int youpi = ECM_NO_FACTOR_FOUND; long timetotalstart, realtotalstart, timestart; timetotalstart = cputime (); realtotalstart = realtime (); phiP = eulerphi (params->P); ASSERT_ALWAYS (phiP == params->s_1 * params->s_2); ASSERT_ALWAYS (params->s_1 < params->l); nr = params->l - params->s_1; /* Number of points we evaluate */ if (make_S_1_S_2 (&S_1, &S_2, params) == ECM_ERROR) return ECM_ERROR; /* Allocate all the memory we'll need */ /* Allocate the correct amount of space for each mpz_t or the reallocations will up to double the time for stage 2! 
*/
  mpz_init (mt);
  mpres_init (mr, modulus);
  lenF = params->s_1 / 2 + 1 + 1; /* Another +1 because poly_from_sets_V stores
                                     the leading 1 monomial for each factor */
  F = init_list2 (lenF, (unsigned int) abs (modulus->bits));
  /* h only receives shallow struct copies of entries of F (see the cloning
     loops below), so just the array itself is allocated here and it is
     released with a plain free() at the end. */
  h = malloc ((params->s_1 + 1) * sizeof (mpz_t));
  if (h == NULL)
    {
      fprintf (stderr, "Cannot allocate memory in pm1fs2\n");
      exit (1);
    }
  lenG = params->l;
  g = init_list2 (lenG, (unsigned int) abs (modulus->bits));
  lenR = nr;
  R = init_list2 (lenR, (unsigned int) abs (modulus->bits));
  /* Size of the scratch list: start from a formula based on l and enlarge
     it if TMulGen_space() (plus a safety margin of 12) asks for more. */
  tmplen = 3UL * params->l + list_mul_mem (params->l / 2);
  outputf (OUTPUT_DEVVERBOSE, "tmplen = %lu\n", tmplen);
  if (TMulGen_space (params->l - 1, params->s_1, lenR) + 12 > tmplen)
    {
      tmplen = TMulGen_space (params->l - 1, params->s_1 - 1, lenR) + 12;
      /* FIXME: It appears TMulGen_space() returns a too small value! */
      outputf (OUTPUT_DEVVERBOSE, "With TMulGen_space, tmplen = %lu\n",
               tmplen);
    }
#ifdef SHOW_TMP_USAGE
  tmp = init_list (tmplen);
#else
  tmp = init_list2 (tmplen, (unsigned int) abs (modulus->bits));
#endif

  mpres_get_z (mt, X, modulus); /* mpz_t copy of X for printing */
  outputf (OUTPUT_TRACE,
           "N = %Zd; X = Mod(%Zd, N); /* PARI */\n",
           modulus->orig_modulus, mt);

  /* Compute the polynomial f(x) = \prod_{k_1 in S_1} (x - X^{2k_1}) */
  outputf (OUTPUT_VERBOSE, "Computing F from factored S_1");

  timestart = cputime ();

  /* First compute X + 1/X */
  mpres_invert (mr, X, modulus);
  mpres_add (mr, mr, X, modulus);

  i = poly_from_sets_V (F, mr, S_1, tmp, tmplen, modulus, NULL, NULL);
  ASSERT_ALWAYS(2 * i == params->s_1);
  ASSERT(mpz_cmp_ui (F[i], 1UL) == 0);
  /* S_1 is not used again after F has been built */
  free (S_1);
  S_1 = NULL;

  outputf (OUTPUT_VERBOSE, " took %lums\n", cputime () - timestart);
  if (test_verbose (OUTPUT_TRACE))
    {
      for (i = 0; i < params->s_1 / 2 + 1; i++)
        outputf (OUTPUT_TRACE, "f_%lu = %Zd; /* PARI */\n", i, F[i]);
      outputf (OUTPUT_TRACE, "f(x) = f_0");
      for (i = 1; i < params->s_1 / 2 + 1; i++)
        outputf (OUTPUT_TRACE, "+ f_%lu * (x^%lu + x^(-%lu))", i, i, i);
      outputf (OUTPUT_TRACE, "/* PARI */ \n");
    }

  mpz_set_ui (mt, params->P);
  mpres_pow (mr, X, mt, modulus); /* mr = X^P */
  /* The h sequence is written over F in place (F is both the output list
     and the input list of this call) */
  pm1_sequence_h (F, NULL, F, mr, params->s_1 / 2 + 1, modulus, NULL);

  /* Make a symmetric copy of F in h. It will have length
     s_1 + 1 = 2*lenF - 1 */
  /* I.e. with F = [3, 2, 1], s_1 = 4, we want h = [1, 2, 3, 2, 1] */
  for (i = 0; i < params->s_1 / 2 + 1; i++)
    *(h[i]) = *(F[params->s_1 / 2 - i]); /* Clone the mpz_t. */
  for (i = 0; i < params->s_1 / 2; i++)
    *(h[i + params->s_1 / 2 + 1]) = *(F[i + 1]);
  if (test_verbose (OUTPUT_TRACE))
    {
      for (i = 0; i < params->s_1 + 1; i++)
        outputf (OUTPUT_VERBOSE, "h_%lu = %Zd; /* PARI */\n", i, h[i]);
      outputf (OUTPUT_VERBOSE, "h(x) = h_0");
      for (i = 1; i < params->s_1 + 1; i++)
        outputf (OUTPUT_VERBOSE, " + h_%lu * x^%lu", i, i);
      outputf (OUTPUT_VERBOSE, " /* PARI */\n");
    }

  /* One multi-point evaluation per element of S_2. The loop stops early
     when TMulGen reports an error or when a non-trivial gcd is found. */
  for (l = 0; l < params->s_2; l++)
    {
      const unsigned long M = params->l - 1L - params->s_1 / 2L;
      outputf (OUTPUT_VERBOSE, "Multi-point evaluation %lu of %lu:\n",
               l + 1, params->s_2);
      pm1_sequence_g (g, NULL, X, params->P, M, params->l,
                      params->m_1, S_2->elem[l], modulus, NULL);

      /* Do the convolution */
      /* Use the transposed "Middle Product" algorithm */
      /* TMulGen reverses the first input sequence, but that doesn't matter
         since h is symmetric. */

      outputf (OUTPUT_VERBOSE, "TMulGen of g and h");
      timestart = cputime ();
      ASSERT(tmplen >= TMulGen_space (nr - 1, params->l - 1, params->s_1));

      /* Computes rev(h)*g, stores coefficients of x^(s_1) to
         x^(s_1+nr-1) = x^(len-1) */
      if (TMulGen (R, nr - 1, h, params->s_1, g, params->l - 1, tmp,
                   modulus->orig_modulus) < 0)
        {
          outputf (OUTPUT_ERROR, "TMulGen returned error code (probably out "
                   "of memory)\n");
          /* Leave the loop; the common cleanup code below still runs */
          youpi = ECM_ERROR;
          break;
        }
      list_mod (R, R, nr, modulus->orig_modulus);

      outputf (OUTPUT_VERBOSE, " took %lums\n", cputime () - timestart);

#if 0 && defined(WANT_ASSERT)

      /* See if R[i] is correct, with a test that works even if i0 != 0 */
      /* More expensive self-test */
      /* alpha = beta*(i0 + l*nr) */
      /* This code is old and probably does not work. */

      outputf (OUTPUT_VERBOSE, "Verifying all results (slow)");
      for (i = 0; i < nr; i++)
        {
          mpz_set_ui (mt, nr * l);
          mpz_add (mt, mt, root_params->i0);
          mpz_add_ui (mt, mt, i);
          mpz_mul_ui (mt, mt, beta);
          mpres_get_z (tmp[0], X, modulus);
          mpz_powm (tmp[0], tmp[0], mt, modulus->orig_modulus);
          /* Hence, tmp[0] = X^(alpha + i * beta) */
          list_eval_poly (tmp[1], F, tmp[0], dF, 1, modulus->orig_modulus,
                          tmp + 2);

          mpz_set_ui (mt, i);
          mpz_mul_ui (mt, mt, i);
          mpz_mul_ui (mt, mt, beta / 2); /* h(i) = beta*i^2/2 */
          mpres_get_z (tmp[0], X, modulus);
          mpz_powm (tmp[0], tmp[0], mt, modulus->orig_modulus); /* X^h(1) */
          mpz_mul (tmp[0], tmp[0], R[i]);
          mpz_mod (tmp[0], tmp[0], modulus->orig_modulus);
          if (mpz_cmp (tmp[0], tmp[1]) != 0)
            {
              outputf (OUTPUT_ERROR, "Result in R[%ld] incorrect.\n", i);
              outputf (OUTPUT_ERROR, "R[%ld] = %Zd\n", i, R[i]);
              abort ();
            }
        }
      outputf (OUTPUT_VERBOSE, " - everything's correct! :-D\n");
#endif

      if (test_verbose (OUTPUT_TRACE))
        {
          for (i = 0; i < nr; i++)
            outputf (OUTPUT_TRACE, "r_%lu = %Zd; /* PARI */\n", i, R[i]);
        }

      outputf (OUTPUT_VERBOSE, "Computing product of F(g_i)");
      timestart = cputime ();

      {
        /* Multiply all nr results together; the gcd of the product and the
           modulus is put in tmp[0], the product itself in tmp[1]. */
        mpres_t tmpres, tmpprod;
        mpres_init (tmpres, modulus);
        mpres_init (tmpprod, modulus);
        mpres_set_z_for_gcd (tmpprod, R[0], modulus);
        for (i = 1; i < nr; i++)
          {
            mpres_set_z_for_gcd (tmpres, R[i], modulus);
            mpres_mul (tmpprod, tmpprod, tmpres, modulus);
          }
        mpres_get_z (tmp[1], tmpprod, modulus); /* For printing */
        mpres_gcd (tmp[0], tmpprod, modulus);
        mpres_clear (tmpprod, modulus);
        mpres_clear (tmpres, modulus);
      }

      outputf (OUTPUT_VERBOSE, " took %lums\n", cputime () - timestart);
      outputf (OUTPUT_RESVERBOSE, "Product of R[i] = %Zd (times some "
               "power of 2 if REDC was used! Try -mpzmod)\n", tmp[1]);

      /* A gcd greater than 1 is reported as a factor and ends the loop */
      if (mpz_cmp_ui (tmp[0], 1UL) > 0)
        {
          mpz_set (f, tmp[0]);
          youpi = ECM_FACTOR_FOUND_STEP2;
          break;
        }
    }

#ifdef SHOW_TMP_USAGE
  for (i = tmplen - 1; i > 0; i--)
    if (tmp[i]->_mp_alloc > 1)
      break;
  outputf (OUTPUT_DEVVERBOSE, "Highest used temp element is tmp[%lu]\n", i);
#endif

  /* Common cleanup, reached on success, on "no factor" and on TMulGen
     error alike */
  free (S_2);
  free (h);
  clear_list (F, lenF);
  clear_list (g, lenG);
  clear_list (R, lenR);
  clear_list (tmp, tmplen);

  mpz_clear (mt);
  mpres_clear (mr, modulus);

  outputf (OUTPUT_NORMAL, "Step 2");
  /* In normal output mode, print only cpu time as we always have.
     In verbose mode, print real time as well if we used multi-threading */
  if (test_verbose (OUTPUT_VERBOSE))
    print_elapsed_time (OUTPUT_NORMAL, timetotalstart, realtotalstart);
  else
    print_elapsed_time (OUTPUT_NORMAL, timetotalstart, 0L);

  return youpi;
}


int
pm1fs2_ntt (mpz_t f, const mpres_t X, mpmod_t modulus,
            const faststage2_param_t *params)
{
  unsigned long nr;
  unsigned long l, lenF;
  sets_long_t *S_1; /* This is stored as a set of sets (arithmetic
                       progressions of prime length */
  set_long_t *S_2; /* This is stored as a regular set */
  listz_t F;   /* Polynomial F has roots X^{k_1} for k_1 \in S_1, so has
                  degree s_1. It is symmetric, so has only s_1 / 2 + 1
                  distinct coefficients. The sequence h_j will be stored in
                  the same memory and won't be a monic polynomial, so the
                  leading 1 monomial of F will be stored explicitly. Hence we
                  need s_1 / 2 + 1 entries.
*/ mpzspm_t ntt_context; mpzspv_t g_ntt, h_ntt; mpz_t mt; /* All-purpose temp mpz_t */ mpz_t product; /* Product of each multi-point evaluation */ mpz_t *product_ptr = NULL; mpres_t tmpres; /* All-purpose temp mpres_t */ int youpi = ECM_NO_FACTOR_FOUND; long timetotalstart, realtotalstart, timestart, realstart; timetotalstart = cputime (); realtotalstart = realtime (); ASSERT_ALWAYS (eulerphi (params->P) == params->s_1 * params->s_2); ASSERT_ALWAYS (params->s_1 < params->l); nr = params->l - params->s_1; /* Number of points we evaluate */ /* Prepare NTT for computing the h sequence, its DCT-I, and the convolution with g. We need NTT of transform length l. We do it here at the start of stage 2 so that in case of a "not enough primes" condition, we don't have to wait until after F is built to get the error. */ ntt_context = mpzspm_init (params->l, modulus->orig_modulus); if (ntt_context == NULL) { outputf (OUTPUT_ERROR, "Could not initialise ntt_context, " "presumably out of memory\n"); return ECM_ERROR; } print_CRT_primes (OUTPUT_DEVVERBOSE, "CRT modulus for evaluation = ", ntt_context); if (make_S_1_S_2 (&S_1, &S_2, params) == ECM_ERROR) return ECM_ERROR; /* Allocate all the memory we'll need for building f */ mpz_init (mt); mpres_init (tmpres, modulus); lenF = params->s_1 / 2 + 1 + 1; /* Another +1 because poly_from_sets_V stores the leading 1 monomial for each factor */ F = init_list2 (lenF, (unsigned int) abs (modulus->bits)); mpres_get_z (mt, X, modulus); /* mpz_t copy of X for printing */ outputf (OUTPUT_TRACE, "N = %Zd; X = Mod(%Zd, N); /* PARI */\n", modulus->orig_modulus, mt); #if 0 && defined (WANT_ASSERT) /* For this self test run with a large enough B2 so that enough memory is allocated for tmp and F_ntt, otherwise it segfaults. 
*/ { int testlen = 255; int i, j; /* A test of ntt_sqr_reciprocal() */ for (j = 1; j <= testlen; j++) { outputf (OUTPUT_VERBOSE, "Testing ntt_sqr_reciprocal() for input degree %d\n", j - 1); for (i = 0; i < j; i++) mpz_set_ui (tmp[i], 1UL); ntt_sqr_reciprocal (tmp, tmp, F_ntt, (spv_size_t) j, ntt_context_F); for (i = 0; i < 2 * j - 1; i++) { ASSERT (mpz_cmp_ui (tmp[i], 2 * j - 1 - i) == 0); } } outputf (OUTPUT_VERBOSE, "Test of ntt_sqr_reciprocal() for input degree 2 ... %d passed\n", testlen - 1); } #endif /* First compute X + 1/X */ mpres_invert (tmpres, X, modulus); mpres_add (tmpres, tmpres, X, modulus); if (build_F_ntt (F, tmpres, S_1, params, modulus) == ECM_ERROR) { free (S_1); free (S_2); mpz_clear (mt); mpres_clear (tmpres, modulus); mpzspm_clear (ntt_context); clear_list (F, lenF); return ECM_ERROR; } free (S_1); S_1 = NULL; h_ntt = mpzspv_init (params->l / 2 + 1, ntt_context); mpz_set_ui (mt, params->P); mpres_pow (tmpres, X, mt, modulus); /* tmpres = X^P */ pm1_sequence_h (NULL, h_ntt, F, tmpres, params->s_1 / 2 + 1, modulus, ntt_context); clear_list (F, lenF); g_ntt = mpzspv_init (params->l, ntt_context); /* Compute the DCT-I of h */ outputf (OUTPUT_VERBOSE, "Computing DCT-I of h"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_to_dct1 (h_ntt, h_ntt, params->s_1 / 2 + 1, params->l / 2 + 1, g_ntt, ntt_context); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_init (product); product_ptr = &product; } for (l = 0; l < params->s_2; l++) { const unsigned long M = params->l - 1L - params->s_1 / 2L; outputf (OUTPUT_VERBOSE, "Multi-point evaluation %lu of %lu:\n", l + 1, params->s_2); /* Compute the coefficients of the polynomial g(x) */ pm1_sequence_g (NULL, g_ntt, X, params->P, M, params->l, params->m_1, S_2->elem[l], modulus, ntt_context); /* Do the convolution */ outputf (OUTPUT_VERBOSE, "Computing 
g*h"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_mul_by_dct (g_ntt, h_ntt, params->l, ntt_context, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); /* Compute GCD of N and coefficients of product polynomial */ ntt_gcd (mt, product_ptr, g_ntt, params->s_1 / 2, NULL, nr, ntt_context, modulus); outputf (OUTPUT_RESVERBOSE, "Product of R[i] = %Zd (times some " "power of 2 if REDC was used! Try -mpzmod)\n", product); /* If we found a factor, stop */ if (mpz_cmp_ui (mt, 1UL) > 0) { mpz_set (f, mt); youpi = ECM_FACTOR_FOUND_STEP2; break; } } if (test_verbose (OUTPUT_RESVERBOSE)) { product_ptr = NULL; mpz_clear (product); } mpzspv_clear (g_ntt, ntt_context); mpzspv_clear (h_ntt, ntt_context); mpzspm_clear (ntt_context); mpres_clear (tmpres, modulus); mpz_clear (mt); free (S_2); outputf (OUTPUT_NORMAL, "Step 2"); /* In normal output mode, print only cpu time as we always have. In verbose mode, print real time as well if we used multi-threading */ if (test_verbose (OUTPUT_VERBOSE)) print_elapsed_time (OUTPUT_NORMAL, timetotalstart, realtotalstart); else print_elapsed_time (OUTPUT_NORMAL, timetotalstart, 0L); return youpi; } static void gfp_ext_print (const mpres_t r_x, const mpres_t r_y, mpmod_t modulus, const int verbose) { mpz_t t1, t2; if (!test_verbose (verbose)) return; mpz_init (t1); mpz_init (t2); mpres_get_z (t1, r_x, modulus); mpres_get_z (t2, r_y, modulus); outputf (verbose, "Mod(%Zd, N) + Mod(%Zd, N) * w", t1, t2); mpz_clear (t1); mpz_clear (t2); } /* Multiplies (a_0 + a_1*sqrt(Delta)) * (b_0 + b_1*sqrt(Delta)) using four multiplications. Result goes in (r_0 + r_1*sqrt(Delta)). a_0, b_0, r_0 as well as a_1, b_1, r_1 may overlap arbitrarily. t[0], t[1], t[2] and Delta must not overlap with anything. */ /* FIXME: is there a faster multiplication routine if both inputs have norm 1? 
*/

/* Note on scratch space: only tmp[0] and tmp[1] are written below, and the
   ASSERT requires tmplen >= 2. The statements must stay in this order:
   each input is read for the last time before the output that may alias
   it is written. The two "if (0 && ...)" blocks are disabled trace
   output. */
static void
gfp_ext_mul (mpres_t r_0, mpres_t r_1, const mpres_t a_0, const mpres_t a_1,
             const mpres_t b_0, const mpres_t b_1, const mpres_t Delta,
             mpmod_t modulus, ATTRIBUTE_UNUSED const unsigned long tmplen,
             mpres_t *tmp)
{
  ASSERT (tmplen >= 2);
  if (0 && test_verbose (OUTPUT_TRACE))
    {
      mpz_t t;
      mpz_init (t);
      mpres_get_z (t, Delta, modulus);
      outputf (OUTPUT_TRACE, "/* gfp_ext_mul */ w = quadgen (4*%Zd); "
               "N = %Zd; /* PARI */\n", t, modulus->orig_modulus);
      mpz_clear (t);
      outputf (OUTPUT_TRACE, "/* gfp_ext_mul */ (");
      gfp_ext_print (a_0, a_1, modulus, OUTPUT_TRACE);
      outputf (OUTPUT_TRACE, ") * (");
      gfp_ext_print (b_0, b_1, modulus, OUTPUT_TRACE);
    }

  mpres_add (tmp[0], a_0, a_1, modulus);
  mpres_add (tmp[1], b_0, b_1, modulus);
  mpres_mul (tmp[1], tmp[0], tmp[1], modulus); /* t[1] = (a_0+a_1)*(b_0+b_1) =
                                        a_0*b_0 + a_0*b_1 + a_1*b_0 +
                                        a_1*b_1 */

  mpres_mul (r_0, a_0, b_0, modulus);    /* r_0 = a_0*b_0. We don't need a_0
                                            or b_0 any more now */
  mpres_sub (tmp[1], tmp[1], r_0, modulus);  /* t[1] = a_0*b_1 + a_1*b_0 +
                                                a_1*b_1 */

  mpres_mul (tmp[0], a_1, b_1, modulus);   /* t[0] = a_1*b_1. We don't need
                                              a_1 or b_1 any more now */
  mpres_sub (r_1, tmp[1], tmp[0], modulus);  /* r_1 == a_0*b_1 + a_1*b_0 */

  mpres_mul (tmp[0], tmp[0], Delta, modulus); /* t[0] = a_1*b_1*Delta */
  mpres_add (r_0, r_0, tmp[0], modulus);   /* r_0 = a_0*b_0 + a_1*b_1*Delta */

  if (0 && test_verbose (OUTPUT_TRACE))
    {
      outputf (OUTPUT_TRACE, ") == ");
      gfp_ext_print (r_0, r_1, modulus, OUTPUT_TRACE);
      outputf (OUTPUT_TRACE, " /* PARI C */\n");
    }
}


/* Computes (a_0 + a_1 * sqrt(Delta))^2, where the norm
   (a_0^2 - a_1^2*Delta) is assumed to be equal to 1. Hence
   (a_0 + a_1 * sqrt(Delta))^2 = a_0^2 + 2*a_0*a_1*sqrt(Delta) + a_1^2*Delta
   and a_0^2 + a_1^2*Delta = a_0^2 + a_1^2*Delta + norm - 1 = 2*a_0^2 - 1.
a_0 and r_0, as well as a_1 and r_1 may overlap */ static void gfp_ext_sqr_norm1 (mpres_t r_0, mpres_t r_1, const mpres_t a_0, const mpres_t a_1, mpmod_t modulus) { ASSERT (a_0 != r_1); /* a_0 is read after r_1 is written */ if (pari) gmp_printf ("/* gfp_ext_sqr_norm1 */ (%Zd + %Zd * w)^2 %% N == ", a_0, a_1); mpres_mul (r_1, a_0, a_1, modulus); mpres_add (r_1, r_1, r_1, modulus); /* r_1 = 2*a_0*a_1 */ mpres_sqr (r_0, a_0, modulus); mpres_add (r_0, r_0, r_0, modulus); mpres_sub_ui (r_0, r_0, 1UL, modulus); /* r_0 = 2*a_0^2 - 1 */ if (pari) gmp_printf ("(%Zd + %Zd * w) %% N /* PARI C */\n", r_0, r_1); } /* Raise (a0 + a1*sqrt(Delta)) to the power e which is a signed long int. (a0 + a1*sqrt(Delta)) is assumed to have norm 1, i.e. a0^2 - a1^2*Delta == 1. The result is (r0 * r1*sqrt(Delta)). a0, a1, r0 and r1 must not overlap */ static void gfp_ext_pow_norm1_sl (mpres_t r0, mpres_t r1, const mpres_t a0, const mpres_t a1, const long e, const mpres_t Delta, mpmod_t modulus, unsigned long tmplen, mpres_t *tmp) { const unsigned long abs_e = labs (e); unsigned long mask = ~0UL - (~0UL >> 1); ASSERT (a0 != r0 && a1 != r0 && a0 != r1 && a1 != r1); if (e == 0) { mpres_set_ui (r0, 1UL, modulus); mpres_set_ui (r1, 0UL, modulus); return; } /* If e < 0, we want 1/(a0 + a1*sqrt(Delta)). By extending with a0 - a1*sqrt(Delta), we get (a0 - a1*sqrt(Delta)) / (a0^2 - a1^2 * Delta), but that denomiator is the norm which is known to be 1, so the result is a0 - a1*sqrt(Delta). 
*/ while ((abs_e & mask) == 0UL) mask >>= 1; mpres_set (r0, a0, modulus); mpres_set (r1, a1, modulus); while (mask > 1UL) { gfp_ext_sqr_norm1 (r0, r1, r0, r1, modulus); mask >>= 1; if (abs_e & mask) gfp_ext_mul (r0, r1, r0, r1, a0, a1, Delta, modulus, tmplen, tmp); } if (e < 0) mpres_neg (r1, r1, modulus); if (0 && test_verbose (OUTPUT_TRACE)) { mpz_t t; mpz_init (t); mpres_get_z (t, Delta, modulus); outputf (OUTPUT_TRACE, "/* gfp_ext_pow_norm1_sl */ w = quadgen (4*%Zd); " "N = %Zd; /* PARI */\n", t, modulus->orig_modulus); mpz_clear (t); outputf (OUTPUT_TRACE, "/* gfp_ext_pow_norm1_sl */ ("); gfp_ext_print (a0, a1, modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, ")^(%ld) == ", e); gfp_ext_print (r0, r1, modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, " /* PARI C */\n"); } } /* Same, but taking an mpz_t argument for the exponent */ static void gfp_ext_pow_norm1 (mpres_t r0, mpres_t r1, const mpres_t a0, const mpres_t a1, mpz_t e, const mpres_t Delta, mpmod_t modulus, unsigned long tmplen, mpres_t *tmp) { mpz_t abs_e; unsigned long idx; ASSERT (a0 != r0 && a1 != r0 && a0 != r1 && a1 != r1); if (mpz_sgn (e) == 0) { mpres_set_ui (r0, 1UL, modulus); mpres_set_ui (r1, 0UL, modulus); return; } mpz_init (abs_e); mpz_abs (abs_e, e); idx = mpz_sizeinbase (abs_e, 2) - 1; /* Thus mpz_tstbit (abs_e, idx) == 1 */ ASSERT (mpz_tstbit (abs_e, idx) == 1); mpres_set (r0, a0, modulus); mpres_set (r1, a1, modulus); while (idx > 0UL) { gfp_ext_sqr_norm1 (r0, r1, r0, r1, modulus); idx--; if (mpz_tstbit (abs_e, idx)) gfp_ext_mul (r0, r1, r0, r1, a0, a1, Delta, modulus, tmplen, tmp); } if (mpz_sgn (e) < 0) mpres_neg (r1, r1, modulus); mpz_clear (abs_e); if (test_verbose (OUTPUT_TRACE)) { mpz_t t; mpz_init (t); mpres_get_z (t, Delta, modulus); outputf (OUTPUT_TRACE, "/* gfp_ext_pow_norm1 */ w = quadgen (4*%Zd); " "N = %Zd; /* PARI */\n", t, modulus->orig_modulus); mpz_clear (t); outputf (OUTPUT_TRACE, "/* gfp_ext_pow_norm1 */ ("); gfp_ext_print (a0, a1, modulus, OUTPUT_TRACE); outputf 
(OUTPUT_TRACE, ")^(%Zd) == ", e); gfp_ext_print (r0, r1, modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, " /* PARI C */\n"); } } /* Compute r[i] = a^((k+i)^2) for i = 0, 1, ..., l-1, where "a" is an element of norm 1 in the quadratic extension ring */ ATTRIBUTE_UNUSED static void gfp_ext_rn2 (mpres_t *r_x, mpres_t *r_y, const mpres_t a_x, const mpres_t a_y, const long k, const unsigned long l, const mpres_t Delta, mpmod_t modulus, const unsigned long origtmplen, mpres_t *origtmp) { mpres_t *r2_x = origtmp, *r2_y = origtmp + 2, *v = origtmp + 4, *V2 = origtmp + 6; const unsigned long newtmplen = origtmplen - 7; mpres_t *newtmp = origtmp + 7; unsigned long i; if (l == 0UL) return; ASSERT (origtmplen >= 8UL); if (pari) gmp_printf ("/* In gfp_ext_rn2 */ ; a = %Zd + %Zd * w; /* PARI */\n", a_x, a_y, modulus->orig_modulus); /* Compute r[0] = a^(k^2). We do it by two exponentiations by k and use v[0] and v[1] as temp storage */ gfp_ext_pow_norm1_sl (v[0], v[1], a_x, a_y, k, Delta, modulus, newtmplen, newtmp); gfp_ext_pow_norm1_sl (r_x[0], r_y[0], v[0], v[1], k, Delta, modulus, newtmplen, newtmp); if (pari) gmp_printf ("/* In gfp_ext_rn2 */ a^(%ld^2) %% N == (%Zd + %Zd * w) %% N " "/* PARI C */\n", k, r_x[0], r_y[0]); /* Compute r[1] = a^((k+1)^2) = a^(k^2 + 2k + 1)*/ if (l > 1) { /* v[0] + v[1]*sqrt(Delta) still contains a^k */ gfp_ext_sqr_norm1 (r_x[1], r_y[1], v[0], v[1], modulus); /* Now r[1] = a^(2k) */ gfp_ext_mul (r_x[1], r_y[1], r_x[1], r_y[1], r_x[0], r_y[0], Delta, modulus, newtmplen, newtmp); /* Now r[1] = a^(k^2 + 2k) */ gfp_ext_mul (r_x[1], r_y[1], r_x[1], r_y[1], a_x, a_y, Delta, modulus, newtmplen, newtmp); /* Now r[1] = a^(k^2 + 2k + 1) = a^((k+1)^2) */ } if (pari) gmp_printf ("/* In gfp_ext_rn2 */ a^(%ld^2) %% N == (%Zd + %Zd * w) %% N " "/* PARI C */\n", k + 1, r_x[1], r_y[1]); /* Compute r2[0] = a^(k^2+2) = a^(k^2) * a^2 */ gfp_ext_sqr_norm1 (v[0], v[1], a_x, a_y, modulus); gfp_ext_mul (r2_x[0], r2_y[0], r_x[0], r_y[0], v[0], v[1], Delta, modulus, 
newtmplen, newtmp); if (pari) gmp_printf ("/* In gfp_ext_rn2 */ a^(%ld^2+2) %% N == (%Zd + %Zd * w) %% N " "/* PARI C */\n", k, r2_x[0], r2_y[0]); /* Compute a^((k+1)^2+2) = a^((k+1)^2) * a^2 */ gfp_ext_mul (r2_x[1], r2_y[1], r_x[1], r_y[1], v[0], v[1], Delta, modulus, newtmplen, newtmp); if (pari) gmp_printf ("/* In gfp_ext_rn2 */ a^(%ld^2+2) %% N == (%Zd + %Zd * w) %% N " "/* PARI C */\n", k + 1, r2_x[1], r2_y[1]); /* Compute V_2(a + 1/a). Since 1/a = a_x - a_y, we have a+1/a = 2*a_x. V_2(x) = x^2 - 2, so we want 4*a_x^2 - 2. */ mpres_add (*V2, a_x, a_x, modulus); /* V2 = a + 1/a = 2*a_x*/ V (v[0], *V2, 2 * k + 1, modulus); /* v[0] = V_{2k+1} (a + 1/a) */ V (v[1], *V2, 2 * k + 3, modulus); /* v[0] = V_{2k+3} (a + 1/a) */ mpres_sqr (*V2, *V2, modulus); /* V2 = 4*a_x^2 */ mpres_sub_ui (*V2, *V2, 2UL, modulus); /* V2 = 4*a_x^2 - 2 */ if (pari) { gmp_printf ("/* In gfp_ext_rn2 */ ((a + 1/a)^2 - 2) %% N == " "%Zd %% N /* PARI C */\n", *V2); gmp_printf ("/* In gfp_ext_rn2 */ V(%lu, a + 1/a) %% N == %Zd %% N " "/* PARI C */\n", 2 * k + 1, v[0]); gmp_printf ("/* In gfp_ext_rn2 */ V(%lu, a + 1/a) %% N == %Zd %% N " "/* PARI C */\n", 2 * k + 3, v[1]); } /* Compute the remaining a^((k+i)^2) values according to Peter's recurrence */ for (i = 2; i < l; i++) { /* r[i] = r2[i-1] * v[i-2] - r2[i-2], with indices of r2 and i taken modulo 2 */ mpres_mul (r_x[i], r2_x[1 - i % 2], v[i % 2], modulus); mpres_sub (r_x[i], r_x[i], r2_x[i % 2], modulus); mpres_mul (r_y[i], r2_y[1 - i % 2], v[i % 2], modulus); mpres_sub (r_y[i], r_y[i], r2_y[i % 2], modulus); /* r2[i] = r2[i-1] * v[i-1] - r[i-2] */ mpres_mul (r2_x[i % 2], r2_x[1 - i % 2], v[1 - i % 2], modulus); mpres_sub (r2_x[i % 2], r2_x[i % 2], r_x[i - 2], modulus); mpres_mul (r2_y[i % 2], r2_y[1 - i % 2], v[1 - i % 2], modulus); mpres_sub (r2_y[i % 2], r2_y[i % 2], r_y[i - 2], modulus); /* v[i] = v[i - 1] * V_2(a + 1/a) - v[i - 2] */ mpres_mul (newtmp[0], v[1 - i % 2], *V2, modulus); mpres_sub (v[i % 2], newtmp[0], v[i % 2], 
modulus); if (pari) gmp_printf ("/* In gfp_ext_rn2 */ V(%lu, a + 1/a) %% N == %Zd %% N " "/* PARI C */\n", 2 * (k + i) + 1, v[i % 2]); } } /* Compute g_i = x_0^{M-i} * r^{(M-i)^2} for 0 <= i < l. x_0 = b_1^{2*k_2 + (2*m_1 + 1) * P}. r = b_1^P. */ static void pp1_sequence_g (listz_t g_x, listz_t g_y, mpzspv_t g_x_ntt, mpzspv_t g_y_ntt, const mpres_t b1_x, const mpres_t b1_y, const unsigned long P, const mpres_t Delta, const long M_param, const unsigned long l_param, const mpz_t m_1, const long k_2, const mpmod_t modulus_param, const mpzspm_t ntt_context) { const unsigned long tmplen = 3; const int want_x = (g_x != NULL || g_x_ntt != NULL); const int want_y = (g_y != NULL || g_y_ntt != NULL); mpres_t r_x, r_y, x0_x, x0_y, v2, r1_x[2], r1_y[2], r2_x[2], r2_y[2], v[2], tmp[3]; mpz_t mt; mpmod_t modulus; /* Thread-local copy of modulus_param */ unsigned long i, l = l_param, offset = 0; long M = M_param; long timestart, realstart; int want_output = 1; outputf (OUTPUT_VERBOSE, "Computing %s%s%s", (want_x) ? "g_x" : "", (want_x && want_y) ? " and " : "", (want_y) ? 
"g_y" : ""); timestart = cputime (); realstart = realtime (); #ifdef _OPENMP #pragma omp parallel if (l > 100) private(r_x, r_y, x0_x, x0_y, v2, r1_x, r1_y, r2_x, r2_y, v, tmp, mt, modulus, i, l, offset, M, want_output) { /* When multi-threading, we adjust the parameters for each thread */ const int nr_chunks = omp_get_num_threads(); const int thread_nr = omp_get_thread_num(); l = (l_param - 1) / nr_chunks + 1; offset = thread_nr * l; ASSERT_ALWAYS (l_param >= offset); l = MIN(l, l_param - offset); M = M_param - (long) offset; want_output = (omp_get_thread_num() == 0); if (want_output) outputf (OUTPUT_VERBOSE, " using %d threads", nr_chunks); #endif mpmod_init_set (modulus, modulus_param); mpres_init (r_x, modulus); mpres_init (r_y, modulus); mpres_init (x0_x, modulus); mpres_init (x0_y, modulus); mpres_init (v2, modulus); for (i = 0; i < 2UL; i++) { mpres_init (r1_x[i], modulus); mpres_init (r1_y[i], modulus); mpres_init (r2_x[i], modulus); mpres_init (r2_y[i], modulus); mpres_init (v[i], modulus); } for (i = 0; i < tmplen; i++) mpres_init (tmp[i], modulus); mpz_init (mt); if (want_output && test_verbose (OUTPUT_TRACE)) { mpres_get_z (mt, Delta, modulus); outputf (OUTPUT_TRACE, "\n/* pp1_sequence_g */ w = quadgen (4*%Zd); P = %lu; " "M = %ld; k_2 = %ld; m_1 = %Zd; N = %Zd; /* PARI */\n", mt, P, M, k_2, m_1, modulus->orig_modulus); outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ b_1 = "); gfp_ext_print (b1_x, b1_y, modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, "; /* PARI */\n"); outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ r = b_1^P; /* PARI */\n"); outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ " "x_0 = b_1^(2*k_2 + (2*m_1 + 1) * P); /* PARI */\n"); outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ addrec(x) = x + 1/x; /* PARI */\n"); } /* Compute r */ gfp_ext_pow_norm1_sl (r_x, r_y, b1_x, b1_y, P, Delta, modulus, tmplen, tmp); if (want_output && test_verbose (OUTPUT_TRACE)) { outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ r == "); gfp_ext_print (r_x, r_y, modulus, 
OUTPUT_TRACE); outputf (OUTPUT_TRACE, " /* PARI C */\n"); } /* Compute x0 = x_0 */ mpz_mul_2exp (mt, m_1, 1UL); mpz_add_ui (mt, mt, 1UL); mpz_mul_ui (mt, mt, P); mpz_add_si (mt, mt, k_2); mpz_add_si (mt, mt, k_2); /* mt = 2*k_2 + (2*m_1 + 1) * P */ gfp_ext_pow_norm1 (x0_x, x0_y, b1_x, b1_y, mt, Delta, modulus, tmplen, tmp); if (want_output && test_verbose (OUTPUT_TRACE)) { outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ x_0 == "); gfp_ext_print (x0_x, x0_y, modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, " /* PARI C */\n"); } /* Compute g[1] = r1[0] = x0^M * r^(M^2) = (x0 * r^M)^M. We use v[0,1] as temporary storage */ gfp_ext_pow_norm1_sl (v[0], v[1], r_x, r_y, M, Delta, modulus, tmplen, tmp); /* v[0,1] = r^M */ gfp_ext_mul (v[0], v[1], v[0], v[1], x0_x, x0_y, Delta, modulus, tmplen, tmp); /* v[0,1] = r^M * x_0 */ gfp_ext_pow_norm1_sl (r1_x[0], r1_y[0], v[0], v[1], M, Delta, modulus, tmplen, tmp); /* r1[0] = (r^M * x_0)^M */ if (g_x != NULL) mpres_get_z (g_x[offset], r1_x[0], modulus); if (g_y != NULL) mpres_get_z (g_y[offset], r1_y[0], modulus); if (g_x_ntt != NULL) { mpres_get_z (mt, r1_x[0], modulus); mpzspv_from_mpzv (g_x_ntt, offset, &mt, 1UL, ntt_context); } if (g_y_ntt != NULL) { mpres_get_z (mt, r1_y[0], modulus); mpzspv_from_mpzv (g_y_ntt, offset, &mt, 1UL, ntt_context); } /* Compute g[1] = r1[1] = x0^(M-1) * r^((M-1)^2) = (x0 * r^(M-1))^(M-1). We use v[0,1] as temporary storage. 
FIXME: simplify, reusing g_0 */ gfp_ext_pow_norm1_sl (v[0], v[1], r_x, r_y, M - 1, Delta, modulus, tmplen, tmp); gfp_ext_mul (v[0], v[1], v[0], v[1], x0_x, x0_y, Delta, modulus, tmplen, tmp); gfp_ext_pow_norm1_sl (r1_x[1], r1_y[1], v[0], v[1], M - 1, Delta, modulus, tmplen, tmp); if (g_x != NULL) mpres_get_z (g_x[offset + 1], r1_x[1], modulus); if (g_y != NULL) mpres_get_z (g_y[offset + 1], r1_y[1], modulus); if (g_x_ntt != NULL) { mpres_get_z (mt, r1_x[1], modulus); mpzspv_from_mpzv (g_x_ntt, offset + 1, &mt, 1UL, ntt_context); } if (g_y_ntt != NULL) { mpres_get_z (mt, r1_y[1], modulus); mpzspv_from_mpzv (g_y_ntt, offset + 1, &mt, 1UL, ntt_context); } /* x0 := $x_0 * r^{2M - 3}$ */ /* We don't need x0 after this so we overwrite it. We use v[0,1] as temp storage for $r^{2M - 3}$. */ gfp_ext_pow_norm1_sl (v[0], v[1], r_x, r_y, 2UL*M - 3UL, Delta, modulus, tmplen, tmp); gfp_ext_mul (x0_x, x0_y, x0_x, x0_y, v[0], v[1], Delta, modulus, tmplen, tmp); /* Compute r2[0] = r1[0] * r^2 and r2[1] = r1[1] * r^2. */ /* We only need $r^2$ from here on, so we set r = $r^2$ */ gfp_ext_sqr_norm1 (r_x, r_y, r_x, r_y, modulus); gfp_ext_mul (r2_x[0], r2_y[0], r1_x[0], r1_y[0], r_x, r_y, Delta, modulus, tmplen, tmp); gfp_ext_mul (r2_x[1], r2_y[1], r1_x[1], r1_y[1], r_x, r_y, Delta, modulus, tmplen, tmp); /* v[1] := $x_0 * r^{2*M - 3} + 1/(x_0 * r^{2M - 3}) */ mpres_add (v[1], x0_x, x0_x, modulus); /* x0 := x0 * r = $x_0 * r^{2M - 1}$ */ gfp_ext_mul (x0_x, x0_y, x0_x, x0_y, r_x, r_y, Delta, modulus, tmplen, tmp); /* v[0] := $x_0 * r^{2M - 1} + 1/(x_0 * r^{2M - 1}) */ mpres_add (v[0], x0_x, x0_x, modulus); /* v2 = V_2 (r + 1/r) = r^2 + 1/r^2 */ mpres_add (v2, r_x, r_x, modulus); /* We don't need the contents of r any more and use it as a temp var */ for (i = 2; i < l; i++) { if (want_x) { /* r1[i] = r2[i-1] * v[i-2] - r2[i-2], with indices of r2 and i taken modulo 2. 
We store the new r1_x[i] in r_x for now */ mpres_mul (r_x, r2_x[1 - i % 2], v[i % 2], modulus); mpres_sub (r_x, r_x, r2_x[i % 2], modulus); /* r2[i] = r2[i-1] * v[i-1] - r1[i-2] */ mpres_mul (r2_x[i % 2], r2_x[1 - i % 2], v[1 - i % 2], modulus); mpres_sub (r2_x[i % 2], r2_x[i % 2], r1_x[i % 2], modulus); mpres_set (r1_x[i % 2], r_x, modulus); /* FIXME, avoid this copy */ if (g_x != NULL) mpres_get_z (g_x[offset + i], r_x, modulus); /* FIXME, avoid these REDC */ if (g_x_ntt != NULL) { mpres_get_z (mt, r_x, modulus); mpzspv_from_mpzv (g_x_ntt, offset + i, &mt, 1UL, ntt_context); } } if (want_y) { /* Same for y coordinate */ mpres_mul (r_y, r2_y[1 - i % 2], v[i % 2], modulus); mpres_sub (r_y, r_y, r2_y[i % 2], modulus); mpres_mul (r2_y[i % 2], r2_y[1 - i % 2], v[1 - i % 2], modulus); mpres_sub (r2_y[i % 2], r2_y[i % 2], r1_y[i % 2], modulus); mpres_set (r1_y[i % 2], r_y, modulus); if (g_y != NULL) mpres_get_z (g_y[offset + i], r_y, modulus); /* Keep r1, r2 in mpz_t ? */ if (g_y_ntt != NULL) { mpres_get_z (mt, r_y, modulus); mpzspv_from_mpzv (g_y_ntt, offset + i, &mt, 1UL, ntt_context); } } /* v[i] = v[i - 1] * V_2(a + 1/a) - v[i - 2] */ mpres_mul (r_x, v[1 - i % 2], v2, modulus); mpres_sub (v[i % 2], r_x, v[i % 2], modulus); if (want_output && test_verbose (OUTPUT_TRACE)) { mpz_t t; mpz_init (t); mpres_get_z (t, v[i % 2], modulus); outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ " "addrec(x_0 * r^(2*(M-%lu) - 1)) == %Zd /* PARI C */\n", i, t); mpz_clear (t); } } mpres_clear (r_x, modulus); mpres_clear (r_y, modulus); mpres_clear (x0_x, modulus); mpres_clear (x0_y, modulus); mpres_clear (v2, modulus); for (i = 0; i < 2; i++) { mpres_clear (r1_x[i], modulus); mpres_clear (r1_y[i], modulus); mpres_clear (r2_x[i], modulus); mpres_clear (r2_y[i], modulus); mpres_clear (v[i], modulus); } for (i = 0; i < tmplen; i++) mpres_clear (tmp[i], modulus); mpz_clear (mt); mpmod_clear (modulus); #ifdef _OPENMP } #endif print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); if (g_x != 
NULL && g_y != NULL && test_verbose(OUTPUT_TRACE)) { for (i = 0; i < l_param; i++) { outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ g_%lu = " "x_0^(M-%lu) * r^((M-%lu)^2); /* PARI */", i, i, i); outputf (OUTPUT_TRACE, "/* pp1_sequence_g */ g_%lu == " "%Zd + %Zd*w /* PARI C */\n", i, g_x[i], g_y[i]); } } } /* Compute r[i] = b1^(-P*(k+i)^2) * f_i for i = 0, 1, ..., l-1, where "b1" is an element of norm 1 in the quadratic extension ring */ static void pp1_sequence_h (listz_t h_x, listz_t h_y, mpzspv_t h_x_ntt, mpzspv_t h_y_ntt, const listz_t f, const mpres_t b1_x, const mpres_t b1_y, const long k_param, const unsigned long l_param, const unsigned long P, const mpres_t Delta, mpmod_t modulus_param, const mpzspm_t ntt_context) { unsigned long i; long timestart, realstart; if (l_param == 0UL) return; ASSERT (f != h_x); ASSERT (f != h_y); outputf (OUTPUT_VERBOSE, "Computing h_x and h_y"); timestart = cputime (); realstart = realtime (); if (test_verbose (OUTPUT_TRACE)) { mpz_t t; mpz_init (t); mpres_get_z (t, Delta, modulus_param); outputf (OUTPUT_TRACE, "\n/* pp1_sequence_h */ N = %Zd; " "Delta = %Zd; w = quadgen (4*Delta); k = %ld; P = %lu; " "/* PARI */\n", modulus_param->orig_modulus, t, k_param, P); outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ b_1 = "); gfp_ext_print (b1_x, b1_y, modulus_param, OUTPUT_TRACE); outputf (OUTPUT_TRACE, "; r = b_1^P; rn = b_1^(-P); /* PARI */\n"); for (i = 0; i < l_param; i++) outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ f_%lu = %Zd; /* PARI */\n", i, f[i]); mpz_clear (t); } #ifdef _OPENMP #pragma omp parallel if (l_param > 100) private(i) #endif { const size_t tmplen = 2; mpres_t s_x[3], s_y[3], s2_x[2], s2_y[2], v[2], V2, rn_x, rn_y, tmp[2]; mpmod_t modulus; /* Thread-local copy of modulus_param */ mpz_t mt; unsigned long l = l_param, offset = 0; long k = k_param; #ifdef _OPENMP /* When multi-threading, we adjust the parameters for each thread */ const int nr_chunks = omp_get_num_threads(); const int thread_nr = omp_get_thread_num(); l = 
(l_param - 1) / nr_chunks + 1; offset = thread_nr * l; ASSERT_ALWAYS (l_param >= offset); l = MIN(l, l_param - offset); if (thread_nr == 0) outputf (OUTPUT_VERBOSE, " using %d threads", nr_chunks); outputf (OUTPUT_TRACE, "\n"); #endif /* Each thread computes r[i + offset] = b1^(-P*(k+i+offset)^2) * f_i for i = 0, 1, ..., l-1, where l is the adjusted length of each thread */ /* Test that k+offset does not overflow */ ASSERT_ALWAYS (offset <= (unsigned long) LONG_MAX && k <= LONG_MAX - (long) offset); k += (long) offset; mpz_init (mt); /* Make thread-local copy of modulus */ mpmod_init_set (modulus, modulus_param); /* Init the local mpres_t variables */ for (i = 0; i < 2; i++) { mpres_init (s_x[i], modulus); mpres_init (s_y[i], modulus); mpres_init (s2_x[i], modulus); mpres_init (s2_y[i], modulus); mpres_init (v[i], modulus); } mpres_init (s_x[2], modulus); mpres_init (s_y[2], modulus); mpres_init (V2, modulus); mpres_init (rn_x, modulus); mpres_init (rn_y, modulus); for (i = 0; i < (unsigned long) tmplen; i++) mpres_init (tmp[i], modulus); /* Compute rn = b_1^{-P}. It has the same value for all threads, but we make thread local copies anyway. */ gfp_ext_pow_norm1_sl (rn_x, rn_y, b1_x, b1_y, P, Delta, modulus, tmplen, tmp); mpres_neg (rn_y, rn_y, modulus); /* Compute s[0] = rn^(k^2) = r^(-k^2). 
We do it by two exponentiations by k and use v[0] and v[1] as temp storage */ gfp_ext_pow_norm1_sl (v[0], v[1], rn_x, rn_y, k, Delta, modulus, tmplen, tmp); gfp_ext_pow_norm1_sl (s_x[0], s_y[0], v[0], v[1], k, Delta, modulus, tmplen, tmp); if (test_verbose (OUTPUT_TRACE)) { #ifdef _OPENMP #pragma omp critical #endif { outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ rn^(%ld^2) == ", k); gfp_ext_print (s_x[0], s_y[0], modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, " /* PARI C */\n"); } } /* Compute s[1] = r^(-(k+1)^2) = r^(-(k^2 + 2k + 1))*/ if (l > 1) { /* v[0] + v[1]*sqrt(Delta) still contains rn^k */ gfp_ext_sqr_norm1 (s_x[1], s_y[1], v[0], v[1], modulus); /* Now s[1] = r^(-2k) */ gfp_ext_mul (s_x[1], s_y[1], s_x[1], s_y[1], s_x[0], s_y[0], Delta, modulus, tmplen, tmp); /* Now s[1] = r^(-(k^2 + 2k)) */ gfp_ext_mul (s_x[1], s_y[1], s_x[1], s_y[1], rn_x, rn_y, Delta, modulus, tmplen, tmp); /* Now s[1] = r^(-(k^2 + 2k + 1)) = r^(-(k+1)^2) */ if (test_verbose (OUTPUT_TRACE)) { #ifdef _OPENMP #pragma omp critical #endif { outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ rn^(%ld^2) == ", k + 1); gfp_ext_print (s_x[1], s_y[1], modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, " /* PARI C */\n"); } } } /* Compute s2[0] = r^(k^2+2) = r^(k^2) * r^2 */ gfp_ext_sqr_norm1 (v[0], v[1], rn_x, rn_y, modulus); gfp_ext_mul (s2_x[0], s2_y[0], s_x[0], s_y[0], v[0], v[1], Delta, modulus, tmplen, tmp); if (test_verbose (OUTPUT_TRACE)) { #ifdef _OPENMP #pragma omp critical #endif { outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ rn^(%ld^2+2) == ", k); gfp_ext_print (s2_x[0], s2_y[0], modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, " /* PARI C */\n"); } } /* Compute a^((k+1)^2+2) = a^((k+1)^2) * a^2 */ gfp_ext_mul (s2_x[1], s2_y[1], s_x[1], s_y[1], v[0], v[1], Delta, modulus, tmplen, tmp); if (test_verbose (OUTPUT_TRACE)) { #ifdef _OPENMP #pragma omp critical #endif { outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ rn^(%ld^2+2) == ", k + 1); gfp_ext_print (s2_x[1], s2_y[1], modulus, OUTPUT_TRACE); 
outputf (OUTPUT_TRACE, " /* PARI C */\n"); } } /* Compute V_2(r + 1/r). Since 1/r = rn_x - rn_y, we have r+1/r = 2*rn_x. V_2(x) = x^2 - 2, so we want 4*rn_x^2 - 2. */ mpres_add (V2, rn_x, rn_x, modulus); /* V2 = r + 1/r = 2*rn_x */ V (v[0], V2, 2 * k + 1, modulus); /* v[0] = V_{2k+1} (r + 1/r) */ V (v[1], V2, 2 * k + 3, modulus); /* v[1] = V_{2k+3} (r + 1/r) */ mpres_sqr (V2, V2, modulus); /* V2 = 4*a_x^2 */ mpres_sub_ui (V2, V2, 2UL, modulus); /* V2 = 4*a_x^2 - 2 */ if (test_verbose (OUTPUT_TRACE)) { #ifdef _OPENMP #pragma omp critical #endif { mpres_get_z (mt, V2, modulus); outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ r^2 + 1/r^2 == %Zd " "/* PARI C */\n", mt); mpres_get_z (mt, v[0], modulus); outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ r^(2*%ld+1) + " "1/r^(2*%ld+1) == %Zd /* PARI C */\n", k, k, mt); mpres_get_z (mt, v[1], modulus); outputf (OUTPUT_TRACE, "/* pp1_sequence_h */ r^(2*%ld+3) + " "1/r^(2*%ld+3) == %Zd /* PARI C */\n", k, k, mt); } } for (i = 0; i < 2UL && i < l; i++) { /* Multiply the 2nd coordinate by Delta, so that after the polynomial multipoint evaluation we get x1 + Delta*x2 */ mpres_mul (s_y[i], s_y[i], Delta, modulus); mpres_mul (s2_y[i], s2_y[i], Delta, modulus); if (h_x != NULL) mpres_mul_z_to_z (h_x[i + offset], s_x[i], f[i + offset], modulus); if (h_y != NULL) mpres_mul_z_to_z (h_y[i + offset], s_y[i], f[i + offset], modulus); if (h_x_ntt != NULL) { mpres_mul_z_to_z (mt, s_x[i], f[i + offset], modulus); mpzspv_from_mpzv (h_x_ntt, i + offset, &mt, 1UL, ntt_context); } if (h_y_ntt != NULL) { mpres_mul_z_to_z (mt, s_y[i], f[i + offset], modulus); mpzspv_from_mpzv (h_y_ntt, i + offset, &mt, 1UL, ntt_context); } } /* Compute the remaining r^((k+i)^2) values according to Peter's recurrence */ for (i = 2; i < l; i++) { if (h_x != NULL || h_x_ntt != NULL) { /* r[i] = r2[i-1] * v[i-2] - r2[i-2], with indices of r2 and i taken modulo 2 */ mpres_mul (s_x[i % 3], s2_x[1 - i % 2], v[i % 2], modulus); mpres_sub (s_x[i % 3], s_x[i % 3], s2_x[i % 2], 
modulus); /* r2[i] = r2[i-1] * v[i-1] - r[i-2] */ mpres_mul (s2_x[i % 2], s2_x[1 - i % 2], v[1 - i % 2], modulus); mpres_sub (s2_x[i % 2], s2_x[i % 2], s_x[(i - 2) % 3], modulus); if (h_x != NULL) mpres_mul_z_to_z (h_x[i + offset], s_x[i % 3], f[i + offset], modulus); if (h_x_ntt != NULL) { mpres_mul_z_to_z (mt, s_x[i % 3], f[i + offset], modulus); mpzspv_from_mpzv (h_x_ntt, i + offset, &mt, 1UL, ntt_context); } } if (h_y != NULL || h_y_ntt != NULL) { /* Same for y coordinate */ mpres_mul (s_y[i % 3], s2_y[1 - i % 2], v[i % 2], modulus); mpres_sub (s_y[i % 3], s_y[i % 3], s2_y[i % 2], modulus); mpres_mul (s2_y[i % 2], s2_y[1 - i % 2], v[1 - i % 2], modulus); mpres_sub (s2_y[i % 2], s2_y[i % 2], s_y[(i - 2) % 3], modulus); if (h_y != NULL) mpres_mul_z_to_z (h_y[i + offset], s_y[i % 3], f[i + offset], modulus); if (h_y_ntt != NULL) { mpres_mul_z_to_z (mt, s_y[i % 3], f[i + offset], modulus); mpzspv_from_mpzv (h_y_ntt, i + offset, &mt, 1UL, ntt_context); } } /* v[i] = v[i - 1] * V_2(a + 1/a) - v[i - 2] */ mpres_mul (tmp[0], v[1 - i % 2], V2, modulus); mpres_sub (v[i % 2], tmp[0], v[i % 2], modulus); } /* Clear the local mpres_t variables */ for (i = 0; i < 2; i++) { mpres_clear (s_x[i], modulus); mpres_clear (s_y[i], modulus); mpres_clear (s2_x[i], modulus); mpres_clear (s2_y[i], modulus); mpres_clear (v[i], modulus); } mpres_clear (s_x[2], modulus); mpres_clear (s_y[2], modulus); mpres_clear (V2, modulus); mpres_clear (rn_x, modulus); mpres_clear (rn_y, modulus); for (i = 0; i < tmplen; i++) mpres_clear (tmp[i], modulus); /* Clear the thread-local copy of modulus */ mpmod_clear (modulus); mpz_clear (mt); } print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); if (h_x != NULL && h_y != NULL && test_verbose (OUTPUT_TRACE)) { for (i = 0; i < l_param; i++) gmp_printf ("/* pp1_sequence_h */ (rn^((k+%lu)^2) * f_%lu) == " "(%Zd + Mod(%Zd / Delta, N) * w) /* PARI C */\n", i, i, h_x[i], h_y[i]); } } int pp1fs2 (mpz_t f, const mpres_t X, mpmod_t modulus, const 
faststage2_param_t *params) { unsigned long nr; unsigned long i, l, lenF, lenH, lenG, lenR, tmplen; sets_long_t *S_1; /* This is stored as a set of sets (arithmetic progressions of prime length */ set_long_t *S_2; /* This is stored as a regular set */ listz_t F; /* Polynomial F has roots X^{k_1} for k_1 \in S_1, so has degree s_1. It is symmetric, so has only s_1 / 2 + 1 distinct coefficients. The sequence h_j will be stored in the same memory and won't be a monic polynomial, so the leading 1 monomial of F will be stored explicitly. Hence we need s_1 / 2 + 1 entries. */ listz_t g_x, g_y, fh_x, fh_y, h_x, h_y, tmp, R_x, R_y; const unsigned long tmpreslen = 2UL; mpres_t b1_x, b1_y, Delta, tmpres[2]; mpz_t mt; /* All-purpose temp mpz_t */ int youpi = ECM_NO_FACTOR_FOUND; long timetotalstart, realtotalstart, timestart; timetotalstart = cputime (); realtotalstart = realtime (); ASSERT_ALWAYS (eulerphi (params->P) == params->s_1 * params->s_2); ASSERT_ALWAYS (params->s_1 < params->l); nr = params->l - params->s_1; /* Number of points we evaluate */ if (make_S_1_S_2 (&S_1, &S_2, params) == ECM_ERROR) return ECM_ERROR; /* Allocate all the memory we'll need */ /* Allocate the correct amount of space for each mpz_t or the reallocations will up to double the time for stage 2! 
*/ mpz_init (mt); mpres_init (b1_x, modulus); mpres_init (b1_y, modulus); mpres_init (Delta, modulus); for (i = 0; i < tmpreslen; i++) mpres_init (tmpres[i], modulus); lenF = params->s_1 / 2 + 1 + 1; /* Another +1 because poly_from_sets_V stores the leading 1 monomial for each factor */ lenH = params->s_1 + 1; lenG = params->l; lenR = nr; F = init_list2 (lenF, (unsigned int) abs (modulus->bits)); fh_x = init_list2 (lenF, (unsigned int) abs (modulus->bits)); fh_y = init_list2 (lenF, (unsigned int) abs (modulus->bits)); h_x = malloc (lenH * sizeof (mpz_t)); h_y = malloc (lenH * sizeof (mpz_t)); if (h_x == NULL || h_y == NULL) { fprintf (stderr, "Cannot allocate memory in pp1fs2\n"); exit (1); } g_x = init_list2 (lenG, (unsigned int) abs (modulus->bits)); g_y = init_list2 (lenG, (unsigned int) abs (modulus->bits)); R_x = init_list2 (lenR, (unsigned int) abs (modulus->bits)); R_y = init_list2 (lenR, (unsigned int) abs (modulus->bits)); tmplen = 3UL * params->l + list_mul_mem (params->l / 2) + 20; outputf (OUTPUT_DEVVERBOSE, "tmplen = %lu\n", tmplen); if (TMulGen_space (params->l - 1, params->s_1, lenR) + 12 > tmplen) { tmplen = TMulGen_space (params->l - 1, params->s_1 - 1, lenR) + 12; /* FIXME: It appears TMulGen_space() returns a too small value! 
*/ outputf (OUTPUT_DEVVERBOSE, "With TMulGen_space, tmplen = %lu\n", tmplen); } tmp = init_list2 (tmplen, (unsigned int) abs (modulus->bits)); if (test_verbose (OUTPUT_TRACE)) { mpres_get_z (mt, X, modulus); /* mpz_t copy of X for printing */ outputf (OUTPUT_TRACE, "N = %Zd; X = Mod(%Zd, N); /* PARI */\n", modulus->orig_modulus, mt); } /* Compute the polynomial f(x) = \prod_{k_1 in S_1} (x - X^{2 k_1}) */ outputf (OUTPUT_VERBOSE, "Computing F from factored S_1"); timestart = cputime (); i = poly_from_sets_V (F, X, S_1, tmp, tmplen, modulus, NULL, NULL); ASSERT_ALWAYS(2 * i == params->s_1); ASSERT(mpz_cmp_ui (F[i], 1UL) == 0); free (S_1); S_1 = NULL; outputf (OUTPUT_VERBOSE, " took %lums\n", cputime () - timestart); if (test_verbose (OUTPUT_TRACE)) { for (i = 0; i < params->s_1 / 2 + 1; i++) outputf (OUTPUT_TRACE, "f_%lu = %Zd; /* PARI */\n", i, F[i]); outputf (OUTPUT_TRACE, "f(x) = f_0"); for (i = 1; i < params->s_1 / 2 + 1; i++) outputf (OUTPUT_TRACE, "+ f_%lu * (x^%lu + x^(-%lu))", i, i, i); outputf (OUTPUT_TRACE, "/* PARI */ \n"); } /* Compute Delta and b1_x + b1_y * sqrt(Delta) = X) */ mpres_sqr (Delta, X, modulus); mpres_sub_ui (Delta, Delta, 4UL, modulus); mpres_div_2exp (b1_x, X, 1, modulus); mpres_set_ui (b1_y, 1UL, modulus); mpres_div_2exp (b1_y, b1_y, 1, modulus); if (test_verbose (OUTPUT_TRACE)) { mpres_get_z (mt, Delta, modulus); outputf (OUTPUT_TRACE, "Delta = Mod(%Zd, N); w = quadgen (4*lift(Delta)); b_1 = ", mt); gfp_ext_print (b1_x, b1_y, modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, "; /* PARI */\n"); outputf (OUTPUT_TRACE, "X == b_1 + 1/b_1 /* PARI C */\n"); } /* Compute the h sequence h_j = b1^(P*-j^2) * f_j for 0 <= j <= s_1 */ pp1_sequence_h (fh_x, fh_y, NULL, NULL, F, b1_x, b1_y, 0L, params->s_1 / 2 + 1, params->P, Delta, modulus, NULL); /* We don't need F(x) any more */ clear_list (F, lenF); /* Make a symmetric copy of fh in h. 
*/ for (i = 0; i < params->s_1 / 2 + 1; i++) { *(h_x[i]) = *(fh_x[params->s_1 / 2 - i]); /* Clone the mpz_t */ *(h_y[i]) = *(fh_y[params->s_1 / 2 - i]); } for (i = 0; i < params->s_1 / 2; i++) { *(h_x[i + params->s_1 / 2 + 1]) = *(fh_x[i + 1]); *(h_y[i + params->s_1 / 2 + 1]) = *(fh_y[i + 1]); } if (test_verbose (OUTPUT_TRACE)) { for (i = 0; i < params->s_1 + 1; i++) outputf (OUTPUT_VERBOSE, "h_%lu = %Zd + %Zd * w; /* PARI */\n", i, h_x[i], h_y[i]); } for (l = 0; l < params->s_2; l++) { const long M = params->l - 1 - params->s_1 / 2; outputf (OUTPUT_VERBOSE, "Multi-point evaluation %lu of %lu:\n", l + 1, params->s_2); pp1_sequence_g (g_x, g_y, NULL, NULL, b1_x, b1_y, params->P, Delta, M, params->l, params->m_1, S_2->elem[l], modulus, NULL); /* Do the two convolution products */ outputf (OUTPUT_VERBOSE, "TMulGen of g_x and h_x"); timestart = cputime (); if (TMulGen (R_x, nr - 1, h_x, params->s_1, g_x, params->l - 1, tmp, modulus->orig_modulus) < 0) { outputf (OUTPUT_ERROR, "TMulGen returned error code (probably out " "of memory)\n"); youpi = ECM_ERROR; break; } outputf (OUTPUT_VERBOSE, " took %lums\n", cputime () - timestart); outputf (OUTPUT_VERBOSE, "TMulGen of g_y and h_y"); timestart = cputime (); if (TMulGen (R_y, nr - 1, h_y, params->s_1, g_y, params->l - 1, tmp, modulus->orig_modulus) < 0) { outputf (OUTPUT_ERROR, "TMulGen returned error code (probably out " "of memory)\n"); youpi = ECM_ERROR; break; } outputf (OUTPUT_VERBOSE, " took %lums\n", cputime () - timestart); for (i = 0; i < nr; i++) mpz_add (R_x[i], R_x[i], R_y[i]); timestart = cputime (); mpres_set_ui (tmpres[1], 1UL, modulus); /* Accumulate product in tmpres[1] */ for (i = 0; i < nr; i++) { mpres_set_z_for_gcd (tmpres[0], R_x[i], modulus); #define TEST_ZERO_RESULT #ifdef TEST_ZERO_RESULT if (mpres_is_zero (tmpres[0], modulus)) outputf (OUTPUT_VERBOSE, "R_[%lu] = 0\n", i); #endif mpres_mul (tmpres[1], tmpres[1], tmpres[0], modulus); } outputf (OUTPUT_VERBOSE, "Computing product of F(g_i)^(1) took 
%lums\n", cputime () - timestart); if (test_verbose(OUTPUT_RESVERBOSE)) { mpres_get_z (mt, tmpres[1], modulus); outputf (OUTPUT_RESVERBOSE, "Product of R[i] = %Zd (times some " "power of 2 if REDC was used! Try -mpzmod)\n", mt); } mpres_gcd (mt, tmpres[1], modulus); if (mpz_cmp_ui (mt, 1UL) > 0) { mpz_set (f, mt); youpi = ECM_FACTOR_FOUND_STEP2; break; } } mpz_clear (mt); mpres_clear (b1_x, modulus); mpres_clear (b1_y, modulus); mpres_clear (Delta, modulus); for (i = 0; i < tmpreslen; i++) mpres_clear (tmpres[i], modulus); clear_list (fh_x, lenF); clear_list (fh_y, lenF); free (h_x); free (h_y); clear_list (g_x, lenG); clear_list (g_y, lenG); clear_list (R_x, lenR); clear_list (R_y, lenR); clear_list (tmp, tmplen); free (S_2); outputf (OUTPUT_NORMAL, "Step 2"); /* In normal output mode, print only cpu time as we always have. In verbose mode, print real time as well if we used multi-threading */ if (test_verbose (OUTPUT_VERBOSE)) print_elapsed_time (OUTPUT_NORMAL, timetotalstart, realtotalstart); else print_elapsed_time (OUTPUT_NORMAL, timetotalstart, 0L); return youpi; } int pp1fs2_ntt (mpz_t f, const mpres_t X, mpmod_t modulus, const faststage2_param_t *params, const int twopass) { unsigned long nr; unsigned long l, lenF; sets_long_t *S_1; /* This is stored as a set of sets (arithmetic progressions of prime length */ set_long_t *S_2; /* This is stored as a regular set */ listz_t F; /* Polynomial F has roots X^{k_1} for k_1 \in S_1, so has degree s_1. It is symmetric, so has only s_1 / 2 + 1 distinct coefficients. The sequence h_j will be stored in the same memory and won't be a monic polynomial, so the leading 1 monomial of F will be stored explicitly. Hence we need s_1 / 2 + 1 entries. */ listz_t R = NULL; /* Is used only for two-pass convolution, has nr entries. R is only ever referenced if twopass == 1, but gcc does not realize that and complains about uninitialized value, so we set it to NULL. 
*/ mpzspm_t ntt_context; mpzspv_t g_x_ntt, g_y_ntt, h_x_ntt, h_y_ntt; mpres_t b1_x, b1_y, Delta; mpz_t mt; /* All-purpose temp mpz_t */ mpz_t product; mpz_t *product_ptr = NULL; int youpi = ECM_NO_FACTOR_FOUND; long timetotalstart, realtotalstart, timestart, realstart; timetotalstart = cputime (); realtotalstart = realtime (); ASSERT_ALWAYS (eulerphi (params->P) == params->s_1 * params->s_2); ASSERT_ALWAYS (params->s_1 < params->l); nr = params->l - params->s_1; /* Number of points we evaluate */ if (make_S_1_S_2 (&S_1, &S_2, params) == ECM_ERROR) return ECM_ERROR; mpz_init (mt); /* Prepare NTT for computing the h sequence, its DCT-I, and the convolution with g. We need NTT of transform length l here. If we want to add transformed vectors, we need to double the modulus. */ if (twopass) mpz_set (mt, modulus->orig_modulus); else mpz_mul_2exp (mt, modulus->orig_modulus, 1UL); ntt_context = mpzspm_init (params->l, mt); if (ntt_context == NULL) { outputf (OUTPUT_ERROR, "Could not initialise ntt_context, " "presumably out of memory\n"); mpz_clear (mt); free (S_1); S_1 = NULL; free (S_2); S_2 = NULL; return ECM_ERROR; } print_CRT_primes (OUTPUT_DEVVERBOSE, "CRT modulus for evaluation = ", ntt_context); /* Allocate memory for F with correct amount of space for each mpz_t */ lenF = params->s_1 / 2 + 1 + 1; /* Another +1 because poly_from_sets_V stores the leading 1 monomial for each factor */ MEMORY_TAG; F = init_list2 (lenF, (unsigned int) abs (modulus->bits) + GMP_NUMB_BITS); MEMORY_UNTAG; /* Build F */ if (build_F_ntt (F, X, S_1, params, modulus) == ECM_ERROR) { free (S_1); free (S_2); mpz_clear (mt); mpzspm_clear (ntt_context); clear_list (F, lenF); return ECM_ERROR; } free (S_1); S_1 = NULL; mpres_init (b1_x, modulus); mpres_init (b1_y, modulus); mpres_init (Delta, modulus); /* Compute Delta and b1_x + b1_y * sqrt(Delta) = X) */ mpres_sqr (Delta, X, modulus); mpres_sub_ui (Delta, Delta, 4UL, modulus); mpres_div_2exp (b1_x, X, 1, modulus); mpres_set_ui (b1_y, 1UL, 
modulus); mpres_div_2exp (b1_y, b1_y, 1, modulus); if (test_verbose (OUTPUT_TRACE)) { mpres_get_z (mt, Delta, modulus); outputf (OUTPUT_TRACE, "Delta = Mod(%Zd, N); w = quadgen (4*lift(Delta)); b_1 = ", mt); gfp_ext_print (b1_x, b1_y, modulus, OUTPUT_TRACE); outputf (OUTPUT_TRACE, "; /* PARI */\n"); outputf (OUTPUT_TRACE, "X == b_1 + 1/b_1 /* PARI C */\n"); } /* Allocate remaining memory for h_ntt */ h_x_ntt = mpzspv_init (params->l / 2 + 1, ntt_context); h_y_ntt = mpzspv_init (params->l / 2 + 1, ntt_context); /* Compute the h_j sequence */ pp1_sequence_h (NULL, NULL, h_x_ntt, h_y_ntt, F, b1_x, b1_y, 0L, params->s_1 / 2 + 1, params->P, Delta, modulus, ntt_context); /* We don't need F(x) any more */ clear_list (F, lenF); /* compute the forward transform of h and store the distinct coefficients in h_ntt */ g_x_ntt = mpzspv_init (params->l, ntt_context); if (twopass) { g_y_ntt = g_x_ntt; MEMORY_TAG; R = init_list2 (nr, (mpz_size (modulus->orig_modulus) + 2) * GMP_NUMB_BITS); MEMORY_UNTAG; } else g_y_ntt = mpzspv_init (params->l, ntt_context); /* Compute DCT-I of h_x and h_y */ outputf (OUTPUT_VERBOSE, "Computing DCT-I of h_x"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_to_dct1 (h_x_ntt, h_x_ntt, params->s_1 / 2 + 1, params->l / 2 + 1, g_x_ntt, ntt_context); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); outputf (OUTPUT_VERBOSE, "Computing DCT-I of h_y"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_to_dct1 (h_y_ntt, h_y_ntt, params->s_1 / 2 + 1, params->l / 2 + 1, g_x_ntt, ntt_context); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_init (product); product_ptr = &product; } for (l = 0; l < params->s_2; l++) { const long M = params->l - 1 - params->s_1 / 2; outputf (OUTPUT_VERBOSE, "Multi-point 
evaluation %lu of %lu:\n", l + 1, params->s_2); if (twopass) { /* Two-pass variant. Two separate convolutions, then addition in Z/NZ */ pp1_sequence_g (NULL, NULL, g_x_ntt, NULL, b1_x, b1_y, params->P, Delta, M, params->l, params->m_1, S_2->elem[l], modulus, ntt_context); /* Do the convolution product of g_x * h_x */ outputf (OUTPUT_VERBOSE, "Computing g_x*h_x"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_mul_by_dct (g_x_ntt, h_x_ntt, params->l, ntt_context, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); /* Store the product coefficients we want in R */ mpzspv_to_mpzv (g_x_ntt, params->s_1 / 2, R, nr, ntt_context); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); /* Compute g_y sequence */ pp1_sequence_g (NULL, NULL, NULL, g_y_ntt, b1_x, b1_y, params->P, Delta, M, params->l, params->m_1, S_2->elem[l], modulus, ntt_context); /* Do the convolution product of g_y * (Delta * h_y) */ outputf (OUTPUT_VERBOSE, "Computing g_y*h_y"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_mul_by_dct (g_y_ntt, h_y_ntt, params->l, ntt_context, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); /* Compute product of sum of coefficients and gcd with N */ ntt_gcd (mt, product_ptr, g_y_ntt, params->s_1 / 2, R, nr, ntt_context, modulus); } else { /* One-pass variant. 
Two forward transforms and point-wise products, then addition and single inverse transform */ pp1_sequence_g (NULL, NULL, g_x_ntt, g_y_ntt, b1_x, b1_y, params->P, Delta, M, params->l, params->m_1, S_2->elem[l], modulus, ntt_context); outputf (OUTPUT_VERBOSE, "Computing forward NTT of g_x"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_mul_by_dct (g_x_ntt, h_x_ntt, params->l, ntt_context, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); outputf (OUTPUT_VERBOSE, "Computing forward NTT of g_y"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_mul_by_dct (g_y_ntt, h_y_ntt, params->l, ntt_context, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); outputf (OUTPUT_VERBOSE, "Adding and computing inverse NTT of sum"); #ifdef _OPENMP outputf (OUTPUT_VERBOSE, " using %d threads", omp_get_thread_limit()); #endif timestart = cputime (); realstart = realtime (); mpzspv_add (g_x_ntt, (spv_size_t) 0, g_x_ntt, (spv_size_t) 0, g_y_ntt, (spv_size_t) 0, params->l, ntt_context); mpzspv_mul_by_dct (g_x_ntt, NULL, params->l, ntt_context, NTT_MUL_STEP_IFFT); print_elapsed_time (OUTPUT_VERBOSE, timestart, realstart); ntt_gcd (mt, product_ptr, g_x_ntt, params->s_1 / 2, NULL, nr, ntt_context, modulus); } outputf (OUTPUT_RESVERBOSE, "Product of R[i] = %Zd (times some " "power of 2 if REDC was used! 
Try -mpzmod)\n", product); if (mpz_cmp_ui (mt, 1UL) > 0) { mpz_set (f, mt); youpi = ECM_FACTOR_FOUND_STEP2; break; } } if (test_verbose (OUTPUT_RESVERBOSE)) { product_ptr = NULL; mpz_clear (product); } mpzspv_clear (g_x_ntt, ntt_context); if (twopass) clear_list (R, nr); else mpzspv_clear (g_y_ntt, ntt_context); mpzspv_clear (h_x_ntt, ntt_context); mpzspv_clear (h_y_ntt, ntt_context); mpzspm_clear (ntt_context); mpz_clear (mt); mpres_clear (b1_x, modulus); mpres_clear (b1_y, modulus); mpres_clear (Delta, modulus); free (S_2); outputf (OUTPUT_NORMAL, "Step 2"); /* In normal output mode, print only cpu time as we always have. In verbose mode, print real time as well if we used multi-threading */ if (test_verbose (OUTPUT_VERBOSE)) print_elapsed_time (OUTPUT_NORMAL, timetotalstart, realtotalstart); else print_elapsed_time (OUTPUT_NORMAL, timetotalstart, 0L); return youpi; } ecm-6.4.4/mul_fft-params.h.athlon640000644023561000001540000001121212106741273013737 00000000000000#define MUL_FFT_MODF_THRESHOLD 300 #define SQR_FFT_MODF_THRESHOLD 568 #define MUL_FFT_TABLE2 {{1, 4 /*66*/}, {401, 5 /*96*/}, {417, 4 /*98*/}, {433, 5 /*96*/}, {865, 6 /*96*/}, {897, 5 /*98*/}, {929, 6 /*96*/}, {2113, 7 /*97*/}, {2177, 6 /*98*/}, {2241, 7 /*97*/}, {2305, 6 /*98*/}, {2369, 7 /*97*/}, {3713, 8 /*93*/}, {3841, 7 /*98*/}, {4225, 8 /*94*/}, {4353, 7 /*98*/}, {4481, 8 /*94*/}, {4865, 7 /*98*/}, {4993, 8 /*95*/}, {6913, 9 /*87*/}, {7169, 8 /*96*/}, {7425, 9 /*93*/}, {7681, 8 /*96*/}, {8449, 9 /*94*/}, {8705, 8 /*97*/}, {8961, 9 /*90*/}, {9729, 8 /*97*/}, {9985, 9 /*90*/}, {11777, 8 /*97*/}, {12033, 9 /*92*/}, {13825, 10 /*87*/}, {14337, 9 /*96*/}, {17921, 10 /*90*/}, {19457, 9 /*97*/}, {20993, 10 /*87*/}, {21505, 9 /*97*/}, {22017, 10 /*91*/}, {23553, 9 /*97*/}, {26113, 10 /*92*/}, {31745, 9 /*98*/}, {32257, 10 /*88*/}, {44033, 11 /*91*/}, {47105, 10 /*97*/}, {56321, 11 /*87*/}, {63489, 10 /*98*/}, {70657, 11 /*87*/}, {71681, 10 /*98*/}, {72705, 11 /*90*/}, {79873, 10 /*98*/}, {80897, 11 
/*83*/}, {81921, 10 /*96*/}, {82945, 11 /*85*/}, {96257, 10 /*98*/}, {97281, 12 /*75*/}, {98305, 10 /*97*/}, {101377, 12 /*78*/}, {102401, 11 /*91*/}, {110593, 12 /*87*/}, {126977, 11 /*98*/}, {161793, 12 /*83*/}, {192513, 11 /*98*/}, {194561, 13 /*75*/}, {253953, 12 /*98*/}, {258049, 11 /*99*/}, {276481, 12 /*85*/}, {282625, 11 /*96*/}, {284673, 12 /*87*/}, {389121, 11 /*99*/}, {391169, 13 /*75*/}, {434177, 12 /*95*/}, {438273, 13 /*84*/}, {516097, 12 /*99*/}, {585729, 11 /*99*/}, {620545, 13 /*79*/}, {630785, 12 /*96*/}, {651265, 13 /*83*/}, {778241, 12 /*99*/}, {782337, 11 /*99*/}, {817153, 12 /*96*/}, {819201, 14 /*79*/}, {1032193, 13 /*99*/}, {1040385, 11 /*99*/}, {1046529, 12 /*94*/}, {LONG_MAX, 0}} #define MUL_FFTM_TABLE2 {{1, 4 /*66*/}, {337, 5 /*95*/}, {353, 4 /*97*/}, {369, 5 /*96*/}, {385, 4 /*98*/}, {401, 5 /*96*/}, {801, 6 /*96*/}, {833, 5 /*98*/}, {865, 6 /*96*/}, {1729, 7 /*96*/}, {1793, 6 /*98*/}, {1857, 7 /*96*/}, {2049, 6 /*98*/}, {2113, 7 /*97*/}, {3841, 8 /*96*/}, {4097, 7 /*98*/}, {4225, 8 /*97*/}, {4609, 7 /*98*/}, {4737, 8 /*97*/}, {7169, 9 /*93*/}, {7681, 8 /*98*/}, {8449, 9 /*94*/}, {8705, 8 /*98*/}, {8961, 9 /*94*/}, {9217, 8 /*98*/}, {9473, 9 /*95*/}, {14849, 10 /*93*/}, {15361, 9 /*96*/}, {17921, 10 /*90*/}, {19457, 9 /*97*/}, {20481, 10 /*95*/}, {21505, 9 /*97*/}, {22017, 10 /*91*/}, {23553, 9 /*97*/}, {24065, 10 /*92*/}, {29697, 11 /*93*/}, {30721, 10 /*96*/}, {37889, 11 /*95*/}, {38913, 10 /*97*/}, {44033, 11 /*91*/}, {47105, 10 /*97*/}, {52225, 11 /*92*/}, {55297, 10 /*98*/}, {56321, 11 /*87*/}, {63489, 10 /*98*/}, {64513, 11 /*88*/}, {79873, 12 /*83*/}, {81921, 11 /*93*/}, {88065, 12 /*91*/}, {94209, 11 /*97*/}, {104449, 12 /*81*/}, {110593, 11 /*98*/}, {112641, 12 /*87*/}, {126977, 11 /*98*/}, {137217, 12 /*85*/}, {159745, 11 /*98*/}, {161793, 12 /*83*/}, {167937, 11 /*98*/}, {169985, 12 /*87*/}, {192513, 11 /*98*/}, {194561, 12 /*85*/}, {196609, 11 /*97*/}, {202753, 12 /*89*/}, {217089, 13 /*84*/}, {221185, 12 /*98*/}, {225281, 13 
/*87*/}, {253953, 12 /*98*/}, {323585, 13 /*83*/}, {385025, 12 /*98*/}, {389121, 14 /*75*/}, {393217, 12 /*93*/}, {405505, 14 /*78*/}, {507905, 13 /*98*/}, {516097, 12 /*99*/}, {552961, 13 /*85*/}, {573441, 12 /*97*/}, {577537, 13 /*88*/}, {778241, 12 /*99*/}, {782337, 13 /*85*/}, {851969, 14 /*82*/}, {868353, 13 /*95*/}, {909313, 14 /*87*/}, {1032193, 13 /*99*/}, {LONG_MAX, 0}} #define MUL_FFT_FULL_TABLE2 {{16, 1}, {4224, 2}, {4416, 6}, {4480, 2}, {4608, 4}, {4640, 2}, {4800, 1}, {5120, 2}, {5184, 1}, {5632, 2}, {5760, 1}, {6656, 4}, {6720, 1}, {7168, 4}, {7360, 1}, {7936, 4}, {8000, 2}, {8064, 1}, {8704, 2}, {8832, 6}, {8960, 3}, {9216, 1}, {13312, 6}, {14336, 3}, {15360, 5}, {16896, 6}, {17920, 1}, {19968, 2}, {20736, 1}, {21504, 2}, {23808, 1}, {28672, 4}, {29440, 2}, {29952, 1}, {33792, 2}, {35328, 1}, {36864, 4}, {37120, 1}, {49152, 4}, {49920, 1}, {50176, 3}, {53248, 1}, {55296, 2}, {59904, 3}, {61440, 1}, {65536, 2}, {70656, 6}, {71680, 2}, {72192, 5}, {73728, 4}, {79360, 1}, {81920, 2}, {82944, 1}, {86016, 2}, {89088, 1}, {90112, 2}, {95232, 1}, {100352, 5}, {110592, 1}, {114688, 4}, {117760, 1}, {131072, 2}, {144384, 5}, {147456, 4}, {158720, 1}, {161792, 3}, {163840, 2}, {190464, 1}, {196608, 4}, {199680, 3}, {212992, 1}, {262144, 6}, {272384, 7}, {294912, 6}, {301056, 4}, {322560, 1}, {327680, 3}, {344064, 2}, {380928, 1}, {385024, 2}, {387072, 1}, {393216, 7}, {425984, 6}, {444416, 5}, {466944, 1}, {520192, 2}, {577536, 7}, {589824, 6}, {602112, 4}, {645120, 3}, {688128, 2}, {774144, 1}, {786432, 6}, {788480, 4}, {808960, 5}, {811008, 2}, {817152, 3}, {819200, 5}, {823296, 2}, {829440, 1}, {1048576, 2}, {1069056, 1}, {1073152, 5}, {1081344, 3}, {1089536, 2}, {LONG_MAX, 1}} ecm-6.4.4/ecm-params.h.pentium40000644023561000001540000000112112106741273013153 00000000000000/* those parameters were generated on 3 Jan 2012 on macaron.loria.fr (Intel(R) Pentium(R) 4 CPU 3.20GHz) for ecm-6.4 with GMP 5.0.2 */ #define MPZMOD_THRESHOLD 84 #define REDC_THRESHOLD 119 
#define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 4096 #define PREREVERTDIVISION_NTT_THRESHOLD 64 #define POLYINVERT_NTT_THRESHOLD 1024 #define POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 2048 ecm-6.4.4/ecm-params.h.ia640000644023561000001540000000160112106741273012154 00000000000000/* those parameters were obtained on gcc60.fsffrance.org with ecm-6.3-rc3 gmp-5.0.1, and gcc 4.3.2 -O2 -pedantic -mtune=itanium2 (ia64-unknown-linux-gnu) */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2} #define MPZMOD_THRESHOLD 61 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 11, 12, 1, 14, 14, 16, 1, 18, 19, 16, 20, 18, 19, 18, 19, 20, 21} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 17 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 17 #define MUL_NTT_THRESHOLD 262144 #define PREREVERTDIVISION_NTT_THRESHOLD 262144 #define POLYINVERT_NTT_THRESHOLD 262144 #define POLYEVALT_NTT_THRESHOLD 262144 #define MPZSPV_NORMALISE_STRIDE 2048 ecm-6.4.4/pentium4/0000755023561000001540000000000012113421640011032 500000000000000ecm-6.4.4/pentium4/autogen.py0000755023561000001540000001640412106741272013006 00000000000000#!/usr/bin/python import re import sys def offaddr(addr, offset): if offset == 0: return "("+addr+")" else: return str(offset)+"("+addr+")" # Generate asm for addmul1_k # src and dst are pointers (stored in regs) + offsets # multiplier is in a register # rax, rbx, rcx, rdx are free for use. 
def addmul1_k(src, off_src, dst, off_dst, mult, k): init = "### addmul1: src[0] is " + offaddr(src, off_src) + "\n" init = init + "### dst[0] is " + offaddr(dst, off_dst) + "\n" init = init + "### mult is " + mult + "\n" init = init + "### k is " + str(k) + "\n" init = init + "### kills %eax, %ebx, %ecx, %edx\n" init = init + "### dst[0,k[ += mult*src[0,k[ plus carry put in ecx or ebx\n" init = init + " movl " + offaddr(src, off_src) + ", %eax\n" init = init + " mull " + mult + "\n" init = init + " movl %eax, %ebx\n" init = init + " movl %edx, %ecx\n" block = """ movl __xii__, %eax mull __mult__ addl __cylo__, __zi__ adcl %eax, __cyhi__ movl %edx, __cylo__ adcl $0, __cylo__ """ code = init cylo = "%ebx" cyhi = "%ecx" for i in range(0,k-1): blocki = re.sub('__cylo__', cylo, block) blocki = re.sub('__cyhi__', cyhi, blocki) blocki = re.sub('__xii__', offaddr(src, off_src+(i+1)*4), blocki) blocki = re.sub('__zi__', offaddr(dst, off_dst+i*4), blocki) blocki = re.sub('__mult__', mult, blocki) code = code + blocki tmp = cylo cylo = cyhi cyhi = tmp final = " addl " + cylo + ", " + offaddr(dst, off_dst+4*(k-1)) + "\n" final = final + " adcl $0, " + cyhi + "\n" final = final + "### carry limb is in " + cyhi + "\n" code = code + final return code, cyhi ### Try mmx/sse2 addmul_1, copying the one of GMP for Pentium4 def addmul1_k_var(src, off_src, dst, off_dst, mult, k): init = "### addmul1: src[0] is " + offaddr(src, off_src) + "\n" init = init + "### dst[0] is " + offaddr(dst, off_dst) + "\n" init = init + "### mult is " + mult + "\n" init = init + "### k is " + str(k) + "\n" init = init + "### kills %eax, %edx and mmx regs \n" init = init + "### dst[0,k[ += mult*src[0,k[ plus carry put in ecx\n" init = init + " pxor %mm0, %mm0\n" init = init + " movd " + mult + ", %mm7\n" block = """ movd __xi__, %mm1 movd __zi__, %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, __zi__ psrlq $32, %mm0 """ code = init for i in range(0,k): blocki = re.sub('__xi__', 
offaddr(src, off_src+i*4), block) blocki = re.sub('__zi__', offaddr(dst, off_dst+i*4), blocki) code = code + blocki final = " movd %mm0, %ecx\n" final = final + "### carry limb is in %ecx\n" code = code + final return code, "%ecx" def mulredc_k_rolled(k): header = """# mp_limb_t mulredc__k(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc__k TYPE(GSYM_PREFIX`'mulredc__k,`function') GSYM_PREFIX`'mulredc__k: """ init = re.sub("__k", str(k), header) INV_M = offaddr("%esp", 4*(2*k+1) + 40) ADDR_M = offaddr("%esp", 4*(2*k+1) + 36) ADDR_Y = offaddr("%esp", 4*(2*k+1) + 32) ADDR_X = offaddr("%esp", 4*(2*k+1) + 28) ADDR_Z = offaddr("%esp", 4*(2*k+1) + 24) init = init + """ pushl %ebp pushl %edi pushl %esi pushl %ebx """ init = init + " subl $" + str(4*(2*k+2)) + ", %esp\n" init = init + " movl %esp, %edi\n" init = init + "### set tmp[0..2k+1[ to 0\n" for i in range(0,2*k+1): init = init + " movl $0, " + offaddr("%edi", 4*i) + "\n" code = init middle_code = "###########################################\n" middle_code = middle_code + " movl $" + str(k) + ", " + offaddr("%esp", 4*(2*k+1)) + "\n" middle_code = middle_code + """ .align 32 Loop: ## compute u and store in %ebp """ middle_code = middle_code + " movl " + ADDR_X + ", %eax\n" middle_code = middle_code + " movl " + ADDR_Y + ", %esi\n" middle_code = middle_code + """ movl (%eax), %eax mull (%esi) addl (%edi), %eax """ middle_code = middle_code + " mull " + INV_M + "\n" middle_code = middle_code + " movl %eax, %ebp\n" middle_code = middle_code + " movl " + ADDR_M + ", %esi\n" codeaddmul, carry = addmul1_k_var("%esi", 0, "%edi", 0, "%ebp", k) middle_code = middle_code + codeaddmul middle_code = 
middle_code + " addl " + carry + ", " + offaddr("%edi", 4*k) + "\n" middle_code = middle_code + " adcl $0, " + offaddr("%edi", 4*(k+1)) + "\n" middle_code = middle_code + " movl " + ADDR_X + ", %eax\n" middle_code = middle_code + " movl (%eax), %ebp\n" middle_code = middle_code + " movl " + ADDR_Y + ", %esi\n" codeaddmul, carry = addmul1_k_var("%esi", 0, "%edi", 0, "%ebp", k) middle_code = middle_code + codeaddmul middle_code = middle_code + " addl " + carry + ", " + offaddr("%edi", 4*k) + "\n" middle_code = middle_code + " adcl $0, " + offaddr("%edi", 4*(k+1)) + "\n\n" middle_code = middle_code + " addl $4, " + ADDR_X + "\n addl $4, %edi\n" middle_code = middle_code + " decl " + offaddr("%esp", 4*(2*k+1)) + "\n jnz Loop\n" code = code + middle_code final = "###########################################\n" final = final + "### Copy result in z\n" final = final + " movl " + ADDR_Z + ", %ebx\n" for i in range(0,k): final = final + " movl " + offaddr("%edi", 4*i) + ", %eax\n" final = final + " movl %eax, " + offaddr("%ebx", 4*i) + "\n" final = final + " movl " + offaddr("%edi", 4*k) + ", %eax # carry\n" final = final + " addl $" + str(4*(2*k+2)) + ", %esp\n" final = final + " popl %ebx\n" final = final + " popl %esi\n" final = final + " popl %edi\n" final = final + " popl %ebp\n" final = final + " emms\n" final = final + " ret\n" code = code + final return code k = int(sys.argv[1]) if k == 1: print """# # mp_limb_t mulredc1(mp_limb_t *z, const mp_limb_t x, const mp_limb_t y, # const mp_limb_t m, mp_limb_t inv_m) # # Compute z := x*y mod m, in Montgomery representation, where x, y < m # and m is n limb wide. inv_m is the less significant limb of the # inverse of m modulo 2^(n*GMP_LIMB_BITS) # # The result might be unreduced (larger than m) but becomes reduced # after subtracting m. The calling function should take care of that. # # We use a temporary space for unreduced product on the stack. 
# Therefore, this can not be used for large integers (anyway, the # algorithm is quadratic). # # WARNING: z is only n limbs but since it might be unreduced, there # could be a carry that does not fit in z. This carry is returned. include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc1 TYPE(GSYM_PREFIX`'mulredc1,`function') GSYM_PREFIX`'mulredc1: # Stack: # inv_m 20(%esp) # m 16 # y 12(%esp) # x 8 # z 4(%esp) movl 12(%esp), %eax mull 8(%esp) movl %edx, 12(%esp) movl %eax, 8(%esp) # store xy in [8(%esp):12(%esp)] mull 20(%esp) # compute u mull 16(%esp) # compute u*m addl 8(%esp), %eax # eax is 0, now (carry is important) adcl 12(%esp), %edx movl 4(%esp), %ecx movl %edx, (%ecx) adcl $0, %eax ret """ else: print mulredc_k_rolled(k) ecm-6.4.4/pentium4/mulredc3.asm0000644023561000001540000000560612106741272013211 00000000000000# mp_limb_t mulredc3(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc3 TYPE(GSYM_PREFIX`'mulredc3,`function') GSYM_PREFIX`'mulredc3: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $32, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) ########################################### movl $3, 28(%esp) .align 32 Loop: ## compute u and store in %ebp movl 56(%esp), %eax movl 60(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 68(%esp) movl %eax, %ebp movl 64(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 3 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 12(%edi) adcl $0, 16(%edi) movl 56(%esp), %eax movl (%eax), %ebp movl 60(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 3 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 12(%edi) adcl $0, 16(%edi) addl $4, 56(%esp) 
addl $4, %edi decl 28(%esp) jnz Loop ########################################### ### Copy result in z movl 52(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax # carry addl $32, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc.h0000644023561000001540000000462512106741272012575 00000000000000#ifndef __ASM_REDC_H__ #define __ASM_REDC_H__ #include /* Signals that we have assembly code for variable size redc */ #define HAVE_ASM_REDC3 extern void ecm_redc3(mp_limb_t *, const mp_limb_t *, mp_size_t, mp_limb_t); /* WARNING: the size-1 version doesn't take pointers in input */ extern mp_limb_t mulredc1(mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t); extern mp_limb_t mulredc2(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc3(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc4(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc5(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc6(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc7(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc8(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc9(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc10(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc11(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc12(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t 
mulredc13(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc14(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc15(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc16(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc17(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc18(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc19(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc20(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); #endif ecm-6.4.4/pentium4/mulredc14.asm0000644023561000001540000001532312106741272013270 00000000000000# mp_limb_t mulredc14(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc14 TYPE(GSYM_PREFIX`'mulredc14,`function') GSYM_PREFIX`'mulredc14: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $120, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) ########################################### movl $14, 116(%esp) .align 32 Loop: ## compute u and store in %ebp movl 144(%esp), %eax movl 148(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 156(%esp) movl %eax, %ebp movl 152(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 14 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 
24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 56(%edi) adcl $0, 60(%edi) movl 144(%esp), %eax movl (%eax), %ebp movl 148(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 14 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 
movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 56(%edi) adcl $0, 60(%edi) addl $4, 144(%esp) addl $4, %edi decl 116(%esp) jnz Loop ########################################### ### Copy result in z movl 140(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax # carry addl $120, %esp popl %ebx popl %esi 
popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc5.asm0000644023561000001540000000711212106741272013205 00000000000000# mp_limb_t mulredc5(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc5 TYPE(GSYM_PREFIX`'mulredc5,`function') GSYM_PREFIX`'mulredc5: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $48, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) ########################################### movl $5, 44(%esp) .align 32 Loop: ## compute u and store in %ebp movl 72(%esp), %eax movl 76(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 84(%esp) movl %eax, %ebp movl 80(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 5 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx 
addl %ecx, 20(%edi) adcl $0, 24(%edi) movl 72(%esp), %eax movl (%eax), %ebp movl 76(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 5 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 20(%edi) adcl $0, 24(%edi) addl $4, 72(%esp) addl $4, %edi decl 44(%esp) jnz Loop ########################################### ### Copy result in z movl 68(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax # carry addl $48, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/Makefile.dev0000644023561000001540000000160412106741272013200 00000000000000.PHONY: all all: test_mulredc bench CFLAGS:=-g -O2 -funroll-loops ALLMULRED:= mulredc1.o mulredc2.o mulredc3.o mulredc4.o mulredc5.o\ mulredc6.o mulredc7.o mulredc8.o mulredc9.o mulredc10.o\ mulredc11.o mulredc12.o mulredc13.o mulredc14.o\ mulredc15.o mulredc16.o mulredc17.o mulredc18.o\ mulredc19.o mulredc20.o redc.s: redc.asm m4 redc.asm > redc.s redc.o: redc.s gcc -c $(CFLAGS) redc.s -o redc.o mulredc%.o: mulredc%.asm m4 $< > tmp-mulred.s gcc -c $(CFLAGS) 
tmp-mulred.s -o $@ rm tmp-mulred.s mulredc%.asm: ./autogen.py ./autogen.py $* > $@ test_mulredc: test_mulredc.c redc.o $(ALLMULRED) gcc -o test_mulredc $(CFLAGS) test_mulredc.c $(ALLMULRED) redc.o -lgmp bench: bench.c redc.o $(ALLMULRED) gcc -o bench $(CFLAGS) bench.c $(ALLMULRED) redc.o -lgmp clean: rm redc.s *.o mulredc[0-9]*.s mulredc[0-9]*.asm test_mulredc ecm-6.4.4/pentium4/generate_all0000755023561000001540000000016312106741272013332 00000000000000#!/bin/sh for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do ./autogen.py $i > mulredc$i.asm done ecm-6.4.4/pentium4/mulredc6.asm0000644023561000001540000000765412106741272013221 00000000000000# mp_limb_t mulredc6(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc6 TYPE(GSYM_PREFIX`'mulredc6,`function') GSYM_PREFIX`'mulredc6: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $56, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) ########################################### movl $6, 52(%esp) .align 32 Loop: ## compute u and store in %ebp movl 80(%esp), %eax movl 84(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 92(%esp) movl %eax, %ebp movl 88(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 6 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 
(%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 24(%edi) adcl $0, 28(%edi) movl 80(%esp), %eax movl (%eax), %ebp movl 84(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 6 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 24(%edi) adcl $0, 28(%edi) addl $4, 80(%esp) addl $4, %edi decl 52(%esp) jnz Loop ########################################### ### Copy result in z movl 76(%esp), %ebx movl (%edi), %eax 
movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax # carry addl $56, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc2.asm0000644023561000001540000000505112106741272013202 00000000000000# mp_limb_t mulredc2(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc2 TYPE(GSYM_PREFIX`'mulredc2,`function') GSYM_PREFIX`'mulredc2: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $24, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) ########################################### movl $2, 20(%esp) .align 32 Loop: ## compute u and store in %ebp movl 48(%esp), %eax movl 52(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 60(%esp) movl %eax, %ebp movl 56(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 2 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 8(%edi) adcl $0, 12(%edi) movl 48(%esp), %eax movl (%eax), %ebp movl 52(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 2 ### kills %eax, %edx and mmx regs ### 
dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 8(%edi) adcl $0, 12(%edi) addl $4, 48(%esp) addl $4, %edi decl 20(%esp) jnz Loop ########################################### ### Copy result in z movl 44(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax # carry addl $24, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc20.asm0000644023561000001540000002145312106741272013266 00000000000000# mp_limb_t mulredc20(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc20 TYPE(GSYM_PREFIX`'mulredc20,`function') GSYM_PREFIX`'mulredc20: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $168, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) movl $0, 132(%edi) movl $0, 136(%edi) movl $0, 140(%edi) movl $0, 144(%edi) movl $0, 148(%edi) movl $0, 152(%edi) movl $0, 156(%edi) movl $0, 160(%edi) ########################################### movl $20, 164(%esp) .align 32 Loop: ## compute u and store in %ebp movl 192(%esp), %eax movl 196(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 204(%esp) movl %eax, %ebp movl 200(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 20 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 
pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd 64(%esi), %mm1 movd 64(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 64(%edi) psrlq $32, %mm0 movd 68(%esi), %mm1 movd 68(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 68(%edi) psrlq $32, %mm0 movd 72(%esi), %mm1 movd 72(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 72(%edi) psrlq $32, %mm0 movd 76(%esi), %mm1 movd 76(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 
76(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 80(%edi) adcl $0, 84(%edi) movl 192(%esp), %eax movl (%eax), %ebp movl 196(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 20 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd 
%mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd 64(%esi), %mm1 movd 64(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 64(%edi) psrlq $32, %mm0 movd 68(%esi), %mm1 movd 68(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 68(%edi) psrlq $32, %mm0 movd 72(%esi), %mm1 movd 72(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 72(%edi) psrlq $32, %mm0 movd 76(%esi), %mm1 movd 76(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 76(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 80(%edi) adcl $0, 84(%edi) addl $4, 192(%esp) addl $4, %edi decl 164(%esp) jnz Loop ########################################### ### Copy result in z movl 188(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax movl %eax, 64(%ebx) movl 68(%edi), %eax movl %eax, 68(%ebx) movl 72(%edi), %eax movl %eax, 72(%ebx) movl 76(%edi), %eax movl %eax, 76(%ebx) movl 80(%edi), %eax # carry addl $168, %esp popl 
%ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc17.asm0000644023561000001540000001737712106741272013306 00000000000000# mp_limb_t mulredc17(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc17 TYPE(GSYM_PREFIX`'mulredc17,`function') GSYM_PREFIX`'mulredc17: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $144, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) movl $0, 132(%edi) movl $0, 136(%edi) ########################################### movl $17, 140(%esp) .align 32 Loop: ## compute u and store in %ebp movl 168(%esp), %eax movl 172(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 180(%esp) movl %eax, %ebp movl 176(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 17 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, 
%mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd 64(%esi), %mm1 movd 64(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 64(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry 
limb is in %ecx addl %ecx, 68(%edi) adcl $0, 72(%edi) movl 168(%esp), %eax movl (%eax), %ebp movl 172(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 17 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 
movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd 64(%esi), %mm1 movd 64(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 64(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 68(%edi) adcl $0, 72(%edi) addl $4, 168(%esp) addl $4, %edi decl 140(%esp) jnz Loop ########################################### ### Copy result in z movl 164(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax movl %eax, 64(%ebx) movl 68(%edi), %eax # carry addl $144, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/redc.asm0000644023561000001540000001600512106741272012403 00000000000000dnl Copyright 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc. dnl dnl This file is a modified part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or dnl modify it under the terms of the GNU Lesser General Public License as dnl published by the Free Software Foundation; either version 2.1 of the dnl License, or (at your option) any later version. 
dnl dnl The GNU MP Library is distributed in the hope that it will be useful, dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dnl Lesser General Public License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. include(`config.m4') TEXT GLOBL GSYM_PREFIX`'ecm_redc3 TYPE(GSYM_PREFIX`'ecm_redc3,`function') GSYM_PREFIX`'ecm_redc3: push %ebp # Push registers push %edi push %esi push %ebx subl $16, %esp # SF: 2 Cpt + Jump +1 movl 44(%esp), %ecx # Read size movl 36(%esp), %edi # Read Dest Ptr movl %ecx, (%esp) # Save counter cmpl $5, %ecx jae Unroll Loop: movl 48(%esp), %ebp # Read invm movl 40(%esp), %esi # Read Source Ptr imull (%edi), %ebp # Dest[0] * invm movl %edi, 36(%esp) # Save new Dest movl 44(%esp), %ecx # Read Size (2) xorl %ebx, %ebx # Initial Carry InnerLoop: # esi: Source # edi: Dest # ebp: Multiplier # ecx: Counter movl (%esi), %eax # U1 addl $4, %edi # V1 mull %ebp # U2 addl $4, %esi # V2 addl %ebx, %eax # U3 adcl $0, %edx # U4 addl %eax, -4(%edi) # V4 adcl $0, %edx # U5 decl %ecx # V5 movl %edx, %ebx # U6 jnz InnerLoop # V6 movl 36(%esp), %edi movl %ebx, (%edi) # Save final carry decl (%esp) lea 4(%edi), %edi # Advance Dest jnz Loop # Loop End: addl $16, %esp pop %ebx pop %esi pop %edi pop %ebp ret Unroll: # %ecx Read size // %edi Dest Ptr # Precalcul du saut movl %ecx, %edx decl %ecx subl $2, %edx negl %ecx shrl $4, %edx andl $15, %ecx movl %edx, 8(%esp) # Org Cpt of 4(%esp) movl %ecx, %edx shll $4, %edx negl %ecx leal UnrollEntry (%edx, %ecx,1), %edx movl %ecx, 44(%esp) # (-size)%16 movl %edx, 12(%esp) # Org PC inside UnrollLoop: movl 48(%esp), %ebp # Read invm movl 40(%esp), %esi # Read Source Ptr imull (%edi), %ebp # Dest[0] * invm movl 
%edi, 36(%esp) # Save new Dest movl 44(%esp), %ecx # Read Size %16 movl 8(%esp), %edx # Read InnerLoop Cpt movl %edx, 4(%esp) # Set InnerLoop Cpt # First mull and set initial carry movl (%esi), %eax leal 4(%esi,%ecx,4), %esi mull %ebp leal (%edi,%ecx,4), %edi movl %edx, %ebx # Do the Jump inside the unrolling loop # And set up the registers differently if odd movl 12(%esp), %edx testl $1, %ecx movl %eax, %ecx cmovnz %ebx, %ecx cmovnz %eax, %ebx jmp *%edx # eax scratch # ebx carry hi # ecx carry lo # edx scratch # esi src # edi dst # ebp multiplier .align 32, 0x90 UnrollInnerLoop: addl $64, %edi UnrollEntry: # movl 0(%esi), %eax # Can't use this instruction .byte 0x8b,0x46,0x00 mull %ebp # addl %ecx, 0(%edi) # Can't use this instruction .byte 0x01,0x4f,0x00 adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 4(%esi), %eax mull %ebp addl %ebx, 4(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 8(%esi), %eax mull %ebp addl %ecx, 8(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 12(%esi), %eax mull %ebp addl %ebx, 12(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 16(%esi), %eax mull %ebp addl %ecx, 16(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 20(%esi), %eax mull %ebp addl %ebx, 20(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 24(%esi), %eax mull %ebp addl %ecx, 24(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 28(%esi), %eax mull %ebp addl %ebx, 28(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 32(%esi), %eax mull %ebp addl %ecx, 32(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 36(%esi), %eax mull %ebp addl %ebx, 36(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 40(%esi), %eax mull %ebp addl %ecx, 40(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 44(%esi), %eax mull %ebp addl %ebx, 44(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 48(%esi), %eax mull %ebp addl %ecx, 48(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 52(%esi), %eax mull %ebp addl 
%ebx, 52(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx movl 56(%esi), %eax mull %ebp addl %ecx, 56(%edi) adcl %eax, %ebx movl %edx, %ecx adcl $0, %ecx movl 60(%esi), %eax mull %ebp addl %ebx, 60(%edi) adcl %eax, %ecx movl %edx, %ebx adcl $0, %ebx decl 4(%esp) leal 64(%esi), %esi jns UnrollInnerLoop addl %ecx, 64(%edi) movl 36(%esp), %edi adcl $0, %ebx movl %ebx, (%edi) # Save final carry decl (%esp) lea 4(%edi), %edi # Advance Dest jnz UnrollLoop # Loop End2: addl $16, %esp pop %ebx pop %esi pop %edi pop %ebp ret ecm-6.4.4/pentium4/mulredc18.asm0000644023561000001540000002014312106741272013270 00000000000000# mp_limb_t mulredc18(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc18 TYPE(GSYM_PREFIX`'mulredc18,`function') GSYM_PREFIX`'mulredc18: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $152, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) movl $0, 132(%edi) movl $0, 136(%edi) movl $0, 140(%edi) movl $0, 144(%edi) ########################################### movl $18, 148(%esp) .align 32 Loop: ## compute u and store in %ebp movl 176(%esp), %eax movl 
180(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 188(%esp) movl %eax, %ebp movl 184(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 18 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 
movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd 64(%esi), %mm1 movd 64(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 64(%edi) psrlq $32, %mm0 movd 68(%esi), %mm1 movd 68(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 68(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 72(%edi) adcl $0, 76(%edi) movl 176(%esp), %eax movl (%eax), %ebp movl 180(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 18 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), 
%mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd 64(%esi), %mm1 movd 64(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 64(%edi) psrlq $32, %mm0 movd 68(%esi), %mm1 movd 68(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 68(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 72(%edi) adcl $0, 76(%edi) addl $4, 176(%esp) addl $4, %edi decl 148(%esp) jnz Loop ########################################### ### Copy result in z movl 172(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl 
%eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax movl %eax, 64(%ebx) movl 68(%edi), %eax movl %eax, 68(%ebx) movl 72(%edi), %eax # carry addl $152, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc19.asm0000644023561000001540000002070712106741272013277 00000000000000# mp_limb_t mulredc19(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc19 TYPE(GSYM_PREFIX`'mulredc19,`function') GSYM_PREFIX`'mulredc19: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $160, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) movl $0, 132(%edi) movl $0, 136(%edi) movl $0, 140(%edi) movl $0, 144(%edi) movl $0, 148(%edi) movl $0, 152(%edi) ########################################### movl $19, 156(%esp) .align 32 Loop: ## compute u and store in %ebp movl 184(%esp), %eax movl 188(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 196(%esp) movl %eax, %ebp movl 192(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is 
%ebp ### k is 19 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, 
%mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd 64(%esi), %mm1 movd 64(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 64(%edi) psrlq $32, %mm0 movd 68(%esi), %mm1 movd 68(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 68(%edi) psrlq $32, %mm0 movd 72(%esi), %mm1 movd 72(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 72(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 76(%edi) adcl $0, 80(%edi) movl 184(%esp), %eax movl (%eax), %ebp movl 188(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 19 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq 
%mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd 64(%esi), %mm1 movd 64(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 64(%edi) psrlq $32, %mm0 movd 68(%esi), %mm1 movd 68(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 68(%edi) psrlq $32, %mm0 movd 72(%esi), %mm1 movd 72(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 72(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 76(%edi) adcl $0, 80(%edi) addl $4, 184(%esp) addl $4, %edi decl 156(%esp) jnz Loop ########################################### ### Copy result in z movl 180(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 
40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax movl %eax, 64(%ebx) movl 68(%edi), %eax movl %eax, 68(%ebx) movl 72(%edi), %eax movl %eax, 72(%ebx) movl 76(%edi), %eax # carry addl $160, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc9.asm0000644023561000001540000001173212106741272013214 00000000000000# mp_limb_t mulredc9(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc9 TYPE(GSYM_PREFIX`'mulredc9,`function') GSYM_PREFIX`'mulredc9: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $80, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) ########################################### movl $9, 76(%esp) .align 32 Loop: ## compute u and store in %ebp movl 104(%esp), %eax movl 108(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 116(%esp) movl %eax, %ebp movl 112(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 9 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), 
%mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 36(%edi) adcl $0, 40(%edi) movl 104(%esp), %eax movl (%eax), %ebp movl 108(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 9 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), 
%mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 36(%edi) adcl $0, 40(%edi) addl $4, 104(%esp) addl $4, %edi decl 76(%esp) jnz Loop ########################################### ### Copy result in z movl 100(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax # carry addl $80, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc13.asm0000644023561000001540000001455712106741272013277 00000000000000# mp_limb_t mulredc13(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc13 TYPE(GSYM_PREFIX`'mulredc13,`function') GSYM_PREFIX`'mulredc13: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $112, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) ########################################### movl $13, 108(%esp) .align 32 Loop: ## compute u and store in %ebp movl 136(%esp), %eax movl 140(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 148(%esp) movl %eax, %ebp movl 144(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 13 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq 
%mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 52(%edi) adcl $0, 56(%edi) movl 136(%esp), %eax movl (%eax), %ebp movl 140(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 13 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 
pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 52(%edi) adcl $0, 56(%edi) addl $4, 136(%esp) addl $4, %edi decl 108(%esp) jnz Loop ########################################### ### Copy result in z movl 132(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax # carry addl $112, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc12.asm0000644023561000001540000001401312106741272013261 00000000000000# mp_limb_t mulredc12(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) 
# ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc12 TYPE(GSYM_PREFIX`'mulredc12,`function') GSYM_PREFIX`'mulredc12: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $104, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) ########################################### movl $12, 100(%esp) .align 32 Loop: ## compute u and store in %ebp movl 128(%esp), %eax movl 132(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 140(%esp) movl %eax, %ebp movl 136(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 12 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 
20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 48(%edi) adcl $0, 52(%edi) movl 128(%esp), %eax movl (%eax), %ebp movl 132(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 12 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd 
%mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 48(%edi) adcl $0, 52(%edi) addl $4, 128(%esp) addl $4, %edi decl 100(%esp) jnz Loop ########################################### ### Copy result in z movl 124(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax # carry addl $104, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/Makefile.in0000644023561000001540000003525212113353770013036 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. 
# This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = pentium4 DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = LTLIBRARIES = $(noinst_LTLIBRARIES) libmulredc_la_LIBADD = am__objects_1 = mulredc1.lo mulredc2.lo mulredc3.lo mulredc4.lo \ mulredc5.lo mulredc6.lo mulredc7.lo mulredc8.lo mulredc9.lo \ mulredc10.lo mulredc11.lo mulredc12.lo mulredc13.lo \ mulredc14.lo mulredc15.lo mulredc16.lo mulredc17.lo \ mulredc18.lo mulredc19.lo mulredc20.lo am_libmulredc_la_OBJECTS = $(am__objects_1) redc.lo libmulredc_la_OBJECTS = $(am_libmulredc_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS) LTCCASCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile 
$(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS) COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(libmulredc_la_SOURCES) DIST_SOURCES = $(libmulredc_la_SOURCES) HEADERS = $(noinst_HEADERS) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = LIBOBJS = @LIBOBJS@ # The asm code does not depend on any libraries except libc for abort() # if assertions are enabled LIBS = LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = 
@PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ MULREDC = mulredc1.asm mulredc2.asm mulredc3.asm mulredc4.asm mulredc5.asm \ mulredc6.asm mulredc7.asm mulredc8.asm mulredc9.asm mulredc10.asm \ mulredc11.asm mulredc12.asm mulredc13.asm mulredc14.asm \ mulredc15.asm mulredc16.asm mulredc17.asm mulredc18.asm \ mulredc19.asm mulredc20.asm EXTRA_DIST = Makefile.dev README autogen.py generate_all 
noinst_LTLIBRARIES = libmulredc.la noinst_HEADERS = mulredc.h # This library definition also causes the mulredc[n].asm and redc.asm files # to go in the distribution - no need for having them in EXTRA_DIST libmulredc_la_SOURCES = $(MULREDC) redc.asm # It's actually the .s files that depend on config.m4, but automake # knows them only as intermediate files, not as targets. Adding the # dependency to libmulredc.la should work so long as no stale .s # files exist. libmulredc_la_DEPENDENCIES = $(top_builddir)/config.m4 all: all-am .SUFFIXES: .SUFFIXES: .S .asm .lo .o .obj .s $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu pentium4/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu pentium4/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-noinstLTLIBRARIES: -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ test "$$dir" != "$$p" || dir=.; \ echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done libmulredc.la: $(libmulredc_la_OBJECTS) $(libmulredc_la_DEPENDENCIES) $(EXTRA_libmulredc_la_DEPENDENCIES) $(LINK) $(libmulredc_la_OBJECTS) $(libmulredc_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c .s.o: $(CCASCOMPILE) -c -o $@ $< .s.obj: $(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .s.lo: $(LTCCASCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; 
nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ clean-libtool clean-noinstLTLIBRARIES ctags distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags uninstall uninstall-am .asm.s: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.s .asm.S: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.S # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: ecm-6.4.4/pentium4/mulredc16.asm0000644023561000001540000001663312106741272013277 00000000000000# mp_limb_t mulredc16(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc16 TYPE(GSYM_PREFIX`'mulredc16,`function') GSYM_PREFIX`'mulredc16: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $136, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) movl $0, 124(%edi) movl $0, 128(%edi) ########################################### movl $16, 132(%esp) .align 32 Loop: ## compute u and store in %ebp movl 160(%esp), %eax movl 164(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 172(%esp) movl %eax, %ebp movl 168(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 16 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 
8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 64(%edi) adcl $0, 68(%edi) movl 160(%esp), %eax movl (%eax), %ebp movl 164(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 16 
### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, 
%mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd 60(%esi), %mm1 movd 60(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 60(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 64(%edi) adcl $0, 68(%edi) addl $4, 160(%esp) addl $4, %edi decl 132(%esp) jnz Loop ########################################### ### Copy result in z movl 156(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax movl %eax, 60(%ebx) movl 64(%edi), %eax # carry addl $136, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc1.asm0000644023561000001540000000243612106741272013205 00000000000000# # mp_limb_t mulredc1(mp_limb_t *z, const mp_limb_t x, const mp_limb_t y, # const mp_limb_t m, mp_limb_t inv_m) # # Compute z := x*y mod m, in Montgomery representation, where x, y < m # and m is n limbs wide. inv_m is the least significant limb of the # inverse of m modulo 2^(n*GMP_LIMB_BITS). # # The result might be unreduced (larger than m) but becomes reduced # after subtracting m. The calling function should take care of that. # # We use temporary space on the stack for the unreduced product. # Therefore, this cannot be used for large integers (anyway, the # algorithm is quadratic). # # WARNING: z is only n limbs but since it might be unreduced, there # could be a carry that does not fit in z. This carry is returned. 
include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc1 TYPE(GSYM_PREFIX`'mulredc1,`function') GSYM_PREFIX`'mulredc1: # Stack: # inv_m 20(%esp) # m 16 # y 12(%esp) # x 8 # z 4(%esp) movl 12(%esp), %eax mull 8(%esp) movl %edx, 12(%esp) movl %eax, 8(%esp) # store xy in [8(%esp):12(%esp)] mull 20(%esp) # compute u mull 16(%esp) # compute u*m addl 8(%esp), %eax # eax is 0, now (carry is important) adcl 12(%esp), %edx movl 4(%esp), %ecx movl %edx, (%ecx) adcl $0, %eax ret ecm-6.4.4/pentium4/mulredc15.asm0000644023561000001540000001606712106741272013277 00000000000000# mp_limb_t mulredc15(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc15 TYPE(GSYM_PREFIX`'mulredc15,`function') GSYM_PREFIX`'mulredc15: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $128, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) movl $0, 92(%edi) movl $0, 96(%edi) movl $0, 100(%edi) movl $0, 104(%edi) movl $0, 108(%edi) movl $0, 112(%edi) movl $0, 116(%edi) movl $0, 120(%edi) ########################################### movl $15, 124(%esp) .align 32 Loop: ## compute u and store in %ebp movl 152(%esp), %eax movl 156(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 164(%esp) movl %eax, %ebp movl 160(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### 
mult is %ebp ### k is 15 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 
paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 60(%edi) adcl $0, 64(%edi) movl 152(%esp), %eax movl (%eax), %ebp movl 156(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 15 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd 44(%esi), %mm1 movd 44(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 44(%edi) psrlq $32, %mm0 movd 48(%esi), %mm1 movd 48(%edi), %mm2 pmuludq %mm7, 
%mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 48(%edi) psrlq $32, %mm0 movd 52(%esi), %mm1 movd 52(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 52(%edi) psrlq $32, %mm0 movd 56(%esi), %mm1 movd 56(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 56(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 60(%edi) adcl $0, 64(%edi) addl $4, 152(%esp) addl $4, %edi decl 124(%esp) jnz Loop ########################################### ### Copy result in z movl 148(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax movl %eax, 44(%ebx) movl 48(%edi), %eax movl %eax, 48(%ebx) movl 52(%edi), %eax movl %eax, 52(%ebx) movl 56(%edi), %eax movl %eax, 56(%ebx) movl 60(%edi), %eax # carry addl $128, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc10.asm0000644023561000001540000001250312106741272013261 00000000000000# mp_limb_t mulredc10(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc10 TYPE(GSYM_PREFIX`'mulredc10,`function') GSYM_PREFIX`'mulredc10: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $88, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) ########################################### movl $10, 84(%esp) .align 32 Loop: ## compute u and store in %ebp movl 112(%esp), %eax movl 116(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 124(%esp) movl %eax, %ebp movl 120(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 10 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 
28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 40(%edi) adcl $0, 44(%edi) movl 112(%esp), %eax movl (%eax), %ebp movl 116(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 10 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### 
carry limb is in %ecx addl %ecx, 40(%edi) adcl $0, 44(%edi) addl $4, 112(%esp) addl $4, %edi decl 84(%esp) jnz Loop ########################################### ### Copy result in z movl 108(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax # carry addl $88, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/Makefile.am0000644023561000001540000000224712106741272013023 00000000000000MULREDC = mulredc1.asm mulredc2.asm mulredc3.asm mulredc4.asm mulredc5.asm \ mulredc6.asm mulredc7.asm mulredc8.asm mulredc9.asm mulredc10.asm \ mulredc11.asm mulredc12.asm mulredc13.asm mulredc14.asm \ mulredc15.asm mulredc16.asm mulredc17.asm mulredc18.asm \ mulredc19.asm mulredc20.asm EXTRA_DIST = Makefile.dev README autogen.py generate_all noinst_LTLIBRARIES = libmulredc.la noinst_HEADERS = mulredc.h # This library definition also causes the mulredc[n].asm and redc.asm files # to go in the distribution - no need for having them in EXTRA_DIST libmulredc_la_SOURCES = $(MULREDC) redc.asm # It's actually the .s files that depend on config.m4, but automake # knows them only as intermediate files, not as targets. Adding the # dependency to libmulredc.la should work so long as no stale .s # files exist. 
libmulredc_la_DEPENDENCIES = $(top_builddir)/config.m4 # The asm code does not depend on any libraries except libc for abort() # if assertions are enabled LIBS = LDFLAGS = .asm.s: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.s .asm.S: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.S ecm-6.4.4/pentium4/mulredc4.asm0000644023561000001540000000635012106741272013207 00000000000000# mp_limb_t mulredc4(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... ## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc4 TYPE(GSYM_PREFIX`'mulredc4,`function') GSYM_PREFIX`'mulredc4: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $40, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) ########################################### movl $4, 36(%esp) .align 32 Loop: ## compute u and store in %ebp movl 64(%esp), %eax movl 68(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 76(%esp) movl %eax, %ebp movl 72(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 4 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 
pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 16(%edi) adcl $0, 20(%edi) movl 64(%esp), %eax movl (%eax), %ebp movl 68(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 4 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 16(%edi) adcl $0, 20(%edi) addl $4, 64(%esp) addl $4, %edi decl 36(%esp) jnz Loop ########################################### ### Copy result in z movl 60(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax # carry addl $40, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc8.asm0000644023561000001540000001116412106741272013212 00000000000000# mp_limb_t mulredc8(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc8 TYPE(GSYM_PREFIX`'mulredc8,`function') GSYM_PREFIX`'mulredc8: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $72, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) ########################################### movl $8, 68(%esp) .align 32 Loop: ## compute u and store in %ebp movl 96(%esp), %eax movl 100(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 108(%esp) movl %eax, %ebp movl 104(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 8 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 
28(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 32(%edi) adcl $0, 36(%edi) movl 96(%esp), %eax movl (%eax), %ebp movl 100(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 8 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 32(%edi) adcl $0, 36(%edi) addl $4, 96(%esp) addl $4, %edi decl 68(%esp) jnz Loop ########################################### ### Copy result in z movl 92(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax # carry addl $72, %esp popl %ebx popl %esi popl %edi popl %ebp 
emms ret ecm-6.4.4/pentium4/README0000644023561000001540000000133412106741272011643 00000000000000mulredc[1..20].asm are size-specific asm code for mulredc. These are generated by the Python script autogen.py. To avoid making the package depend on Python, this generation is not done automatically with the autoconf/automake stuff. If you need to regenerate them, the syntax is ./autogen.py 3 > mulredc3.asm And you can generate all of them with the shell script ./generate_all This asm code uses MMX/SSE2 instructions and might not work on old x86 computers. If you have this problem, you should reconfigure with the --disable-asm-redc option. redc.asm is a version of redc separated from the multiplication, since there are cases where it is needed. test_mulredc.c, bench.c and the Makefile are for development. ecm-6.4.4/pentium4/mulredc11.asm0000644023561000001540000001324512106741272013266 00000000000000# mp_limb_t mulredc11(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc11 TYPE(GSYM_PREFIX`'mulredc11,`function') GSYM_PREFIX`'mulredc11: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $96, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) movl $0, 60(%edi) movl $0, 64(%edi) movl $0, 68(%edi) movl $0, 72(%edi) movl $0, 76(%edi) movl $0, 80(%edi) movl $0, 84(%edi) movl $0, 88(%edi) ########################################### movl $11, 92(%esp) .align 32 Loop: ## compute u and store in %ebp movl 120(%esp), %eax movl 124(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 132(%esp) movl %eax, %ebp movl 128(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 11 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, 
%mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq $32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 44(%edi) adcl $0, 48(%edi) movl 120(%esp), %eax movl (%eax), %ebp movl 124(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 11 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd 28(%esi), %mm1 movd 28(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 28(%edi) psrlq $32, %mm0 movd 32(%esi), %mm1 movd 32(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 32(%edi) psrlq 
$32, %mm0 movd 36(%esi), %mm1 movd 36(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 36(%edi) psrlq $32, %mm0 movd 40(%esi), %mm1 movd 40(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 40(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 44(%edi) adcl $0, 48(%edi) addl $4, 120(%esp) addl $4, %edi decl 92(%esp) jnz Loop ########################################### ### Copy result in z movl 116(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax movl %eax, 28(%ebx) movl 32(%edi), %eax movl %eax, 32(%ebx) movl 36(%edi), %eax movl %eax, 36(%ebx) movl 40(%edi), %eax movl %eax, 40(%ebx) movl 44(%edi), %eax # carry addl $96, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/pentium4/mulredc7.asm0000644023561000001540000001041712106741272013211 00000000000000# mp_limb_t mulredc7(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stack: # inv_m ## parameters # m # y # x # z (4*(2k+7))%esp # ??? (1 limb???) # ebp ## pushed registers (4*(2k+5))%esp # edi # esi # ebx # ... ## counter (1 mp_limb_t) (4*(2k+1))%esp # ... 
## tmp space (2*k+1 mp_limb_t) include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc7 TYPE(GSYM_PREFIX`'mulredc7,`function') GSYM_PREFIX`'mulredc7: pushl %ebp pushl %edi pushl %esi pushl %ebx subl $64, %esp movl %esp, %edi ### set tmp[0..2k+1[ to 0 movl $0, (%edi) movl $0, 4(%edi) movl $0, 8(%edi) movl $0, 12(%edi) movl $0, 16(%edi) movl $0, 20(%edi) movl $0, 24(%edi) movl $0, 28(%edi) movl $0, 32(%edi) movl $0, 36(%edi) movl $0, 40(%edi) movl $0, 44(%edi) movl $0, 48(%edi) movl $0, 52(%edi) movl $0, 56(%edi) ########################################### movl $7, 60(%esp) .align 32 Loop: ## compute u and store in %ebp movl 88(%esp), %eax movl 92(%esp), %esi movl (%eax), %eax mull (%esi) addl (%edi), %eax mull 100(%esp) movl %eax, %ebp movl 96(%esp), %esi ### addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 7 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 28(%edi) adcl $0, 32(%edi) movl 88(%esp), %eax movl (%eax), %ebp movl 92(%esp), %esi ### 
addmul1: src[0] is (%esi) ### dst[0] is (%edi) ### mult is %ebp ### k is 7 ### kills %eax, %edx and mmx regs ### dst[0,k[ += mult*src[0,k[ plus carry put in ecx pxor %mm0, %mm0 movd %ebp, %mm7 movd (%esi), %mm1 movd (%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, (%edi) psrlq $32, %mm0 movd 4(%esi), %mm1 movd 4(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 4(%edi) psrlq $32, %mm0 movd 8(%esi), %mm1 movd 8(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 8(%edi) psrlq $32, %mm0 movd 12(%esi), %mm1 movd 12(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 12(%edi) psrlq $32, %mm0 movd 16(%esi), %mm1 movd 16(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 16(%edi) psrlq $32, %mm0 movd 20(%esi), %mm1 movd 20(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 20(%edi) psrlq $32, %mm0 movd 24(%esi), %mm1 movd 24(%edi), %mm2 pmuludq %mm7, %mm1 paddq %mm1, %mm2 paddq %mm2, %mm0 movd %mm0, 24(%edi) psrlq $32, %mm0 movd %mm0, %ecx ### carry limb is in %ecx addl %ecx, 28(%edi) adcl $0, 32(%edi) addl $4, 88(%esp) addl $4, %edi decl 60(%esp) jnz Loop ########################################### ### Copy result in z movl 84(%esp), %ebx movl (%edi), %eax movl %eax, (%ebx) movl 4(%edi), %eax movl %eax, 4(%ebx) movl 8(%edi), %eax movl %eax, 8(%ebx) movl 12(%edi), %eax movl %eax, 12(%ebx) movl 16(%edi), %eax movl %eax, 16(%ebx) movl 20(%edi), %eax movl %eax, 20(%ebx) movl 24(%edi), %eax movl %eax, 24(%ebx) movl 28(%edi), %eax # carry addl $64, %esp popl %ebx popl %esi popl %edi popl %ebp emms ret ecm-6.4.4/sp.c0000644023561000001540000000550312106741273010007 00000000000000/* sp.c - "small prime" functions that don't need to be inlined Copyright 2005, 2006, 2007, 2008, 2009, 2010 Dave Newman, Jason Papadopoulos, Alexander Kruppa, Paul Zimmermann. 
The SP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The SP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the SP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include /* for stderr */ #include #include "sp.h" /* Test if m is a base "a" strong probable prime */ int sp_spp (sp_t a, sp_t m, sp_t d) { sp_t r, s, t, e; if (m == a) return 1; /* Set e * 2^s = m-1, e odd */ for (s = 0, e = m - 1; !(e & 1); s++, e >>= 1); t = sp_pow (a, e, m, d); if (t == 1) return 1; for (r = 0; r < s; r++) { if (t == m - 1) return 1; t = sp_sqr (t, m, d); } return 0; } /* Test if x is a prime, return 1 if it is. Note this only works on sp's, i.e. we need the top bit of x set */ int sp_prime (sp_t x) { sp_t d; if (!(x & 1)) return 0; if (x < SP_MIN) return 1; sp_reciprocal (d, x); if (SP_NUMB_BITS <= 32) { /* 32-bit primality test * See http://primes.utm.edu/prove/prove2_3.html */ if (!sp_spp (2, x, d) || !sp_spp (7, x, d) || !sp_spp (61, x, d)) return 0; } else { ASSERT (SP_NUMB_BITS <= 64); /* 64-bit primality test * follows from results by Jaeschke, "On strong pseudoprimes to several * bases" Math. Comp. 61 (1993) p916 */ if (!sp_spp (2, x, d) || !sp_spp (3, x, d) || !sp_spp (5, x, d) || !sp_spp (7, x, d) || !sp_spp (11, x, d) || !sp_spp (13, x, d) || !sp_spp (17, x, d) || ! 
sp_spp (19, x, d) || !sp_spp (23, x, d) || !sp_spp (29, x, d)) return 0; } return 1; } #define CACHE_LINE_SIZE 64 void * sp_aligned_malloc (size_t len) { void *ptr, *aligned_ptr; size_t addr; ptr = malloc (len + CACHE_LINE_SIZE); if (ptr == NULL) return NULL; addr = (size_t)ptr; addr = CACHE_LINE_SIZE - (addr % CACHE_LINE_SIZE); aligned_ptr = (void *)((char *)ptr + addr); *( (void **)aligned_ptr - 1 ) = ptr; return aligned_ptr; } void sp_aligned_free (void *newptr) { void *ptr; if (newptr == NULL) return; ptr = *( (void **)newptr - 1 ); free (ptr); } ecm-6.4.4/ecm-params.h.mips64el0000644023561000001540000000153012106741273013055 00000000000000/* those parameters were obtained on gcc42.fsffrance.org with ecm-6.4.1-rc3 gmp-5.0.2, and gcc 4.3.1 -O2 -mabi=n32 (mips64el-unknown-linux-gnu) */ /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1} #define MPZMOD_THRESHOLD 23 #define REDC_THRESHOLD 512 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 12 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 13 #define MUL_NTT_THRESHOLD 1024 #define PREREVERTDIVISION_NTT_THRESHOLD 16 #define POLYINVERT_NTT_THRESHOLD 256 #define POLYEVALT_NTT_THRESHOLD 256 #define MPZSPV_NORMALISE_STRIDE 128 ecm-6.4.4/ecm-params.h.alpha-ev50000644023561000001540000000071112106741273013174 00000000000000#define MPZMOD_THRESHOLD 86 #define REDC_THRESHOLD 182 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 8, 10, 1, 1, 1, 1, 1, 9, 1, 1, 1, 1, 13, 1, 1, 1, 1, 1, 1, 1, 17} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 11 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 11 #define MUL_NTT_THRESHOLD 1024 #define PREREVERTDIVISION_NTT_THRESHOLD 512 #define POLYINVERT_NTT_THRESHOLD 2048 #define 
POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 128 ecm-6.4.4/rho.c0000644023561000001540000006443212106741273010163 00000000000000/* Dickman's rho function (to compute probability of success of ecm). Copyright 2004, 2005, 2006, 2008, 2009, 2010, 2011 Alexander Kruppa, Paul Zimmermann. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config.h" #if defined(TESTDRIVE) #define _ISOC99_SOURCE 1 #endif #if defined(DEBUG_NUMINTEGRATE) || defined(TESTDRIVE) # include #endif #include #include #if defined(TESTDRIVE) #include #include "primegen.h" #endif #if defined(TESTDRIVE) && defined(HAVE_LIBGSL) #include #include #include #endif #include "ecm-impl.h" /* For Suyama's curves, we have a known torsion factor of 12 = 2^2*3^1, and an average extra exponent of 1/2 for 2, and 1/3 for 3 due to the probability that the group order divided by 12 is divisible by 2 or 3, thus on average we should have 2^2.5*3^1.333 ~ 24.5, however experimentally we have 2^3.323*3^1.687 ~ 63.9 (see Alexander Kruppa's thesis, Table 5.1 page 96, row sigma=2, http://tel.archives-ouvertes.fr/tel-00477005/en/). The exp(ECM_EXTRA_SMOOTHNESS) value takes into account the extra smoothness with respect to a random number. 
*/ #ifndef ECM_EXTRA_SMOOTHNESS #define ECM_EXTRA_SMOOTHNESS 3.134 #endif #define M_PI_SQR 9.869604401089358619 /* Pi^2 */ #define M_PI_SQR_6 1.644934066848226436 /* Pi^2/6 */ /* gsl_math.h defines M_EULER */ #ifndef M_EULER #define M_EULER 0.577215664901532861 #endif #define M_EULER_1 0.422784335098467139 /* 1 - Euler */ #ifndef MAX #define MAX(x,y) ((x) > (y) ? (x) : (y)) #endif #ifndef MIN #define MIN(x,y) ((x) < (y) ? (x) : (y)) #endif void rhoinit (int, int); /* used in stage2.c */ static double *rhotable = NULL; static int invh = 0; static double h = 0.; static int tablemax = 0; #if defined(TESTDRIVE) #define PRIME_PI_MAX 10000 #define PRIME_PI_MAP(x) (((x)+1)/2) /* The number of primes up to i. Use prime_pi[PRIME_PI_MAP(i)]. Only correct for i >= 2. */ static unsigned int prime_pi[PRIME_PI_MAP(PRIME_PI_MAX)+1]; #endif /* Fixme: need prime generating funcion without static state variables */ const unsigned char primemap[667] = { 254, 223, 239, 126, 182, 219, 61, 249, 213, 79, 30, 243, 234, 166, 237, 158, 230, 12, 211, 211, 59, 221, 89, 165, 106, 103, 146, 189, 120, 30, 166, 86, 86, 227, 173, 45, 222, 42, 76, 85, 217, 163, 240, 159, 3, 84, 161, 248, 46, 253, 68, 233, 102, 246, 19, 58, 184, 76, 43, 58, 69, 17, 191, 84, 140, 193, 122, 179, 200, 188, 140, 79, 33, 88, 113, 113, 155, 193, 23, 239, 84, 150, 26, 8, 229, 131, 140, 70, 114, 251, 174, 101, 146, 143, 88, 135, 210, 146, 216, 129, 101, 38, 227, 160, 17, 56, 199, 38, 60, 129, 235, 153, 141, 81, 136, 62, 36, 243, 51, 77, 90, 139, 28, 167, 42, 180, 88, 76, 78, 38, 246, 25, 130, 220, 131, 195, 44, 241, 56, 2, 181, 205, 205, 2, 178, 74, 148, 12, 87, 76, 122, 48, 67, 11, 241, 203, 68, 108, 36, 248, 25, 1, 149, 168, 92, 115, 234, 141, 36, 150, 43, 80, 166, 34, 30, 196, 209, 72, 6, 212, 58, 47, 116, 156, 7, 106, 5, 136, 191, 104, 21, 46, 96, 85, 227, 183, 81, 152, 8, 20, 134, 90, 170, 69, 77, 73, 112, 39, 210, 147, 213, 202, 171, 2, 131, 97, 5, 36, 206, 135, 34, 194, 169, 173, 24, 140, 77, 120, 209, 137, 22, 176, 
87, 199, 98, 162, 192, 52, 36, 82, 174, 90, 64, 50, 141, 33, 8, 67, 52, 182, 210, 182, 217, 25, 225, 96, 103, 26, 57, 96, 208, 68, 122, 148, 154, 9, 136, 131, 168, 116, 85, 16, 39, 161, 93, 104, 30, 35, 200, 50, 224, 25, 3, 68, 115, 72, 177, 56, 195, 230, 42, 87, 97, 152, 181, 28, 10, 104, 197, 129, 143, 172, 2, 41, 26, 71, 227, 148, 17, 78, 100, 46, 20, 203, 61, 220, 20, 197, 6, 16, 233, 41, 177, 130, 233, 48, 71, 227, 52, 25, 195, 37, 10, 48, 48, 180, 108, 193, 229, 70, 68, 216, 142, 76, 93, 34, 36, 112, 120, 146, 137, 129, 130, 86, 38, 27, 134, 233, 8, 165, 0, 211, 195, 41, 176, 194, 74, 16, 178, 89, 56, 161, 29, 66, 96, 199, 34, 39, 140, 200, 68, 26, 198, 139, 130, 129, 26, 70, 16, 166, 49, 9, 240, 84, 47, 24, 210, 216, 169, 21, 6, 46, 12, 246, 192, 14, 80, 145, 205, 38, 193, 24, 56, 101, 25, 195, 86, 147, 139, 42, 45, 214, 132, 74, 97, 10, 165, 44, 9, 224, 118, 196, 106, 60, 216, 8, 232, 20, 102, 27, 176, 164, 2, 99, 54, 16, 49, 7, 213, 146, 72, 66, 18, 195, 138, 160, 159, 45, 116, 164, 130, 133, 120, 92, 13, 24, 176, 97, 20, 29, 2, 232, 24, 18, 193, 1, 73, 28, 131, 48, 103, 51, 161, 136, 216, 15, 12, 244, 152, 136, 88, 215, 102, 66, 71, 177, 22, 168, 150, 8, 24, 65, 89, 21, 181, 68, 42, 82, 225, 179, 170, 161, 89, 69, 98, 85, 24, 17, 165, 12, 163, 60, 103, 0, 190, 84, 214, 10, 32, 54, 107, 130, 12, 21, 8, 126, 86, 145, 1, 120, 208, 97, 10, 132, 168, 44, 1, 87, 14, 86, 160, 80, 11, 152, 140, 71, 108, 32, 99, 16, 196, 9, 228, 12, 87, 136, 11, 117, 11, 194, 82, 130, 194, 57, 36, 2, 44, 86, 37, 122, 49, 41, 214, 163, 32, 225, 177, 24, 176, 12, 138, 50, 193, 17, 50, 9, 197, 173, 48, 55, 8, 188, 145, 130, 207, 32, 37, 107, 156, 48, 143, 68, 38, 70, 106, 7, 73, 142, 9, 88, 16, 2, 37, 197, 196, 66, 90, 128, 160, 128, 60, 144, 40, 100, 20, 225, 3, 132, 81, 12, 46, 163, 138, 164, 8, 192, 71, 126, 211, 43, 3, 205, 84, 42, 0, 4, 179, 146, 108, 66, 41, 76, 131, 193, 146, 204, 28}; #ifdef TESTDRIVE unsigned long gcd (unsigned long a, unsigned long b) { unsigned long t; 
while (b != 0) { t = a % b; a = b; b = t; } return a; } unsigned long eulerphi (unsigned long n) { unsigned long phi = 1, p; for (p = 2; p * p <= n; p += 2) { if (n % p == 0) { phi *= p - 1; n /= p; while (n % p == 0) { phi *= p; n /= p; } } if (p == 2) p--; } /* now n is prime */ return (n == 1) ? phi : phi * (n - 1); } /* The number of positive integers up to x that have no prime factor up to y, for x >= y >= 2. Uses Buchstab's identity */ unsigned long Buchstab_Phi(unsigned long x, unsigned long y) { unsigned long p, s; primegen pg[1]; if (x < 1) return 0; if (x <= y) return 1; #if 0 if (x < y^2) return(1 + primepi(x) - primepi (y))); #endif s = 1; primegen_init (pg); primegen_skipto (pg, y + 1); for (p = primegen_next(pg); p <= x; p = primegen_next(pg)) s += Buchstab_Phi(x / p, p - 1); return (s); } /* The number of positive integers up to x that have no prime factor greter than y, for x >= y >= 2. Uses Buchstab's identity */ unsigned long Buchstab_Psi(const unsigned long x, const unsigned long y) { unsigned long r, p; primegen pg[1]; if (x <= y) return (x); if (y == 1UL) return (1); /* If y^2 > x, then Psi(x,y) = x - \sum_{y < p < x, p prime} floor(x/p) We separate the sum into ranges where floor(x/p) = k, which is x/(k+1) < p <= x/k. We also need to satisfy y < p, so we need k < x/y - 1, or k_max = ceil (x/y) - 2. The primes y < p <= x/(k_max + 1) are summed separately. 
*/ if (x <= PRIME_PI_MAX && x < y * y) { unsigned long kmax = x / y - 1; unsigned long s1, s2, k; s1 = (kmax + 1) * (prime_pi [PRIME_PI_MAP(x / (kmax + 1))] - prime_pi [PRIME_PI_MAP(y)]); s2 = 0; for (k = 1; k <= kmax; k++) s2 += prime_pi[PRIME_PI_MAP(x / k)]; s2 -= kmax * prime_pi [PRIME_PI_MAP(x / (kmax+1))]; return (x - s1 - s2); } r = 1; primegen_init (pg); for (p = primegen_next(pg); p <= y; p = primegen_next(pg)) r += Buchstab_Psi (x / p, p); return (r); } #endif /* TESTDRIVE */ #if defined(TESTDRIVE) && defined(HAVE_LIBGSL) static double Li (const double x) { return (- gsl_sf_expint_E1 (- log(x))); } #endif /* Evaluate dilogarithm via the sum \Li_{2}(z)=\sum_{k=1}^{\infty} \frac{z^k}{k^2}, see http://mathworld.wolfram.com/Dilogarithm.html Assumes |z| <= 0.5, for which the sum converges quickly. */ static double dilog_series (const double z) { double r = 0.0, zk; /* zk = z^k */ int k, k2; /* k2 = k^2 */ /* Doubles have 53 bits in significand, with |z| <= 0.5 the k+1-st term is <= 1/(2^k k^2) of the result, so 44 terms should do */ for (k = 1, k2 = 1, zk = z; k <= 44; k2 += 2 * k + 1, k++, zk *= z) r += zk / (double) k2; return r; } static double dilog (double x) { ASSERT(x <= -1.0); /* dilog(1-x) is called from rhoexact for 2 < x <= 3 */ if (x <= -2.0) return -dilog_series (1./x) - M_PI_SQR_6 - 0.5 * log(-1./x) * log(-1./x); else /* x <= -1.0 */ { /* L2(z) = -L2(1 - z) + 1/6 * Pi^2 - ln(1 - z)*ln(z) L2(z) = -L2(1/z) - 1/6 * Pi^2 - 0.5*ln^2(-1/z) -> L2(z) = -(-L2(1/(1-z)) - 1/6 * Pi^2 - 0.5*ln^2(-1/(1-z))) + 1/6 * Pi^2 - ln(1 - z)*ln(z) = L2(1/(1-z)) - 1/6 * Pi^2 + 0.5*ln(1 - z)^2 - ln(1 - z)*ln(-z) z in [-1, -2) -> 1/(1-z) in [1/2, 1/3) */ double log1x = log (1. - x); return dilog_series (1. / (1. - x)) - M_PI_SQR_6 + log1x * (0.5 * log1x - log (-x)); } } #if 0 static double L2 (double x) { return log (x) * (1 - log (x-1)) + M_PI_SQR_6 - dilog (1 - x); } #endif static double rhoexact (double x) { ASSERT(x <= 3.); if (x <= 0.) return 0.; if (x <= 1.) 
return 1.; if (x <= 2.) return 1. - log (x); if (x <= 3.) /* 2 < x <= 3 thus -2 <= 1-x < -1 */ return 1. - log (x) * (1. - log (x - 1.)) + dilog (1. - x) + 0.5 * M_PI_SQR_6; return 0.; /* x > 3. and asserting not enabled: bail out with 0. */ } #if defined(TESTDRIVE) && defined(HAVE_LIBGSL) /* The Buchstab omega(x) function, exact for x <= 4 where it can be evaluated without numerical integration, and approximated by exp(gamma) for larger x. */ static double Buchstab_omega (const double x) { /* magic = dilog(-1) + 1 = Pi^2/12 + 1 */ const double magic = 1.82246703342411321824; if (x < 1.) return (0.); if (x <= 2.) return (1. / x); if (x <= 3.) return ((log (x - 1.) + 1.) / x); if (x <= 4.) return ((dilog(2. - x) + (1. + log(x - 2.)) * log(x - 1.) + magic) / x); /* If argument is out of range, return the limiting value for $x->\infty$: e^-gamma. For x only a little larger than 4., this has relative error 2.2e-6, for larger x the error rapidly drops further */ return 0.56145948356688516982; } #endif void rhoinit (int parm_invh, int parm_tablemax) { int i; if (parm_invh == invh && parm_tablemax == tablemax) return; if (rhotable != NULL) { free (rhotable); rhotable = NULL; invh = 0; h = 0.; tablemax = 0; } /* The integration below expects 3 * invh > 4 */ if (parm_tablemax == 0 || parm_invh < 2) return; invh = parm_invh; h = 1. / (double) invh; tablemax = parm_tablemax; rhotable = (double *) malloc (parm_invh * parm_tablemax * sizeof (double)); if (rhotable == NULL) { fprintf (stderr, "Cannot allocate memory in rhoinit\n"); exit (1); } for (i = 0; i < (3 < parm_tablemax ? 3 : parm_tablemax) * invh; i++) rhotable[i] = rhoexact (i * h); for (i = 3 * invh; i < parm_tablemax * invh; i++) { /* rho(i*h) = 1 - \int_{1}^{i*h} rho(x-1)/x dx = rho((i-4)*h) - \int_{(i-4)*h}^{i*h} rho(x-1)/x dx */ rhotable[i] = rhotable[i - 4] - 2. / 45. * ( 7. * rhotable[i - invh - 4] / (double)(i - 4) + 32. * rhotable[i - invh - 3] / (double)(i - 3) + 12. 
* rhotable[i - invh - 2] / (double)(i - 2) + 32. * rhotable[i - invh - 1] / (double)(i - 1) + 7. * rhotable[i - invh] / (double)i ); if (rhotable[i] < 0.) { #ifndef DEBUG_NUMINTEGRATE rhotable[i] = 0.; #else printf (stderr, "rhoinit: rhotable[%d] = %.16f\n", i, rhotable[i]); exit (EXIT_FAILURE); #endif } } } static double dickmanrho (double alpha) { if (alpha <= 3.) return rhoexact (alpha); if (alpha < tablemax) { int a = floor (alpha * invh); double rho1 = rhotable[a]; double rho2 = (a + 1) < tablemax * invh ? rhotable[a + 1] : 0; return rho1 + (rho2 - rho1) * (alpha * invh - (double)a); } return 0.; } #if 0 static double dickmanrhosigma (double alpha, double x) { if (alpha <= 0.) return 0.; if (alpha <= 1.) return 1.; if (alpha < tablemax) return dickmanrho (alpha) + M_EULER_1 * dickmanrho (alpha - 1.) / log (x); return 0.; } static double dickmanrhosigma_i (int ai, double x) { if (ai <= 0) return 0.; if (ai <= invh) return 1.; if (ai < tablemax * invh) return rhotable[ai] - M_EULER * rhotable[ai - invh] / log(x); return 0.; } #endif static double dickmanlocal (double alpha, double x) { if (alpha <= 0.) return 0.; if (alpha <= 1.) return 1.; if (alpha < tablemax) return dickmanrho (alpha) - M_EULER * dickmanrho (alpha - 1.) 
/ log (x); return 0.; } static double dickmanlocal_i (int ai, double x) { if (ai <= 0) return 0.; if (ai <= invh) return 1.; if (ai <= 2 * invh && ai < tablemax * invh) return rhotable[ai] - M_EULER / log (x); if (ai < tablemax * invh) { double logx = log (x); return rhotable[ai] - (M_EULER * rhotable[ai - invh] + M_EULER_1 * rhotable[ai - 2 * invh] / logx) / logx; } return 0.; } static int isprime(unsigned long n) { unsigned int r; if (n % 2 == 0) return (n == 2); if (n % 3 == 0) return (n == 3); if (n % 5 == 0) return (n == 5); if (n / 30 >= sizeof (primemap)) abort(); r = n % 30; /* 8 possible values: 1,7,11,13,17,19,23,29 */ r = (r * 16 + r) / 64; /* maps the 8 values onto 0, ..., 7 */ return ((primemap[n / 30] & (1 << r)) != 0); } static double dickmanmu_sum (const unsigned long B1, const unsigned long B2, const double x) { double s = 0.; const double logB1 = 1. / log(B1); const double logx = log(x); unsigned long p; for (p = B1 + 1; p <= B2; p++) if (isprime(p)) s += dickmanlocal ((logx - log(p)) * logB1, x / p) / p; return (s); } static double dickmanmu (double alpha, double beta, double x) { double a, b, sum; int ai, bi, i; ai = ceil ((alpha - beta) * invh); if (ai > tablemax * invh) ai = tablemax * invh; a = (double) ai * h; bi = floor ((alpha - 1.) * invh); if (bi > tablemax * invh) bi = tablemax * invh; b = (double) bi * h; sum = 0.; for (i = ai + 1; i < bi; i++) sum += dickmanlocal_i (i, x) / (alpha - i * h); sum += 0.5 * dickmanlocal_i (ai, x) / (alpha - a); sum += 0.5 * dickmanlocal_i (bi, x) / (alpha - b); sum *= h; sum += (a - alpha + beta) * 0.5 * (dickmanlocal_i (ai, x) / (alpha - a) + dickmanlocal (alpha - beta, x) / beta); sum += (alpha - 1. 
- b) * 0.5 * (dickmanlocal (alpha - 1., x) + dickmanlocal_i (bi, x) / (alpha - b)); return sum; } static double brentsuyama (double B1, double B2, double N, double nr) { double a, alpha, beta, sum; int ai, i; alpha = log (N) / log (B1); beta = log (B2) / log (B1); ai = floor ((alpha - beta) * invh); if (ai > tablemax * invh) ai = tablemax * invh; a = (double) ai * h; sum = 0.; for (i = 1; i < ai; i++) sum += dickmanlocal_i (i, N) / (alpha - i * h) * (1 - exp (-nr * pow (B1, (-alpha + i * h)))); sum += 0.5 * (1 - exp(-nr / pow (B1, alpha))); sum += 0.5 * dickmanlocal_i (ai, N) / (alpha - a) * (1 - exp(-nr * pow (B1, (-alpha + a)))); sum *= h; sum += 0.5 * (alpha - beta - a) * (dickmanlocal_i (ai, N) / (alpha - a) + dickmanlocal (alpha - beta, N) / beta); return sum; } static double brsudickson (double B1, double B2, double N, double nr, int S) { int i, f; double sum; sum = 0; f = eulerphi (S) / 2; for (i = 1; i <= S / 2; i++) if (gcd (i, S) == 1) sum += brentsuyama (B1, B2, N, nr * (gcd (i - 1, S) + gcd (i + 1, S) - 4) / 2); return sum / (double)f; } static double brsupower (double B1, double B2, double N, double nr, int S) { int i, f; double sum; sum = 0; f = eulerphi (S); for (i = 1; i < S; i++) if (gcd (i, S) == 1) sum += brentsuyama (B1, B2, N, nr * (gcd (i - 1, S) - 2)); return sum / (double)f; } /* Assume N is as likely smooth as a number around N/exp(delta) */ static double prob (double B1, double B2, double N, double nr, int S, double delta) { const double sumthresh = 20000.; double alpha, beta, stage1, stage2, brsu; const double effN = N / exp (delta); ASSERT(rhotable != NULL); /* What to do if rhotable is not initialised and asserting is not enabled? For now, bail out with 0. result. Not really pretty, either */ if (rhotable == NULL) return 0.; if (B1 < 2. || N <= 1.) 
return 0.; if (effN <= B1) return 1.; #ifdef TESTDRIVE printf ("B1 = %f, B2 = %f, N = %.0f, nr = %f, S = %d\n", B1, B2, N, nr, S); #endif alpha = log (effN) / log (B1); stage1 = dickmanlocal (alpha, effN); stage2 = 0.; if (B2 > B1) { if (B1 < sumthresh) { stage2 += dickmanmu_sum (B1, MIN(B2, sumthresh), effN); beta = log (B2) / log (MIN(B2, sumthresh)); } else beta = log (B2) / log (B1); if (beta > 1.) stage2 += dickmanmu (alpha, beta, effN); } brsu = 0.; if (S < -1) brsu = brsudickson (B1, B2, effN, nr, -S * 2); if (S > 1) brsu = brsupower (B1, B2, effN, nr, S * 2); #ifdef TESTDRIVE printf ("stage 1 : %f, stage 2 : %f, Brent-Suyama : %f\n", stage1, stage2, brsu); #endif return (stage1 + stage2 + brsu) > 0. ? (stage1 + stage2 + brsu) : 0.; } double ecmprob (double B1, double B2, double N, double nr, int S) { return prob (B1, B2, N, nr, S, ECM_EXTRA_SMOOTHNESS); } double pm1prob (double B1, double B2, double N, double nr, int S, const mpz_t go) { mpz_t cof; /* A prime power q^k divides p-1, p prime, with probability 1/(q^k-q^(k-1)) not with probability 1/q^k as for random numbers. This is taken into account by the "smoothness" value here; a prime p-1 is about as likely smooth as a random number around (p-1)/exp(smoothness). 
smoothness = \sum_{q in Primes} log(q)/(q-1)^2 */ double smoothness = 1.2269688; unsigned long i; if (go != NULL && mpz_cmp_ui (go, 1UL) > 0) { mpz_init (cof); mpz_set (cof, go); for (i = 2; i < 100; i++) if (mpz_divisible_ui_p (cof, i)) { /* If we know that q divides p-1 with probability 1, we need to adjust the smoothness parameter */ smoothness -= log ((double) i) / (double) ((i-1)*(i-1)); /* printf ("pm1prob: Dividing out %lu\n", i); */ while (mpz_divisible_ui_p (cof, i)) mpz_tdiv_q_ui (cof, cof, i); } /* printf ("pm1prob: smoothness after dividing out go primes < 100: %f\n", smoothness); */ return prob (B1, B2, N, nr, S, smoothness + log(mpz_get_d (cof))); mpz_clear (cof); } return prob (B1, B2, N, nr, S, smoothness); } /* Compute probability for primes p == r (mod m) */ double pm1prob_rm (double B1, double B2, double N, double nr, int S, unsigned long r, unsigned long m) { unsigned long cof; double smoothness = 1.2269688; unsigned long p; cof = m; for (p = 2UL; p < 100UL; p++) if (cof % p == 0UL) /* For each prime in m */ { unsigned long cof_r, k, i; /* Divisibility by i is determined by r and m. We need to adjust the smoothness parameter. In P-1, we had estimated the expected value for the exponent of p as p/(p-1)^2. Undo that. */ smoothness -= (double)p / ((p-1)*(p-1)) * log ((double) p); /* The expected value for the exponent of this prime is k s.t. p^k || r, plus 1/(p-1) if p^k || m as well */ cof_r = gcd (r - 1UL, m); for (k = 0UL; cof_r % p == 0UL; k++) cof_r /= p; smoothness += k * log ((double) p); cof_r = m; for (i = 0UL; cof_r % p == 0UL; i++) cof_r /= p; if (i == k) smoothness += (1./(p - 1.) 
* log ((double) p)); while (cof % p == 0UL) cof /= p; printf ("pm1prob_rm: p = %lu, k = %lu, i = %lu, new smoothness = %f\n", p, i, k, smoothness); } return prob (B1, B2, N, nr, S, smoothness); } /* The \Phi(x,y) function gives the number of natural numbers <= x that have no prime factor <= y, see Tenenbaum, "Introduction the analytical and probabilistic number theory", III.6. This function estimates the \Phi(x,y) function via eq. (48) of the 1st edition resp. equation (6.49) of the 3rd edition of Tenenbaum's book. */ #if defined(TESTDRIVE) && defined(HAVE_LIBGSL) static double integrand1 (double x, double *y) { return pow (*y, x) / x * log(x-1.); } static double integrand2 (double v, double *y) { return Buchstab_omega (v) * pow (*y, v); } /* Return approximate number of integers n with x1 < n <= x2 that have no prime factor <= y */ double no_small_prime (double x1, double x2, double y) { double u1, u2; ASSERT (x1 >= 2.); ASSERT (x2 >= x1); ASSERT (y >= 2.); if (x1 == x2 || x2 <= y) return 0.; if (x1 < y) x1 = y; u1 = log(x1)/log(y); u2 = log(x2)/log(y); /* If no prime factors <= sqrt(x2), numbers must be a primes > y */ if (x2 <= y*y) return (Li(x2) - Li(x1)); if (u2 <= 3) { double r, abserr; size_t neval; gsl_function f; f.function = (double (*) (double, void *)) &integrand1; f.params = &y; /* intnum(v=1,u,buchstab(v)*y^v) */ /* First part: intnum(v=u1, u, y^v/v*log(v-1.)) */ gsl_integration_qng (&f, MAX(u1, 2.) 
, u2, 0., 0.001, &r, &abserr, &neval); /* Second part: intnum(v=u1, u2, y^v/v) = Li(x2) - Li(x1) */ r += Li (x2) - Li (x1); return r; } { double r, abserr; size_t neval; gsl_function f; f.function = (double (*) (double, void *)) &integrand2; f.params = &y; gsl_integration_qng (&f, u1, u2, 0., 0.001, &r, &abserr, &neval); return r; } } static double integrand3 (double p, double *param) { const double x1 = param[0]; const double x2 = param[1]; const double y = param[2]; return no_small_prime (x1 / p, x2 / p, y) / log(p); } double no_small_prime_factor (const double x1, const double x2, const double y, const double z1, const double z2) { double r, abserr, param[3]; size_t neval; gsl_function f; param[0] = x1; param[1] = x2; param[2] = y; f.function = (double (*) (double, void *)) &integrand3; f.params = ¶m; gsl_integration_qng (&f, z1, z2, 0., 0.01, &r, &abserr, &neval); return r; } #endif #ifdef TESTDRIVE int main (int argc, char **argv) { double B1, B2, N, nr, r, m; int S; unsigned long p, i, pi; primegen pg[1]; primegen_init (pg); i = pi = 0; for (p = primegen_next (pg); p <= PRIME_PI_MAX; p = primegen_next (pg)) { for ( ; i < p; i++) prime_pi[PRIME_PI_MAP(i)] = pi; pi++; } for ( ; i < p; i++) prime_pi[PRIME_PI_MAP(i)] = pi; if (argc < 2) { printf ("Usage: rho [ ]\n"); return 1; } if (strcmp (argv[1], "-Buchstab_Phi") == 0) { unsigned long x, y, r; if (argc < 4) { printf ("-Buchstab_Phi needs x and y paramters\n"); exit (EXIT_FAILURE); } x = strtoul (argv[2], NULL, 10); y = strtoul (argv[3], NULL, 10); r = Buchstab_Phi (x, y); printf ("Buchstab_Phi (%lu, %lu) = %lu\n", x, y, r); exit (EXIT_SUCCESS); } else if (strcmp (argv[1], "-Buchstab_Psi") == 0) { unsigned long x, y, r; if (argc < 4) { printf ("-Buchstab_Psi needs x and y paramters\n"); exit (EXIT_FAILURE); } x = strtoul (argv[2], NULL, 10); y = strtoul (argv[3], NULL, 10); r = Buchstab_Psi (x, y); printf ("Buchstab_Psi (%lu, %lu) = %lu\n", x, y, r); exit (EXIT_SUCCESS); } else if (strcmp (argv[1], "-nsp") == 
0) { double x1, x2, y, r; if (argc < 5) { printf ("-nsp needs x1, x2, and y paramters\n"); exit (EXIT_FAILURE); } x1 = atof (argv[2]); x2 = atof (argv[3]); y = atof (argv[4]); r = no_small_prime (x1, x2, y); printf ("no_small_prime(%f, %f, %f) = %f\n", x1, x2, y, r); exit (EXIT_SUCCESS); } else if (strcmp (argv[1], "-nspf") == 0) { double x1, x2, y, z1, z2, r; if (argc < 7) { printf ("-nspf needs x1, x2, y, z1, and z2 paramters\n"); exit (EXIT_FAILURE); } x1 = atof (argv[2]); x2 = atof (argv[3]); y = atof (argv[4]); z1 = atof (argv[5]); z2 = atof (argv[6]); r = no_small_prime_factor (x1, x2, y, z1, z2); printf ("no_small_prime(%f, %f, %f, %f, %f) = %f\n", x1, x2, y, z1, z2, r); exit (EXIT_SUCCESS); } if (argc < 6) { printf ("Need 5 or 7 arguments: B1 B2 N nr S [r m]\n"); exit (EXIT_FAILURE); } B1 = atof (argv[1]); B2 = atof (argv[2]); N = atof (argv[3]); nr = atof (argv[4]); S = atoi (argv[5]); r = 0; m = 1; if (argc > 7) { r = atoi (argv[6]); m = atoi (argv[7]); } rhoinit (256, 10); if (N < 50.) { double sum; sum = ecmprob(B1, B2, exp2 (N), nr, S); sum += 4. * ecmprob(B1, B2, 3./2. * exp2 (N), nr, S); sum += ecmprob(B1, B2, 2. * exp2 (N), nr, S); sum *= 1./6.; printf ("ECM: %.16f\n", sum); sum = pm1prob_rm (B1, B2, exp2 (N), nr, S, r, m); sum += 4. * pm1prob_rm (B1, B2, 3./2. * exp2 (N), nr, S, r, m); sum += pm1prob_rm (B1, B2, 2. 
* exp2 (N), nr, S, r, m); sum *= 1./6.; printf ("P-1: %.16f\n", sum); } else { printf ("ECM: %.16f\n", ecmprob(B1, B2, N, nr, S)); printf ("P-1: %.16f\n", pm1prob_rm (B1, B2, N, nr, S, r, m)); } rhoinit (0, 0); return 0; } #endif ecm-6.4.4/ecm-params.h.default0000644023561000001540000000071212106741273013037 00000000000000#define MPZMOD_THRESHOLD 170 #define REDC_THRESHOLD 294 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 0, 0, 0, 0, 0, 1, 7, 8, 1, 1, 8, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 16, 16, 1, 1, 16, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 11 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 11 #define MUL_NTT_THRESHOLD 1024 #define PREREVERTDIVISION_NTT_THRESHOLD 64 #define POLYINVERT_NTT_THRESHOLD 512 #define POLYEVALT_NTT_THRESHOLD 512 #define MPZSPV_NORMALISE_STRIDE 512 ecm-6.4.4/missing0000755023561000001540000002415212106744312010616 00000000000000#! /bin/sh # Common stub for a few missing GNU programs while installing. scriptversion=2012-01-06.13; # UTC # Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006, # 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. # Originally by Fran,cois Pinard , 1996. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. 
if test $# -eq 0; then echo 1>&2 "Try \`$0 --help' for more information" exit 1 fi run=: sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p' sed_minuso='s/.* -o \([^ ]*\).*/\1/p' # In the cases where this matters, `missing' is being run in the # srcdir already. if test -f configure.ac; then configure_ac=configure.ac else configure_ac=configure.in fi msg="missing on your system" case $1 in --run) # Try to run requested program, and just exit if it succeeds. run= shift "$@" && exit 0 # Exit code 63 means version mismatch. This often happens # when the user try to use an ancient version of a tool on # a file that requires a minimum version. In this case we # we should proceed has if the program had been absent, or # if --run hadn't been passed. if test $? = 63; then run=: msg="probably too old" fi ;; -h|--h|--he|--hel|--help) echo "\ $0 [OPTION]... PROGRAM [ARGUMENT]... Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an error status if there is no known handling for PROGRAM. Options: -h, --help display this help and exit -v, --version output version information and exit --run try to run the given command, and emulate it if it fails Supported PROGRAM values: aclocal touch file \`aclocal.m4' autoconf touch file \`configure' autoheader touch file \`config.h.in' autom4te touch the output file, or create a stub one automake touch all \`Makefile.in' files bison create \`y.tab.[ch]', if possible, from existing .[ch] flex create \`lex.yy.c', if possible, from existing .c help2man touch the output file lex create \`lex.yy.c', if possible, from existing .c makeinfo touch the output file yacc create \`y.tab.[ch]', if possible, from existing .[ch] Version suffixes to PROGRAM as well as the prefixes \`gnu-', \`gnu', and \`g' are ignored when checking the name. Send bug reports to ." exit $? ;; -v|--v|--ve|--ver|--vers|--versi|--versio|--version) echo "missing $scriptversion (GNU Automake)" exit $? 
;; -*) echo 1>&2 "$0: Unknown \`$1' option" echo 1>&2 "Try \`$0 --help' for more information" exit 1 ;; esac # normalize program name to check for. program=`echo "$1" | sed ' s/^gnu-//; t s/^gnu//; t s/^g//; t'` # Now exit if we have it, but it failed. Also exit now if we # don't have it and --version was passed (most likely to detect # the program). This is about non-GNU programs, so use $1 not # $program. case $1 in lex*|yacc*) # Not GNU programs, they don't have --version. ;; *) if test -z "$run" && ($1 --version) > /dev/null 2>&1; then # We have it, but it failed. exit 1 elif test "x$2" = "x--version" || test "x$2" = "x--help"; then # Could not run --version or --help. This is probably someone # running `$TOOL --version' or `$TOOL --help' to check whether # $TOOL exists and not knowing $TOOL uses missing. exit 1 fi ;; esac # If it does not exist, or fails to run (possibly an outdated version), # try to emulate it. case $program in aclocal*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`acinclude.m4' or \`${configure_ac}'. You might want to install the \`Automake' and \`Perl' packages. Grab them from any GNU archive site." touch aclocal.m4 ;; autoconf*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`${configure_ac}'. You might want to install the \`Autoconf' and \`GNU m4' packages. Grab them from any GNU archive site." touch configure ;; autoheader*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`acconfig.h' or \`${configure_ac}'. You might want to install the \`Autoconf' and \`GNU m4' packages. Grab them from any GNU archive site." 
files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` test -z "$files" && files="config.h" touch_files= for f in $files; do case $f in *:*) touch_files="$touch_files "`echo "$f" | sed -e 's/^[^:]*://' -e 's/:.*//'`;; *) touch_files="$touch_files $f.in";; esac done touch $touch_files ;; automake*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. You might want to install the \`Automake' and \`Perl' packages. Grab them from any GNU archive site." find . -type f -name Makefile.am -print | sed 's/\.am$/.in/' | while read f; do touch "$f"; done ;; autom4te*) echo 1>&2 "\ WARNING: \`$1' is needed, but is $msg. You might have modified some files without having the proper tools for further handling them. You can get \`$1' as part of \`Autoconf' from any GNU archive site." file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -f "$file"; then touch $file else test -z "$file" || exec >$file echo "#! /bin/sh" echo "# Created by GNU Automake missing as a replacement of" echo "# $ $@" echo "exit 0" chmod +x $file exit 1 fi ;; bison*|yacc*) echo 1>&2 "\ WARNING: \`$1' $msg. You should only need it if you modified a \`.y' file. You may need the \`Bison' package in order for those modifications to take effect. You can get \`Bison' from any GNU archive site." rm -f y.tab.c y.tab.h if test $# -ne 1; then eval LASTARG=\${$#} case $LASTARG in *.y) SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` if test -f "$SRCFILE"; then cp "$SRCFILE" y.tab.c fi SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` if test -f "$SRCFILE"; then cp "$SRCFILE" y.tab.h fi ;; esac fi if test ! -f y.tab.h; then echo >y.tab.h fi if test ! -f y.tab.c; then echo 'main() { return 0; }' >y.tab.c fi ;; lex*|flex*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a \`.l' file. 
You may need the \`Flex' package in order for those modifications to take effect. You can get \`Flex' from any GNU archive site." rm -f lex.yy.c if test $# -ne 1; then eval LASTARG=\${$#} case $LASTARG in *.l) SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` if test -f "$SRCFILE"; then cp "$SRCFILE" lex.yy.c fi ;; esac fi if test ! -f lex.yy.c; then echo 'main() { return 0; }' >lex.yy.c fi ;; help2man*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a dependency of a manual page. You may need the \`Help2man' package in order for those modifications to take effect. You can get \`Help2man' from any GNU archive site." file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -f "$file"; then touch $file else test -z "$file" || exec >$file echo ".ab help2man is required to generate this page" exit $? fi ;; makeinfo*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a \`.texi' or \`.texinfo' file, or any other file indirectly affecting the aspect of the manual. The spurious call might also be the consequence of using a buggy \`make' (AIX, DU, IRIX). You might want to install the \`Texinfo' package or the \`GNU make' package. Grab either from any GNU archive site." # The file to touch is that specified with -o ... file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -z "$file"; then # ... or it is the one specified with @setfilename ... infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` file=`sed -n ' /^@setfilename/{ s/.* \([^ ]*\) *$/\1/ p q }' $infile` # ... or it is derived from the source name (dir/f.texi becomes f.info) test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info fi # If the file does not exist, the user really needs makeinfo; # let's fail without touching anything. test -f $file || exit 1 touch $file ;; *) echo 1>&2 "\ WARNING: \`$1' is needed, and is $msg. 
You might have modified some files without having the proper tools for further handling them. Check the \`README' file, it often tells you about the needed prerequisites for installing this package. You may also peek at any GNU archive site, in case some other package would contain this missing \`$1' program." exit 1 ;; esac exit 0 # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: ecm-6.4.4/depcomp0000755023561000001540000004755612106744313010612 00000000000000#! /bin/sh # depcomp - compile a program generating dependencies as side-effects scriptversion=2011-12-04.11; # UTC # Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006, 2007, 2009, 2010, # 2011 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Originally written by Alexandre Oliva . case $1 in '') echo "$0: No command. Try \`$0 --help' for more information." 1>&2 exit 1; ;; -h | --h*) cat <<\EOF Usage: depcomp [--help] [--version] PROGRAM [ARGS] Run PROGRAMS ARGS to compile a file, generating dependencies as side-effects. 
Environment variables: depmode Dependency tracking mode. source Source file read by `PROGRAMS ARGS'. object Object file output by `PROGRAMS ARGS'. DEPDIR directory where to store dependencies. depfile Dependency file to output. tmpdepfile Temporary file to use when outputting dependencies. libtool Whether libtool is used (yes/no). Report bugs to . EOF exit $? ;; -v | --v*) echo "depcomp $scriptversion" exit $? ;; esac if test -z "$depmode" || test -z "$source" || test -z "$object"; then echo "depcomp: Variables source, object and depmode must be set" 1>&2 exit 1 fi # Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. depfile=${depfile-`echo "$object" | sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} rm -f "$tmpdepfile" # Some modes work just like other modes, but use different flags. We # parameterize here, but still list the modes in the big case below, # to make depend.m4 easier to write. Note that we *cannot* use a case # here, because this file can only contain one case statement. if test "$depmode" = hp; then # HP compiler uses -M and no extra arg. gccflag=-M depmode=gcc fi if test "$depmode" = dashXmstdout; then # This is just like dashmstdout with a different argument. dashmflag=-xM depmode=dashmstdout fi cygpath_u="cygpath -u -f -" if test "$depmode" = msvcmsys; then # This is just like msvisualcpp but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvisualcpp fi if test "$depmode" = msvc7msys; then # This is just like msvc7 but w/o cygpath translation. # Just convert the backslash-escaped backslashes to single forward # slashes to satisfy depend.m4 cygpath_u='sed s,\\\\,/,g' depmode=msvc7 fi case "$depmode" in gcc3) ## gcc 3 implements dependency tracking that does exactly what ## we want. Yay! 
Note: for some reason libtool 1.4 doesn't like ## it if -MD -MP comes after the -MF stuff. Hmm. ## Unfortunately, FreeBSD c89 acceptance of flags depends upon ## the command line argument order; so add the flags where they ## appear in depend2.am. Note that the slowdown incurred here ## affects only configure: in makefiles, %FASTDEP% shortcuts this. for arg do case $arg in -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; *) set fnord "$@" "$arg" ;; esac shift # fnord shift # $arg done "$@" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi mv "$tmpdepfile" "$depfile" ;; gcc) ## There are various ways to get dependency output from gcc. Here's ## why we pick this rather obscure method: ## - Don't want to use -MD because we'd like the dependencies to end ## up in a subdir. Having to rename by hand is ugly. ## (We might end up doing this anyway to support other compilers.) ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like ## -MM, not -M (despite what the docs say). ## - Using -M directly means running the compiler twice (even worse ## than renaming). if test -z "$gccflag"; then gccflag=-MD, fi "$@" -Wp,"$gccflag$tmpdepfile" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ## The second -e expression handles DOS-style file names with drive letters. sed -e 's/^[^:]*: / /' \ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" ## This next piece of magic avoids the `deleted header file' problem. ## The problem is that when a header file which appears in a .P file ## is deleted, the dependency causes make to die (because there is ## typically no way to rebuild the header). We avoid this by adding ## dummy dependencies for each header file. Too bad gcc doesn't do ## this for us directly. tr ' ' ' ' < "$tmpdepfile" | ## Some versions of gcc put a space before the `:'. 
On the theory ## that the space means something, we add a space to the output as ## well. hp depmode also adds that space, but also prefixes the VPATH ## to the object. Take care to not repeat it in the output. ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \ | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; sgi) if test "$libtool" = yes; then "$@" "-Wp,-MDupdate,$tmpdepfile" else "$@" -MDupdate "$tmpdepfile" fi stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files echo "$object : \\" > "$depfile" # Clip off the initial element (the dependent). Don't try to be # clever and replace this with sed code, as IRIX sed won't handle # lines with more than a fixed number of characters (4096 in # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; # the IRIX cc adds comments like `#:fec' to the end of the # dependency line. tr ' ' ' ' < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ tr ' ' ' ' >> "$depfile" echo >> "$depfile" # The second pass generates a dummy entry for each header file. tr ' ' ' ' < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ >> "$depfile" else # The sourcefile does not contain any dependencies, so just # store a dummy comment line, to avoid errors with the Makefile # "include basename.Plo" scheme. echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; aix) # The C for AIX Compiler uses -M and outputs the dependencies # in a .u file. In older versions, this file always lives in the # current directory. 
Also, the AIX compiler puts `$object:' at the # start of each line; $object doesn't have directory information. # Version 6 uses the directory in both cases. dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` test "x$dir" = "x$object" && dir= base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` if test "$libtool" = yes; then tmpdepfile1=$dir$base.u tmpdepfile2=$base.u tmpdepfile3=$dir.libs/$base.u "$@" -Wc,-M else tmpdepfile1=$dir$base.u tmpdepfile2=$dir$base.u tmpdepfile3=$dir$base.u "$@" -M fi stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then # Each line is of the form `foo.o: dependent.h'. # Do two passes, one to just change these to # `$object: dependent.h' and one to simply `dependent.h:'. sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" # That's a tab and a space in the []. sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" else # The sourcefile does not contain any dependencies, so just # store a dummy comment line, to avoid errors with the Makefile # "include basename.Plo" scheme. echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; icc) # Intel's C compiler understands `-MD -MF file'. However on # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c # ICC 7.0 will fill foo.d with something like # foo.o: sub/foo.c # foo.o: sub/foo.h # which is wrong. We want: # sub/foo.o: sub/foo.c # sub/foo.o: sub/foo.h # sub/foo.c: # sub/foo.h: # ICC 7.1 will output # foo.o: sub/foo.c sub/foo.h # and will wrap long lines using \ : # foo.o: sub/foo.c ... \ # sub/foo.h ... \ # ... "$@" -MD -MF "$tmpdepfile" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each line is of the form `foo.o: dependent.h', # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. 
# Do two passes, one to just change these to # `$object: dependent.h' and one to simply `dependent.h:'. sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process this invocation # correctly. Breaking it into two sed invocations is a workaround. sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp2) # The "hp" stanza above does not work with aCC (C++) and HP's ia64 # compilers, which have integrated preprocessors. The correct option # to use with these is +Maked; it writes dependencies to a file named # 'foo.d', which lands next to the object file, wherever that # happens to be. # Much of this is similar to the tru64 case; see comments there. dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` test "x$dir" = "x$object" && dir= base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` if test "$libtool" = yes; then tmpdepfile1=$dir$base.d tmpdepfile2=$dir.libs/$base.d "$@" -Wc,+Maked else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d "$@" +Maked fi stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile1" "$tmpdepfile2" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile" # Add `dependent.h:' lines. sed -ne '2,${ s/^ *// s/ \\*$// s/$/:/ p }' "$tmpdepfile" >> "$depfile" else echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" "$tmpdepfile2" ;; tru64) # The Tru64 compiler uses -MD to generate dependencies as a side # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put # dependencies in `foo.d' instead, so we check for that too. # Subdirectories are respected. 
dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` test "x$dir" = "x$object" && dir= base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` if test "$libtool" = yes; then # With Tru64 cc, shared objects can also be used to make a # static library. This mechanism is used in libtool 1.4 series to # handle both shared and static libraries in a single compilation. # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d. # # With libtool 1.5 this exception was removed, and libtool now # generates 2 separate objects for the 2 libraries. These two # compilations output dependencies in $dir.libs/$base.o.d and # in $dir$base.o.d. We have to check for both files, because # one of the two compilations can be disabled. We should prefer # $dir$base.o.d over $dir.libs/$base.o.d because the latter is # automatically cleaned when .libs/ is deleted, while ignoring # the former would cause a distcleancheck panic. tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4 tmpdepfile2=$dir$base.o.d # libtool 1.5 tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5 tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504 "$@" -Wc,-MD else tmpdepfile1=$dir$base.o.d tmpdepfile2=$dir$base.d tmpdepfile3=$dir$base.d tmpdepfile4=$dir$base.d "$@" -MD fi stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" # That's a tab and a space in the []. sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" else echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; msvc7) if test "$libtool" = yes; then showIncludes=-Wc,-showIncludes else showIncludes=-showIncludes fi "$@" $showIncludes > "$tmpdepfile" stat=$? 
grep -v '^Note: including file: ' "$tmpdepfile" if test "$stat" = 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" # The first sed program below extracts the file names and escapes # backslashes for cygpath. The second sed program outputs the file # name when reading, but also accumulates all include files in the # hold buffer in order to output them again at the end. This only # works with sed implementations that can handle large buffers. sed < "$tmpdepfile" -n ' /^Note: including file: *\(.*\)/ { s//\1/ s/\\/\\\\/g p }' | $cygpath_u | sort -u | sed -n ' s/ /\\ /g s/\(.*\)/ \1 \\/p s/.\(.*\) \\/\1:/ H $ { s/.*/ / G p }' >> "$depfile" rm -f "$tmpdepfile" ;; msvc7msys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; #nosideeffect) # This comment above is used by automake to tell side-effect # dependency tracking mechanisms from slower ones. dashmstdout) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout, regardless of -o. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove `-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done test -z "$dashmflag" && dashmflag=-M # Require at least two characters before searching for `:' # in the target name. This is to cope with DOS-style filenames: # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. "$@" $dashmflag | sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" rm -f "$depfile" cat < "$tmpdepfile" > "$depfile" tr ' ' ' ' < "$tmpdepfile" | \ ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. 
Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; dashXmstdout) # This case only exists to satisfy depend.m4. It is never actually # run, as this mode is specially recognized in the preamble. exit 1 ;; makedepend) "$@" || exit $? # Remove any Libtool call if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # X makedepend shift cleared=no eat=no for arg do case $cleared in no) set ""; shift cleared=yes ;; esac if test $eat = yes; then eat=no continue fi case "$arg" in -D*|-I*) set fnord "$@" "$arg"; shift ;; # Strip any option that makedepend may not understand. Remove # the object too, otherwise makedepend will parse it as a source file. -arch) eat=yes ;; -*|$object) ;; *) set fnord "$@" "$arg"; shift ;; esac done obj_suffix=`echo "$object" | sed 's/^.*\././'` touch "$tmpdepfile" ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" rm -f "$depfile" # makedepend may prepend the VPATH from the source file name to the object. # No need to regex-escape $object, excess matching of '.' is harmless. sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile" sed '1,2d' "$tmpdepfile" | tr ' ' ' ' | \ ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" "$tmpdepfile".bak ;; cpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi # Remove `-o $object'. 
IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done "$@" -E | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | sed '$ s: \\$::' > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" cat < "$tmpdepfile" >> "$depfile" sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; msvisualcpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test "X$1" != 'X--mode=compile'; do shift done shift fi IFS=" " for arg do case "$arg" in -o) shift ;; $object) shift ;; "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") set fnord "$@" shift shift ;; *) set fnord "$@" "$arg" shift shift ;; esac done "$@" -E 2>/dev/null | sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" echo " " >> "$depfile" sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile" rm -f "$tmpdepfile" ;; msvcmsys) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. 
exit 1 ;; none) exec "$@" ;; *) echo "Unknown depmode $depmode" 1>&2 exit 1 ;; esac exit 0 # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: ecm-6.4.4/mul_fft.c0000644023561000001540000022621712113414351011020 00000000000000/* An implementation in GMP of Scho"nhage's fast multiplication algorithm modulo 2^N+1, by Paul Zimmermann, INRIA Lorraine, February 1998. Revised July 2002 and January 2003, Paul Zimmermann. Further revised by Pierrick Gaudry, Paul Zimmermann, and Torbjorn Granlund, March/April and November/December 2006, and also by Alexander Kruppa in December 2006. Revised December 2007 for inclusion into GMP-ECM. THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND THE FUNCTIONS HAVE MUTABLE INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ /* References: Schnelle Multiplikation grosser Zahlen, by Arnold Scho"nhage and Volker Strassen, Computing 7, p. 281-292, 1971. Asymptotically fast algorithms for the numerical multiplication and division of polynomials with complex coefficients, by Arnold Scho"nhage, Computer Algebra, EUROCAM'82, LNCS 144, p. 3-15, 1982. Tapes versus Pointers, a study in implementing fast algorithms, by Arnold Scho"nhage, Bulletin of the EATCS, 30, p. 23-32, 1986. See also http://www.loria.fr/~zimmerma/bignum Future: It might be possible to avoid a small number of MPN_COPYs by using a rotating temporary or two. Multiplications of unequal sized operands can be done with this code, but it needs a tighter test for identifying squaring (same sizes as well as same pointers). */ /* Throughout this file, Mp is chosen so that ord_{2^Nprime + 1}(sqrt(2)^Mp) == 2^k */ #include "config.h" #include #include /* for abort() */ #include /* for LONG_MAX */ #include #ifdef HAVE_ALLOCA_H #include #endif #ifdef HAVE_MALLOC_H #include #endif #include "gmp.h" #include "mul_fft-params.h" /* All functions that are not declared static are renamed to avoid conflicts with GMP's functions. Should we include ecm-impl.h instead? 
*/

/* Map the mpn_* FFT names used in this file onto __ecm_-prefixed symbols.
   NOTE(review): presumably this keeps them from clashing with GMP's own
   functions of the same name -- confirm. */
#define mpn_mul_fft __ecm_mpn_mul_fft
#define mpn_mul_fft_full __ecm_mpn_mul_fft_full
#define mpn_fft_best_k __ecm_mpn_fft_best_k
#define mpn_fft_next_size __ecm_mpn_fft_next_size

/* Default tuning tables: lists of {n, k} pairs, each terminated by a
   {LONG_MAX, 0} entry.  Every table can be overridden by defining the
   macro before this point; the squaring tables default to the
   multiplication tables. */
#ifndef MUL_FFT_TABLE2
#define MUL_FFT_TABLE2 {{1, 4}, {897, 5}, {2305, 6}, {4865, 7}, {11777, 8}, {31745, 9}, {98305, 10}, {1040385, 11}, {LONG_MAX, 0}}
#endif

#ifndef MUL_FFTM_TABLE2
#define MUL_FFTM_TABLE2 {{1, 4}, {833, 5}, {2049, 6}, {4609, 7}, {9217, 8}, {23553, 9}, {63489, 10}, {196609, 11}, {778241, 12}, {1032193, 13}, {LONG_MAX, 0}}
#endif

#ifndef SQR_FFT_TABLE2
#define SQR_FFT_TABLE2 MUL_FFT_TABLE2
#endif

#ifndef SQR_FFTM_TABLE2
#define SQR_FFTM_TABLE2 MUL_FFTM_TABLE2
#endif

/* Default thresholds, compared against a limb count elsewhere in this file
   to choose between a recursive FFT and a plain mpn multiplication. */
#ifndef MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD 300
#endif

#ifndef SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD 568
#endif

/* ASSERT is only active when WANT_ASSERT is defined; ASSERT_ALWAYS always
   maps to assert(). */
#ifndef ASSERT
#ifdef WANT_ASSERT
#define ASSERT(x) assert(x)
#else
#define ASSERT(x)
#endif
#endif

#ifndef ASSERT_ALWAYS
#define ASSERT_ALWAYS(x) assert(x)
#endif

/* Branch-prediction hint.  Note that the non-GCC fallback expands to the
   bare argument without parentheses. */
#ifndef LIKELY
#if defined(__GNUC__)
#define LIKELY(x) __builtin_expect ((x) != 0, 1)
#else
#define LIKELY(x) x
#endif
#endif

/* _PROTO macro is copied from longlong.h of GMP */
#ifndef _PROTO
#if (__STDC__-0) || defined (__cplusplus) || defined( _MSC_VER )
#define _PROTO(x) x
#else
#define _PROTO(x) ()
#endif
#endif

/* Largest limb value (all bits set) and the limb with only its top bit
   set. */
#ifndef MP_LIMB_T_MAX
#define MP_LIMB_T_MAX (~(mp_limb_t)0)
#endif

#ifndef GMP_LIMB_HIGHBIT
#define GMP_LIMB_HIGHBIT (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
#endif

/* Fallbacks for GMP's temporary-allocation macros.  The allocations are
   done with alloca(), so TMP_DECL, TMP_MARK and TMP_FREE expand to
   nothing. */
#ifndef TMP_DECL
#define TMP_DECL
#endif

#ifndef TMP_MARK
#define TMP_MARK
#endif

#ifndef TMP_FREE
#define TMP_FREE
#endif

#ifndef TMP_ALLOC_LIMBS
#define TMP_ALLOC_LIMBS(n) alloca((n) * sizeof(mp_limb_t))
#endif

#ifndef TMP_ALLOC_MP_PTRS
#define TMP_ALLOC_MP_PTRS(n) alloca((n) * sizeof(mp_ptr))
#endif

#ifndef TMP_ALLOC_TYPE
#define TMP_ALLOC_TYPE(n,t) alloca((n) * sizeof(t))
#endif

/* Heap allocation fallbacks: plain malloc()/free(); the size argument of
   the free macro is ignored. */
#ifndef __GMP_ALLOCATE_FUNC_LIMBS
#define __GMP_ALLOCATE_FUNC_LIMBS(n) malloc((n) * sizeof(mp_limb_t))
#endif

#ifndef __GMP_FREE_FUNC_LIMBS
#define __GMP_FREE_FUNC_LIMBS(a,n) free(a)
#endif

/* Without GCC, never take the "constant increment" fast path in the
   increment/decrement macros below. */
#if !defined(__GNUC__)
#define __builtin_constant_p(x) 0
#endif

/* Set the n limbs at dst to zero; does nothing when n is 0. */
#ifndef MPN_ZERO
/* from gmp-impl.h */
#define MPN_ZERO(dst, n) \
  do { \
    ASSERT ((n) >= 0); \
    if ((n) != 0) \
      { \
        mp_ptr __dst = (dst); \
        mp_size_t __n = (n); \
        do \
          *__dst++ = 0; \
        while (--__n); \
      } \
  } while (0)
#endif

/* Subtract incr from the limb vector starting at p, propagating the borrow
   upwards for as long as limbs wrap around.  The size argument is not
   used, so no bound is checked. */
#ifndef MPN_DECR_U
/* copied from gmp-4.2.1/gmp-impl.h */
#define MPN_DECR_U(p,size,incr) \
  do { \
    mp_limb_t __x; \
    mp_ptr __p = (p); \
    if (__builtin_constant_p (incr) && (incr) == 1) \
      { \
        while ((*(__p++))-- == 0) \
          ; \
      } \
    else \
      { \
        __x = *__p; \
        *__p = __x - (incr); \
        if (__x < (incr)) \
          while ((*(++__p))-- == 0) \
            ; \
      } \
  } while (0)
#endif

/* Add incr to the limb vector starting at p, propagating the carry upwards
   for as long as limbs wrap around to zero.  No bound is checked. */
#ifndef mpn_incr_u
/* copied from gmp-4.2.1/gmp-impl.h */
#define mpn_incr_u(p,incr) \
  do { \
    mp_limb_t __x; \
    mp_ptr __p = (p); \
    if (__builtin_constant_p (incr) && (incr) == 1) \
      { \
        while (++(*(__p++)) == 0) \
          ; \
      } \
    else \
      { \
        __x = *__p + (incr); \
        *__p = __x; \
        if (__x < (incr)) \
          while (++(*(++__p)) == 0) \
            ; \
      } \
  } while (0)
#endif

/* Same as mpn_incr_u; the size argument is ignored. */
#ifndef MPN_INCR_U
/* copied from gmp-4.2.1/gmp-impl.h */
#define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
#endif

/* Copy n limbs from src to dst, lowest address first; does nothing when n
   is 0. */
#ifndef MPN_COPY
/* copied from gmp-4.2.1/gmp-impl.h */
#define MPN_COPY(dst, src, n) \
  do { \
    if ((n) != 0) \
      { \
        mp_size_t __n = (n) - 1; \
        mp_ptr __dst = (dst); \
        mp_srcptr __src = (src); \
        mp_limb_t __x; \
        ASSERT ((n) > 0); \
        __x = *__src++; \
        if (__n != 0) \
          { \
            do \
              { \
                *__dst++ = __x; \
                __x = *__src++; \
              } \
            while (--__n); \
          } \
        *__dst++ = __x; \
      } \
  } while (0)
#endif

/* Store the one's complement of the n limbs at s into d; requires
   n >= 1. */
#ifndef mpn_com_n
/* copied from gmp-4.2.1/gmp-impl.h */
#define mpn_com_n(d,s,n) \
  do { \
    mp_ptr __d = (d); \
    mp_srcptr __s = (s); \
    mp_size_t __n = (n); \
    ASSERT (__n >= 1); \
    do \
      *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
    while (--__n); \
  } while (0)
#endif

/* Squaring falls back to a general multiplication of b by itself. */
#ifndef mpn_sqr_n
#define mpn_sqr_n(a,b,n) mpn_mul_n(a,b,b,n)
#endif

/* Uncomment this define to disable the use of sqrt(2) as a root of unity for
   the transform/weight signal.
The function mpn_fft_mul_sqrt2exp_modF() will still get called, but parameters for the transform will be chosen so that it will always be called with an even exponent, thus the multiplication will be by a power of 2. */ /* #define NO_SQRT_2 */ /* Change this to "#define TRACE(x) x" for some traces. */ #define TRACE(x) /* #define COUNT_ZEROCOPY */ /* This define enables interleaved decomposition/forward transform in Bailey's algorithm for better data locality */ #define MERGED_BAILEY_DECOMPOSE /* The MPN_ZERO and MPN_COPY macros are pretty slow in GMP 4.2 (and presumably previous versions) so we try to define quicker functions here. For now we simply use the string store/copy instruction which is ok, although not optimal (MMX or XMM would probably do better). */ #define OWN_MPN_FFT_ZERO /* REP MOVSL/MOVSQ seems to be no faster or slower than MPN_COPY() */ /* #define OWN_MPN_FFT_COPY */ #if defined(__x86_64__) && defined(__GNUC__) && defined(OWN_MPN_FFT_ZERO) static inline void MPN_FFT_ZERO (mp_ptr dst, mp_size_t n) { __asm__ __volatile__ ("rep stosq": "+c" (n), "+D" (dst): "a" (0L) : "memory"); /* Put n in %rcx, which will also be written (decreased to 0) by the instruction and put dst in %rdi which will also be written (increased by 8*n). Put 0 in %rax. 
*/ } #elif defined(__i386__) && defined(__GNUC__) && defined(OWN_MPN_FFT_ZERO) static inline void MPN_FFT_ZERO (mp_ptr dst, mp_size_t n) { __asm__ __volatile__ ("rep stosl" : "+c" (n), "+D" (dst) : "a" (0) : "memory"); } #elif defined(_MSC_VER) && !defined(_WIN64) static inline void MPN_FFT_ZERO (mp_ptr dst, mp_size_t n) { ASSERT(n >= 0); __asm { push edi mov edi,dst xor eax,eax mov ecx,n rep stosd pop edi } } #else /* Fall back to GMP's MPN_ZERO() macro */ #define MPN_FFT_ZERO(dst, n) MPN_ZERO(dst,n) #endif #if defined(__x86_64__) && defined(__GNUC__) && defined(OWN_MPN_FFT_ZERO) static inline void MPN_FFT_STORE (mp_ptr dst, mp_size_t n, mp_limb_t d) { __asm__ __volatile__ ("rep stosq": "+c" (n), "+D" (dst): "a" (d) : "memory"); /* Put n in %rcx, which will also be written (decreased to 0) by the instruction and put dst in %rdi which will also be written (increased by 8*n). Put 0 in %rax. */ } #elif defined(__i386__) && defined(__GNUC__) && defined(OWN_MPN_FFT_ZERO) static inline void MPN_FFT_STORE (mp_ptr dst, mp_size_t n, mp_limb_t d) { __asm__ __volatile__ ("rep stosl" : "+c" (n), "+D" (dst) : "a" (d) : "memory"); } #elif defined(_MSC_VER) && !defined(_WIN64) static inline void MPN_FFT_STORE (mp_ptr dst, mp_size_t n, mp_limb_t d) { ASSERT(n >= 0); __asm { push edi mov edi,dst mov eax,d mov ecx,n rep stosd pop edi } } #else static inline void MPN_FFT_STORE (mp_ptr dst, mp_size_t n, mp_limb_t d) { ASSERT(n >= 0); for (; n > 0; n--) *dst++ = d; } #endif #if defined(__x86_64__) && defined(__GNUC__) && defined(OWN_MPN_FFT_COPY) static inline void MPN_FFT_COPY (mp_ptr dst, const mp_srcptr src, mp_size_t n) { __asm__ __volatile__ ("rep movsq": "+c" (n), "+S" (src), "+D" (dst) : "memory"); /* Put n in %rcx, which will also be written (decreased to 0) by the instruction, put src in %rsi and put dst in %rdi which will both also be written (each increased by 8*n). FIXME: should "memory" go in the clobbered list? 
*/ } #elif defined(__i386__) && defined(__GNUC__) && defined(OWN_MPN_FFT_COPY) static inline void MPN_FFT_COPY (mp_ptr dst, const mp_srcptr src, mp_size_t n) { __asm__ __volatile__ ("rep movsl" : "+c" (n), "+S" (src), "+D" (dst) : "memory"); } #elif defined(_MSC_VER) && !defined(_WIN64) static inline void MPN_FFT_COPY (mp_ptr dst, const mp_srcptr src, mp_size_t n) { __asm { push esi push edi mov edi,dst mov esi,src mov ecx,n rep movsd pop edi pop esi } } #else /* Fall back to GMP's MPN_COPY() macro */ #define MPN_FFT_COPY(dst, src, n) MPN_COPY(dst,src,n) #endif /* If LOG2_GMP_NUMB_BITS is defined, GMP_NUMB_BITS=2^LOG2_GMP_NUMB_BITS; this enables to speed up multiplication or division by GMP_NUMB_BITS. */ #if (GMP_NUMB_BITS == 32) #define LOG2_GMP_NUMB_BITS 5 #elif (GMP_NUMB_BITS == 64) #define LOG2_GMP_NUMB_BITS 6 #endif static inline unsigned int mpn_mul_fft_lcm (unsigned int, unsigned int); /* quotient, remainder, product by GMP_NUMB_BITS */ #ifdef LOG2_GMP_NUMB_BITS #define MOD_GMP_NUMB_BITS(x) ((x) & ((1 << LOG2_GMP_NUMB_BITS) - 1)) #define DIV_GMP_NUMB_BITS(x) ((x) >> LOG2_GMP_NUMB_BITS) /* x <- y / (2 * GMP_NUMB_BITS), y <- y % (2 * GMP_NUMB_BITS) */ #define DIVMOD_2GMP_NUMB_BITS(x,y) \ x = (y) >> (LOG2_GMP_NUMB_BITS + 1); \ y = (y) & ((1 << (LOG2_GMP_NUMB_BITS + 1)) - 1) #define MUL_GMP_NUMB_BITS(x) ((x) << LOG2_GMP_NUMB_BITS) #define MUL_2GMP_NUMB_BITS(x) ((x) << (LOG2_GMP_NUMB_BITS + 1)) #define MUL_4GMP_NUMB_BITS(x) ((x) << (LOG2_GMP_NUMB_BITS + 2)) #define LCM_GMP_NUMB_BITS(x) (((x) > LOG2_GMP_NUMB_BITS) ? 
(1<<(x)) : GMP_NUMB_BITS) #else #define MOD_GMP_NUMB_BITS(x) ((x) % GMP_NUMB_BITS) #define DIV_GMP_NUMB_BITS(x) ((x) / GMP_NUMB_BITS) #define DIVMOD_2GMP_NUMB_BITS(x,y) \ x = (y) / (2 * GMP_NUMB_BITS); \ y = (y) - (x) * (2 * GMP_NUMB_BITS) #define MUL_GMP_NUMB_BITS(x) ((x) * GMP_NUMB_BITS) #define MUL_2GMP_NUMB_BITS(x) ((x) * (2 * GMP_NUMB_BITS)) #define MUL_4GMP_NUMB_BITS(x) ((x) * (4 * GMP_NUMB_BITS)) /* lcm(GMP_NUMB_BITS, 2^x) */ #define LCM_GMP_NUMB_BITS(x) mpn_mul_fft_lcm (GMP_NUMB_BITS, x) #endif #define ONE ((mp_limb_t) 1) static int mpn_mul_fft_internal _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int, mp_ptr *, mp_ptr *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int **, mp_ptr, int, int)); /* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n. sqr==0 if for a multiply, sqr==1 for a square. Don't declare it static since it is needed by tuneup. */ #define MPN_FFT_TABLE2_SIZE 256 struct nk { mp_size_t n; unsigned char k; }; static struct nk mpn_fft_table2[4][MPN_FFT_TABLE2_SIZE] = { MUL_FFT_TABLE2, SQR_FFT_TABLE2, MUL_FFTM_TABLE2, SQR_FFTM_TABLE2 }; /* sqr_b = 0: plain multiplication mod 2^N+1 sqr_b = 1: square mod 2^N+1 sqr_b = 2: plain multiplication mod 2^N-1 sqr_b = 3: square mod 2^N-1 */ int mpn_fft_best_k (mp_size_t n, int sqr_b) { struct nk *tab; int last_k; last_k = 4; for (tab = mpn_fft_table2[sqr_b] + 1; ; tab++) { if (n < tab->n) break; last_k = tab->k; } return last_k; } #ifdef MUL_FFT_FULL_TABLE2 #define MPN_FFT_FULL_TABLE2_SIZE 256 #ifndef SQR_FFT_FULL_TABLE2 #define SQR_FFT_FULL_TABLE2 MUL_FFT_FULL_TABLE2 #endif static struct nk mpn_fft_full_table2[4][MPN_FFT_FULL_TABLE2_SIZE] = { MUL_FFT_FULL_TABLE2, SQR_FFT_FULL_TABLE2 }; static int mpn_fft_best_a (mp_size_t pl, int sqr) { struct nk *tab; int last_a; last_a = 1; for (tab = mpn_fft_full_table2[sqr] + 1; ; tab++) { if (pl < tab->n) break; last_a = tab->k; } return last_a; } #endif /* MUL_FFT_FULL_TABLE2 */ /* Returns smallest possible number of limbs >= 
pl for a fft of size 2^k, i.e. smallest multiple of 2^k >= pl. Don't declare static: needed by tuneup. */ mp_size_t mpn_fft_next_size (mp_size_t pl, int k) { pl = 1 + ((pl - 1) >> k); /* ceil (pl/2^k) */ return pl << k; } /* Initialize l[i][j] with bitrev(j) */ static void mpn_fft_initl (int **l, int k) { int i, j, K; int *li; l[0][0] = 0; for (i = 1, K = 1; i <= k; i++, K *= 2) { li = l[i]; for (j = 0; j < K; j++) { li[j] = 2 * l[i - 1][j]; li[K + j] = 1 + li[j]; } } } #ifndef HAVE_NATIVE_mpn_lshiftc /* Shift {up, n} cnt bits to the left, store the complemented result in {rp, n}, and output the shifted bits (not complemented). Same as: cc = mpn_lshift (rp, up, n, cnt); mpn_com_n (rp, rp, n); return cc; Assumes n >= 1 and 1 <= cnt < GMP_NUMB_BITS. {rp, n} and {up, n} may overlap, provided rp >= up (like mpn_lshift). */ static mp_limb_t mpn_lshiftc (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt) { mp_limb_t high_limb, low_limb; unsigned int tnc; mp_size_t i; mp_limb_t retval; ASSERT(n >= 1); ASSERT(1 <= cnt && cnt < GMP_NUMB_BITS); up += n; rp += n; tnc = GMP_NUMB_BITS - cnt; low_limb = *--up; retval = low_limb >> tnc; high_limb = (low_limb << cnt); for (i = n - 1; i != 0; i--) { low_limb = *--up; *--rp = (~(high_limb | (low_limb >> tnc))) & GMP_NUMB_MASK; high_limb = low_limb << cnt; } *--rp = (~high_limb) & GMP_NUMB_MASK; return retval; } #endif /* Given ap[0..n] with ap[n]<=1, reduce it modulo 2^(n*GMP_NUMB_BITS)+1, by subtracting that modulus if necessary. If ap[0..n] is exactly 2^(n*GMP_NUMB_BITS) then mpn_sub_1 produces a borrow and the limbs must be zeroed out again. This will occur very infrequently. */ static inline void mpn_fft_normalize (mp_ptr ap, mp_size_t n) { if (ap[n] != 0) { MPN_DECR_U (ap, n + 1, ONE); if (ap[n] == 0) { /* This happens with very low probability; we have yet to trigger it, and thereby make sure this code is correct. 
*/ MPN_FFT_ZERO (ap, n); ap[n] = 1; } else ap[n] = 0; } } /* r <- a*2^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1} Assumes a is semi-normalized, i.e. a[n] <= 1. r and a must have n+1 limbs, and not overlap. */ static void mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n) { int sh, negate; mp_limb_t cc, rd; ASSERT(d < 2 * (unsigned int) n * GMP_NUMB_BITS); sh = MOD_GMP_NUMB_BITS(d); d = DIV_GMP_NUMB_BITS(d); negate = d >= (unsigned int) n; if (negate) { d -= n; /* r[0..d-1] <-- lshift(a[n-d]..a[n-1], sh) r[d..n-1] <-- -lshift(a[0]..a[n-d-1], sh) */ if (sh != 0) { /* no out shift below since a[n] <= 1 */ mpn_lshift (r, a + n - d, d + 1, sh); rd = r[d]; cc = mpn_lshiftc (r + d, a, n - d, sh); } else { #ifdef COUNT_ZEROCOPY printf ("mpn_fft_mul_2exp_modF: MPN_FFT_COPY 1 with %d limbs\n", d); #endif /* Executed 256 times for 1000000 limb mpn_mul_n, each d in [0, 255] appears exactly once */ MPN_COPY (r, a + n - d, d); rd = a[n]; mpn_com_n (r + d, a, n - d); cc = 0; } /* add cc to r[0], and add rd to r[d] */ /* now add 1 in r[d], subtract 1 in r[n], i.e. add 1 in r[0] */ r[n] = 0; /* cc < 2^sh <= 2^(GMP_NUMB_BITS-1) thus no overflow here */ mpn_incr_u (r, cc + 1); rd ++; /* rd might overflow when sh=GMP_NUMB_BITS-1 */ cc = (rd == 0) ? ONE : rd; r = r + d + (rd == 0); mpn_incr_u (r, cc); return; } /* if negate=0, r[0..d-1] <-- -lshift(a[n-d]..a[n-1], sh) r[d..n-1] <-- lshift(a[0]..a[n-d-1], sh) */ if (sh != 0) { /* no out bits below since a[n] <= 1 */ mpn_lshiftc (r, a + n - d, d + 1, sh); rd = ~r[d]; /* {r, d+1} = {a+n-d, d+1} << sh */ cc = mpn_lshift (r + d, a, n - d, sh); /* {r+d, n-d} = {a, n-d}<= (unsigned int) n) /* (a-b)*B^d = (b-a)*B^(d-n) */ { mp_srcptr t; t = a; a = b; b = t; d -= n; } if (d == 0) mpn_fft_sub_modF (r, a, b, n); else { mp_limb_t cc; /* let a = ah * B^(n-d) + al and b = bh * B^(n-d) + bl, where ah, bh have d limbs + 1 bit, and al, bl have n-d limbs. Then (a-b)*B^d = (al-bl) * B^d + (bh-ah). 
*/ ASSERT (0 < d && d < (unsigned int) n); cc = mpn_sub_n (r, b + n - d, a + n - d, d); /* bh-ah */ #ifdef HAVE_NATIVE_mpn_sub_nc cc = mpn_sub_nc (r + d, a, b, n - d, cc); /* al-bl */ #else cc = mpn_sub_n (r + d, a, b, n - d) + mpn_sub_1 (r + d, r + d, n - d, cc); #endif /* 0 <= cc <= 1 */ if (a[n] > b[n]) cc += mpn_sub_1 (r + d, r + d, n - d, a[n] - b[n]); /* 0 <= cc <= 2 */ else cc -= mpn_add_1 (r + d, r + d, n - d, b[n] - a[n]); /* -1 <= cc <= 1 */ /* -1 <= cc <= 2 */ /* cc is the borrow at r[n], which must be added at r[0] */ r[n] = cc >> (GMP_NUMB_BITS - 1); MPN_INCR_U (r, n + 1, cc + r[n]); } } #ifdef _MSC_VER /* optimisation bug on VC++ v9 */ # pragma optimize( "", on ) #endif /* r <- a*sqrt(2)^d mod 2^(n*GMP_NUMB_BITS)+1 with a = {a, n+1} Assumes a is semi-normalized, i.e. a[n] <= 1. Assumes 0 < d < 4*n*GMP_NUMB_BITS. r and a must have n+1 limbs, and not overlap. Calls mpn_fft_mul_2exp_modF() and mpn_fft_sub_modF(). */ static void mpn_fft_mul_sqrt2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n) { unsigned int e = d >> 1; unsigned int N = MUL_GMP_NUMB_BITS(n); /* n * GMP_NUMB_BITS */ mp_ptr tmp; mp_srcptr b; mp_limb_t ca, cc; mp_size_t l; TMP_DECL; ASSERT(0 < d && d < 4 * N); ASSERT(a != r); #ifdef NO_SQRT_2 ASSERT_ALWAYS(d % 2 == 0); #endif /* If d is even, we have a regular multiplication by a power of 2 */ if ((d & 1) == 0) { /* since d cannot be zero, e cannot be zero too */ if (e < GMP_NUMB_BITS) mpn_fft_mul_2exp_modFa (r, a, e, n); else mpn_fft_mul_2exp_modF (r, a, e, n); return; } ASSERT(N % 4 == 0); /* Multiply by sqrt(2) * 2^e = (2^(3N/4) - 2^(N/4)) * 2^e = 2^(3N/4 + e) - 2^(N/4 + e) */ e += 3 * (N >> 2); /* 3N/4 <= e < 11N/4 */ if (e >= 2 * N) e -= 2 * N; /* 0 <= e < 2N */ TMP_MARK; tmp = TMP_ALLOC_LIMBS(n + 1); ASSERT(tmp != NULL); /* the following variant avoids the -H-L computation, which requires a call to mpn_com_n(). 
*/ if (e != 0) { mpn_fft_mul_2exp_modF (r, a, e, n); /* a*2^(e+N/2) */ b = r; } else b = a; l = n >> 1; if ((n & 1) != 0) { mpn_lshift (tmp, b, n + 1, GMP_NUMB_BITS >> 1); } else MPN_COPY (tmp + n - l, b + n - l, l + 1); /* we still have to shift {tmp, n+1} by l limbs to the left: let tl = {tmp, n-l} and th = {tmp+n-l,l+1} */ /* rh <- bh + tl, rl <- bl - th */ ca = b[n] + mpn_add_n (r + l, b + l, (n & 1) ? tmp : b, n - l); cc = tmp[n] + mpn_sub_n (r, b, tmp + n - l, l); cc = mpn_sub_1 (r + l, r + l, n - l, cc); /* We must subtract 0 <= ca <= 2 and add 0 <= cc <= 1 at r[0]. If cc >= ca: r[n]=0 and add cc - ca. If cc < ca: r[n]=1 and subtract ca-cc-1. */ r[n] = cc < ca; if (cc >= ca) MPN_INCR_U (r, n + 1, cc - ca); else /* cc < ca */ MPN_DECR_U (r, n + 1, ca - ONE - cc); TMP_FREE; } /* normalize {n, nl} mod 2^(Kl*GMP_NUMB_BITS)+b and store in tmp. tmp must have space for Kl + 1 limbs */ static void mpn_mul_fft_reduce (mp_ptr tmp, /* mp_srcptr A, */ mp_srcptr n, mp_size_t nl, mp_size_t Kl, /* int l, */ int b) { mp_size_t dif = nl - Kl; mp_limb_signed_t cy; if (dif > Kl) { int subp = 0; cy = ((b == 1) ? mpn_sub_n : mpn_add_n) (tmp, n, n + Kl, Kl); n += 2 * Kl; dif -= Kl; /* now dif > 0 */ while (dif > Kl) { if (b == -1) cy += mpn_add_n (tmp, tmp, n, Kl); else if (subp) cy += mpn_sub_n (tmp, tmp, n, Kl); else cy -= mpn_add_n (tmp, tmp, n, Kl); subp ^= 1; n += Kl; dif -= Kl; } /* now dif <= Kl */ if (b == -1) cy += mpn_add (tmp, tmp, Kl, n, dif); else if (subp) cy += mpn_sub (tmp, tmp, Kl, n, dif); else cy -= mpn_add (tmp, tmp, Kl, n, dif); if (cy >= 0) cy = mpn_add_1 (tmp, tmp, Kl, cy); else cy = mpn_sub_1 (tmp, tmp, Kl, -cy); } else /* dif <= Kl, i.e. nl <= 2 * Kl */ { cy = ((b == 1) ? 
mpn_sub : mpn_add) (tmp, n, Kl, n + Kl, dif); cy = mpn_add_1 (tmp, tmp, Kl, cy); } tmp[Kl] = cy; } /* Store in {A+(nprime + 1) * offset, nprime+1} the first l limbs (with zero padding) from {n + l*offset, ...}, and in {A + (nprime+1)*(offset + 1< Kl + 1) { /* FIXME: We really don't want to do this multiple times if stride > 0 ! */ TRACE(printf ("mpn_mul_fft_decompose: This takes too long!\n");) tmp = TMP_ALLOC_LIMBS(Kl + 1); ASSERT(tmp != NULL); mpn_mul_fft_reduce (tmp, /* A, */ n, nl, Kl, /* l, */ b); n = tmp; nl = Kl + 1; } A += (nprime + 1) * offset; n += l * offset; nl -= (l * offset < nl) ? l * offset : nl; /* for b=1, since we use {T, nprime+1} as temporary array below, and only the first l limbs may be non-zero, except for the last part, we can set {T+l, nprime+1-l} to zero now. */ MPN_FFT_ZERO (T + l, nprime + 1 - l); for (i = offset; i < K; i += 1 << stride) { Ap[i] = A; /* store the next l limbs of n into A[0..nprime] */ /* nl == 0 => j == 0, nl unchanged */ j = (l <= nl && i < K - 1) ? l : nl; /* store j next limbs */ nl -= j; nl -= (nl > (l << stride) - l) ? (l << stride) - l : nl; if (b == 1 && i != 0 && j > 0) { /* add weight signal for negacyclic convolution. We need a root of unity here whose order is twice the transform length K. Since ord(sqrt(2)^Mp) = K, sqrt(2)^(Mp/2) will do, so long as Mp is even. */ #define FORCE_EXPENSIVE_DECOMPOSE 0 if ((FORCE_EXPENSIVE_DECOMPOSE) || (i & (Mp / 2) & 1)) { #ifdef COUNT_ZEROCOPY printf ("mpn_mul_fft_decompose: MPN_FFT_COPY 1 with %d limbs\n", j); #endif MPN_FFT_COPY (T, n, j); ASSERT_ALWAYS (j <= l + 1); if (j < l) MPN_FFT_ZERO (T + j, l - j); mpn_fft_mul_sqrt2exp_modF (A, T, i * (Mp / 2), nprime); } else { /* i * Mp / 2 is even, so weight signal is sqrt(2)^(i * Mp / 2) = 2^(i * Mp / 4). Shift directly into A. 
*/ const int c = (i * Mp) / 4; const int d = c % GMP_NUMB_BITS; const int e = c / GMP_NUMB_BITS; #undef DECOMPOSE_CAREFUL_CHECK #ifdef DECOMPOSE_CAREFUL_CHECK /* Do it the expensive way and store result in T for comparison */ MPN_FFT_COPY (T, n, j); ASSERT_ALWAYS (j <= l + 1); if (j < l) MPN_FFT_ZERO (T + j, l - j); mpn_fft_mul_2exp_modF (A, T, c, nprime); MPN_COPY (T, A, nprime + 1); #endif /* Copy data from n to A+e, shifted by d bits. */ if (e + j < nprime || (e + j == nprime && d <= 1)) { /* The shifted data fits without wrapping */ MPN_FFT_ZERO (A, e); if (d == 0) { MPN_COPY(A + e, n, j); MPN_FFT_ZERO (A + e + j, nprime + 1 - e - j); } else { A[e + j] = mpn_lshift (A + e, n, j, d); /* Now zero A[e + j + 1 ... nprime] */ MPN_FFT_ZERO (A + e + j + 1, nprime - e - j); } } else { const int of = j + e - nprime; if (d == 0) { /* Here, e + j > nprime, i.e. there is wrapping but d == 0, so no bit shifting */ mp_limb_t cc; ASSERT(e + j > nprime); /* Hence of > 0 */ /* Store ~(N_hi) to A[0 ... of[ */ mpn_com_n (A, n + nprime - e, of); cc = mpn_add_1 (A, A, of, ONE); MPN_FFT_STORE (A + of, nprime - j, cc - ONE); /* Store n_lo * w^e */ ASSERT(nprime - e > 0); cc = mpn_sub_1 (A + e, n, nprime - e, ONE - cc); A[nprime] = 0; MPN_INCR_U (A, nprime + 1, cc); } else { /* Here, e + j >= nprime and d != 0 */ mp_limb_t cc; /* We want n*2^i with i < nprime*w, i > (nprime-j)*w, Store nprime-e words, shifted left by d, at A+e. */ cc = mpn_lshift (A + e, n, nprime - e, d); A[nprime] = 0; if (of > 0) { /* Store a_hi to A[0 ... of] */ A[of] = mpn_lshift (A, n + nprime - e, of, d); A[0] |= cc; /* And do binary negate */ mpn_com_n (A, A, of + 1); cc = mpn_add_1 (A, A, of + 1, ONE); } else { A[0] = -cc; cc = (cc == 0); } /* Store cc-1 to A[of+1 ... 
e[ */ MPN_FFT_STORE (A + of + 1, nprime - j - 1, cc - ONE); cc = mpn_sub_1 (A + e, A + e, nprime - e, ONE - cc); MPN_INCR_U (A, nprime + 1, cc); } } #ifdef DECOMPOSE_CAREFUL_CHECK ASSERT(A[nprime] <= 1); if (A[nprime] == 1) { /* Fully normalize for the sake of the following comparison */ mp_limb_t cc; cc = mpn_sub_1 (A, A, nprime, 1); A[nprime] = 0; mpn_add_1 (A, A, nprime + 1, cc); } if (mpn_cmp (A, T, nprime + 1) != 0) { printf ("nprime = %d, i = %d, j = %d, d = %d, " "e = %d\n", nprime, i, j, d, e); for (i = 0; i < nprime + 1; i++) printf ("%d: %lx %lx %c\n", i, A[i], T[i], (A[i] != T[i]) ? '!' : ' '); abort (); } MPN_ZERO (T, nprime + 1); #endif } } else /* b = -1 or i == 0 or j == 0. No weight to be added here. */ { #ifdef COUNT_ZEROCOPY printf ("mpn_mul_fft_decompose: MPN_FFT_COPY 2 with %d limbs\n", j); #endif MPN_COPY (A, n, j); MPN_FFT_ZERO (A + j, nprime + 1 - j); } ASSERT(A[nprime] <= 1); n += l << stride; A += (nprime + 1) << stride; } ASSERT_ALWAYS (nl == 0 || (nl == 1 && stride > 0 && offset == 0)); TMP_FREE; } /* A0 <- A0+A1 A1 <- (A0-A1)*2^e0 Butterfly using a rotating buffer instead of temp space. The variable rotbuf is a size-1 array of coefficients; this might be exchanged with one of the coefficients of A. 
*/ static inline void mpn_fft_butterfly_rotbuf (mp_ptr *A, mp_size_t i0, mp_size_t i1, unsigned int e0, mp_ptr *rotbuf, mp_size_t n) { unsigned int d, e = e0; ASSERT(e0 != 0); DIVMOD_2GMP_NUMB_BITS(d, e); /* 0 <= d < 2*n, 0 <= e0 < 2*GMP_NUMB_BITS */ mpn_fft_lshsub_modF (rotbuf[0], A[i0], A[i1], d, n); mpn_fft_add_modF (A[i0], A[i0], A[i1], n); if (e != 0) mpn_fft_mul_sqrt2exp_modF (A[i1], rotbuf[0], e, n); else { mp_ptr tmp = rotbuf[0]; rotbuf[0] = A[i1]; A[i1] = tmp; } } static inline void mpn_fft_butterfly_rotbuf0 (mp_ptr *A, mp_size_t i0, mp_size_t i1, mp_ptr *rotbuf, mp_size_t n) { mp_ptr tmp; mpn_fft_sub_modF (rotbuf[0], A[i0], A[i1], n); mpn_fft_add_modF (A[i0], A[i0], A[i1], n); tmp = rotbuf[0]; rotbuf[0] = A[i1]; A[i1] = tmp; } /* In this version, the shift e0 is in [0..N], so we have to do one more test on e0. */ static inline void mpn_fft_butterfly_rotbufN (mp_ptr *A, mp_size_t i0, mp_size_t i1, unsigned int e0, mp_ptr *rotbuf, mp_size_t n) { mp_size_t N = MUL_4GMP_NUMB_BITS(n); /* 4 * n * GMP_NUMB_BITS */ unsigned int d; if (e0 >= (unsigned int) N) e0 -= N; DIVMOD_2GMP_NUMB_BITS (d,e0); /* 0 <= d < 2*n, 0 <= e0 < 2*GMP_NUMB_BITS */ mpn_fft_lshsub_modF (rotbuf[0], A[i0], A[i1], d, n); mpn_fft_add_modF (A[i0], A[i0], A[i1], n); if (e0 != 0) mpn_fft_mul_sqrt2exp_modF (A[i1], rotbuf[0], e0, n); else { mp_ptr tmp = rotbuf[0]; rotbuf[0] = A[i1]; A[i1] = tmp; } } /* Radix 4 transform. This uses a rotating buffer: the array Ap gets unsorted (but we usually don't care). 
*/ static void mpn_fft_fft_radix4Rec (mp_ptr *Ap, mp_size_t ind_start, mp_size_t k, mp_size_t omega, mp_size_t n, mp_ptr *rotbuf) { mp_size_t i, stride, stride2; stride2 = 1<<(k-2); stride = 1<<(k-1); if (k == 1) { mpn_fft_butterfly_rotbuf0(Ap, ind_start, ind_start+1, rotbuf, n); return; } mpn_fft_butterfly_rotbuf0 (Ap, ind_start, ind_start+stride, rotbuf, n); mpn_fft_butterfly_rotbuf (Ap, ind_start+stride2, ind_start+stride+stride2, omega*stride2, rotbuf, n); mpn_fft_butterfly_rotbuf0 (Ap, ind_start+stride, ind_start+stride+stride2, rotbuf, n); mpn_fft_butterfly_rotbuf0 (Ap, ind_start, ind_start+stride2, rotbuf, n); for (i = 1; i < stride2; ++i) { mpn_fft_butterfly_rotbuf(Ap, ind_start+i, ind_start+i+stride, omega*i, rotbuf, n); mpn_fft_butterfly_rotbuf(Ap, ind_start+i+stride2, ind_start+i+stride+stride2, omega*(i+stride2), rotbuf, n); mpn_fft_butterfly_rotbuf(Ap, ind_start+i+stride, ind_start+i+stride+stride2, omega*i*2, rotbuf, n); mpn_fft_butterfly_rotbuf(Ap, ind_start+i, ind_start+i+stride2, omega*i*2, rotbuf, n); } if (k == 3) { mpn_fft_butterfly_rotbuf0(Ap, ind_start+stride+stride2, ind_start+stride+stride2+1, rotbuf, n); mpn_fft_butterfly_rotbuf0(Ap, ind_start+stride, ind_start+stride+1, rotbuf, n); mpn_fft_butterfly_rotbuf0(Ap, ind_start, ind_start+1, rotbuf, n); mpn_fft_butterfly_rotbuf0(Ap, ind_start+stride2, ind_start+stride2+1, rotbuf, n); } if (k > 3) { mp_size_t omega4 = omega<<2; mpn_fft_fft_radix4Rec(Ap, ind_start, k-2, omega4, n, rotbuf); mpn_fft_fft_radix4Rec(Ap, ind_start+stride2, k-2, omega4, n, rotbuf); mpn_fft_fft_radix4Rec(Ap, ind_start+stride, k-2, omega4, n, rotbuf); mpn_fft_fft_radix4Rec(Ap, ind_start+stride+stride2, k-2, omega4, n, rotbuf); } } static void mpn_fft_fft_radix4 (mp_ptr *Ap, mp_size_t k, mp_size_t omega, mp_size_t n, mp_ptr *rotbuf) { mpn_fft_fft_radix4Rec(Ap, 0, k, omega, n, rotbuf); } /* The "Neg" versions multiply by the *inverse* of the root. This is used for the backward transform. 
Propagating this bit of information saves the %N, since only at the end we do N-blah. FIXME: The Neg and non-Neg versions can probably be merged at almost no cost. */ static void mpn_fft_fft_radix4RecNeg (mp_ptr *Ap, mp_size_t ind_start, mp_size_t k, mp_size_t omega, mp_size_t n, mp_ptr *rotbuf) { mp_size_t i, stride, stride2; mp_size_t N = MUL_4GMP_NUMB_BITS(n); /* 4 * n * GMP_NUMB_BITS */ stride2 = 1 << (k - 2); stride = 1 << (k - 1); if (k == 1) { mpn_fft_butterfly_rotbufN(Ap, ind_start, ind_start+1, 0, rotbuf, n); return; } for (i = 0; i < stride2; ++i) { mpn_fft_butterfly_rotbufN(Ap, ind_start+i, ind_start+i+stride, N-omega*i, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind_start+i+stride2, ind_start+i+stride+stride2, N-omega*(i+stride2), rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind_start+i+stride, ind_start+i+stride+stride2, N-omega*i*2, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind_start+i, ind_start+i+stride2, N-omega*i*2, rotbuf, n); } if (k == 3) { mpn_fft_butterfly_rotbufN(Ap, ind_start+stride+stride2, ind_start+stride+stride2+1, 0, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind_start+stride, ind_start+stride+1, 0, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind_start, ind_start+1, 0, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind_start+stride2, ind_start+stride2+1, 0, rotbuf, n); } if (k > 3) { mp_size_t omega4 = omega<<2; mpn_fft_fft_radix4RecNeg(Ap, ind_start, k-2, omega4, n, rotbuf); mpn_fft_fft_radix4RecNeg(Ap, ind_start+stride2, k-2, omega4, n, rotbuf); mpn_fft_fft_radix4RecNeg(Ap, ind_start+stride, k-2, omega4, n, rotbuf); mpn_fft_fft_radix4RecNeg(Ap, ind_start+stride+stride2, k-2, omega4, n, rotbuf); } } static void mpn_fft_fft_radix4Neg (mp_ptr *Ap, mp_size_t k, mp_size_t omega, mp_size_t n, mp_ptr *rotbuf) { mpn_fft_fft_radix4RecNeg(Ap, 0, k, omega, n, rotbuf); } static void mpn_fft_fft_radix4Inv(mp_ptr *Ap, mp_size_t k, mp_size_t omega, mp_size_t n, mp_ptr *rotbuf, int **ll) { int i; /* Bit-reverse table Ap. 
FIXME: these bit-rev copies might be avaoided. But do they really cost? */ for (i = 0; i < 1< 3) { mpn_fft_fftR4_twistedRec(Ap, ind, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRec(Ap, ind+stride2, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRec(Ap, ind+stride, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRec(Ap, ind+stride+stride2, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); } return; } if (k == 1) { mpn_fft_butterfly_rotbuf (Ap, ind, ind + 1, omega * om_mult, rotbuf, n); return; } for (i = 0; i < stride2; ++i) { mp_size_t root = omega*(om_curr*i + om_mult); mpn_fft_butterfly_rotbuf(Ap, ind+i, ind+stride+i, root, rotbuf, n); root = omega*(om_curr*(i+stride2) + om_mult); mpn_fft_butterfly_rotbuf(Ap, ind+i+stride2, ind+stride+stride2+i, root, rotbuf, n); mpn_fft_butterfly_rotbuf(Ap, ind+i+stride, ind+stride+stride2+i, omega*(om_curr*i + om_mult)*2, rotbuf, n); mpn_fft_butterfly_rotbuf(Ap, ind+i, ind+stride2+i, omega*(om_curr*i + om_mult)*2, rotbuf, n); } if (k == 3) { mp_size_t root = omega*om_mult*4; mpn_fft_butterfly_rotbuf(Ap, ind+stride+stride2, ind+stride+stride2+1, root, rotbuf, n); mpn_fft_butterfly_rotbuf(Ap, ind+stride, ind+stride+1, root, rotbuf, n); mpn_fft_butterfly_rotbuf(Ap, ind, ind+1, root, rotbuf, n); mpn_fft_butterfly_rotbuf(Ap, ind+stride2, ind+stride2+1, root, rotbuf, n); } if (k > 3) { mpn_fft_fftR4_twistedRec(Ap, ind, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRec(Ap, ind+stride2, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRec(Ap, ind+stride, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRec(Ap, ind+stride+stride2, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); } } static void mpn_fft_fftR4_twisted(mp_ptr * Ap, mp_size_t rk, mp_size_t k1, mp_size_t k, mp_size_t omega, mp_size_t n, mp_ptr *rotbuf) { mpn_fft_fftR4_twistedRec(Ap, 0, k1, omega, 1<<(k-k1), rk, n, rotbuf); } /* Neg version for 
reverse transform. (see comments above) */ static void mpn_fft_fftR4_twistedRecNeg(mp_ptr * Ap, mp_size_t ind, mp_size_t k, mp_size_t omega, mp_size_t om_curr, mp_size_t om_mult, mp_size_t n, mp_ptr *rotbuf) { mp_size_t stride = 1<<(k-1); mp_size_t stride2 = 1<<(k-2); int i; mp_size_t N = MUL_4GMP_NUMB_BITS(n); /* 4 * n * GMP_NUMB_BITS */ if (k == 0) return; if (k == 1) { mpn_fft_butterfly_rotbufN (Ap, ind, ind + 1, N - omega * om_mult, rotbuf, n); return; } for (i = 0; i < stride2; ++i) { mp_size_t root = omega*(om_curr*i + om_mult); mpn_fft_butterfly_rotbufN(Ap, ind+i, ind+stride+i, N-root, rotbuf, n); root = omega*(om_curr*(i+stride2) + om_mult); mpn_fft_butterfly_rotbufN(Ap, ind+i+stride2, ind+stride+stride2+i, N-root, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind+i+stride, ind+stride+stride2+i, N-omega*(om_curr*i + om_mult)*2, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind+i, ind+stride2+i, N-omega*(om_curr*i + om_mult)*2, rotbuf, n); } if (k == 3) { mp_size_t root = N-omega*om_mult*4; mpn_fft_butterfly_rotbufN(Ap, ind+stride+stride2, ind+stride+stride2+1, root, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind+stride, ind+stride+1, root, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind, ind+1, root, rotbuf, n); mpn_fft_butterfly_rotbufN(Ap, ind+stride2, ind+stride2+1, root, rotbuf, n); } if (k > 3) { mpn_fft_fftR4_twistedRecNeg(Ap, ind, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRecNeg(Ap, ind+stride2, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRecNeg(Ap, ind+stride, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); mpn_fft_fftR4_twistedRecNeg(Ap, ind+stride+stride2, k-2, omega, om_curr<<2, om_mult<<2, n, rotbuf); } } static void mpn_fft_fftR4_twistedNeg(mp_ptr * Ap, mp_size_t rk, mp_size_t k1, mp_size_t k, mp_size_t omega, mp_size_t n, mp_ptr *rotbuf) { mpn_fft_fftR4_twistedRecNeg(Ap, 0, k1, omega, 1<<(k-k1), rk, n, rotbuf); } #if 0 /* Radix-2 version of the previous function. 
Obsolete, now, but more easy to understand; so I let it here. */ static void mpn_fft_fft_twistedRec(mp_ptr * Ap, mp_size_t ind, mp_size_t k, mp_size_t omega, mp_size_t om_curr, mp_size_t om_mult, mp_size_t n, mp_ptr *rotbuf) { const mp_size_t stride = 1<<(k-1); int i; if (k == 0) return; for (i = 0; i < stride; ++i) { mp_size_t root = (omega*(om_curr*i + om_mult)); mpn_fft_butterfly_rotbuf(Ap, ind+i, ind+stride+i, root, rotbuf, n); } mpn_fft_fft_twistedRec(Ap, ind, k-1, omega, om_curr<<1, om_mult<<1, n, rotbuf); mpn_fft_fft_twistedRec(Ap, ind+stride, k-1, omega, om_curr<<1, om_mult<<1, n, rotbuf); } #endif static void mpn_fft_fft_bailey_decompose (mp_ptr A, mp_ptr *Ap, mp_size_t k, mp_size_t omega, mp_size_t nprime, mp_srcptr n, mp_size_t nl, int l, mp_ptr *rotbuf, int b) { const mp_size_t k1 = k >> 1; const mp_size_t k2 = k - k1; int i, j; const mp_size_t K1 = 1 << k1; const mp_size_t K2 = 1 << k2; mp_size_t omegai; mp_ptr *BufA; mp_ptr T, tmp = NULL; const int Kl = l << k; TMP_DECL; TMP_MARK; BufA = TMP_ALLOC_MP_PTRS (K1); ASSERT(BufA != NULL); T = __GMP_ALLOCATE_FUNC_LIMBS(nprime + 1); ASSERT(T != NULL); if (nl > Kl) { tmp = __GMP_ALLOCATE_FUNC_LIMBS(Kl + 1); ASSERT(tmp != NULL); mpn_mul_fft_reduce (tmp, /* A, */ n, nl, Kl, /* l, */ b); n = tmp; nl = Kl + 1; } for (i = 0; i < K2; ++i) { /* Do the decomposition */ /* omega is equal to Mp value */ mpn_mul_fft_decompose (A, Ap, 1<= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { int k, K2, nprime2, Nprime2, M2, maxLK, l; int **_fft_l; mp_ptr *Ap, *Bp, A, B, T; k = mpn_fft_best_k (n, sqr); K2 = 1 << k; ASSERT_ALWAYS((n & (K2 - 1)) == 0); maxLK = LCM_GMP_NUMB_BITS (k); M2 = MUL_GMP_NUMB_BITS(n) >> k; l = n >> k; Nprime2 = ((2 * M2 + k + 2 + maxLK) / maxLK) * maxLK; /* Nprime2 = ceil((2*M2+k+3)/maxLK)*maxLK*/ nprime2 = DIV_GMP_NUMB_BITS (Nprime2); /* Nprime2 / GMP_NUMB_BITS */ /* we should ensure that nprime2 is a multiple of the next K */ if (nprime2 >= (sqr ? 
SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { unsigned long K3; for (;;) { K3 = 1L << mpn_fft_best_k (nprime2, sqr); if ((nprime2 & (K3 - 1)) == 0) break; nprime2 = (nprime2 + K3 - 1) & -K3; Nprime2 = nprime2 * GMP_LIMB_BITS; /* warning: since nprime2 changed, K3 may change too! */ } } ASSERT_ALWAYS(nprime2 < n); /* otherwise we'll loop */ Ap = TMP_ALLOC_MP_PTRS (K2); ASSERT(Ap != NULL); Bp = TMP_ALLOC_MP_PTRS (K2); ASSERT(Bp != NULL); A = TMP_ALLOC_LIMBS (2 * K2 * (nprime2 + 1)); ASSERT(A != NULL); T = TMP_ALLOC_LIMBS (2 * (nprime2 + 1)); ASSERT(T != NULL); B = A + K2 * (nprime2 + 1); _fft_l = TMP_ALLOC_TYPE (k + 1, int *); ASSERT(_fft_l != NULL); for (i = 0; i <= k; i++) { _fft_l[i] = TMP_ALLOC_TYPE (1< %d times %dx%d (%1.2f)\n", n, n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2)); for (i = 0; i < K; i++, ap++, bp++) { mpn_fft_normalize (*ap, n); if (!sqr) mpn_fft_normalize (*bp, n); mpn_mul_fft_internal (*ap, n, *ap, n + 1, *bp, n + 1, k, Ap, Bp, A, B, nprime2, l, _fft_l, T, 1, 1); } } else { mp_ptr a, b, tp, tpn; mp_limb_t cc; int n2 = 2 * n; tp = TMP_ALLOC_LIMBS (n2); ASSERT(tp != NULL); tpn = tp + n; TRACE (printf ("mpn_fft_mul_modF_K: mpn_mul_n %d of %d limbs\n", K, n)); /* FIXME: write a special loop for the square case, to put the test out of the loop, and optimize the case a[n] != 0: maybe normalizing a and b will be faster? */ for (i = 0; i < K; i++) { a = *ap++; b = *bp++; if (LIKELY(a[0] >= a[n])) { a[0] -= a[n]; a[n] = 0; } if (LIKELY(b[0] >= b[n])) { b[0] -= b[n]; b[n] = 0; } if (sqr) mpn_sqr_n (tp, a, n); else mpn_mul_n (tp, b, a, n); cc = a[n] && mpn_add_n (tpn, tpn, b, n); cc += b[n] && mpn_add_n (tpn, tpn, a, n); cc += b[n] && a[n]; /* 0 <= cc <= 3 */ cc += mpn_sub_n (a, tp, tpn, n); /* 0 <= cc <= 4 */ a[n] = 0; MPN_INCR_U (a, n + 1, cc); } } TMP_FREE; } /* * Mix Point-wise multiplication and inverse FFT. * This is useful, since we save one pass on the whole data, thus * improving the locality. 
* * FIXME: A lot of duplicated code in this function. At some point it * will be necessary to clean-up things to keep it possible to maintain. * */ static void mpn_fft_mul_modF_K_fftInv (mp_ptr *ap, mp_ptr *bp, mp_size_t n, mp_size_t Mp, int old_k, mp_ptr *rotbuf, int**ll) { int i, j; int sqr = (ap == bp); #if 0 mp_size_t K = 1<= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { int k, K2, nprime2, Nprime2, M2, maxLK, l; int **_fft_l; mp_ptr *Ap, *Bp, A, B, T; k = mpn_fft_best_k (n, sqr); K2 = 1 << k; ASSERT_ALWAYS((n & (K2 - 1)) == 0); maxLK = LCM_GMP_NUMB_BITS(k); M2 = MUL_GMP_NUMB_BITS(n) >> k; l = n >> k; Nprime2 = ((2 * M2 + k + 2 + maxLK) / maxLK) * maxLK; /* Nprime2 = ceil((2*M2+k+3)/maxLK)*maxLK*/ nprime2 = DIV_GMP_NUMB_BITS(Nprime2); /* we should ensure that nprime2 is a multiple of the next K */ if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { unsigned long K3; for (;;) { K3 = 1L << mpn_fft_best_k (nprime2, sqr); if ((nprime2 & (K3 - 1)) == 0) break; nprime2 = (nprime2 + K3 - 1) & -K3; Nprime2 = nprime2 * GMP_LIMB_BITS; /* warning: since nprime2 changed, K3 may change too! 
*/ } } ASSERT_ALWAYS(nprime2 < n); /* otherwise we'll loop */ Ap = TMP_ALLOC_MP_PTRS (K2); ASSERT(Ap != NULL); Bp = TMP_ALLOC_MP_PTRS (K2); ASSERT(Bp != NULL); A = __GMP_ALLOCATE_FUNC_LIMBS (2 * K2 * (nprime2 + 1)); ASSERT(A != NULL); T = TMP_ALLOC_LIMBS (2 * (nprime2 + 1)); ASSERT(T != NULL); B = A + K2 * (nprime2 + 1); _fft_l = TMP_ALLOC_TYPE (k + 1, int *); ASSERT(_fft_l != NULL); for (i = 0; i <= k; i++) { _fft_l[i] = TMP_ALLOC_TYPE (1< %d times %dx%d (%1.2f)\n", n, n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2)); { mp_size_t k1, k2, K1, omega, omegai; mp_ptr *BufA; k1 = old_k >> 1; k2 = old_k - k1; #if 0 /* unused variables */ mp_ptr a, b; mp_limb_t cc; mp_size_t N = MUL_4GMP_NUMB_BITS(n); /* 4 * n * GMP_NUMB_BITS */ #endif K1 = 1 << k1; K2 = 1 << k2; /* we overwrite the previous variable, here, but it is no longer used */ omega = Mp; BufA = TMP_ALLOC_MP_PTRS (K1); ASSERT(BufA != NULL); for (i = 0; i < K2; ++i) { /* copy the i-th column of Ap into BufA (pointers... no real copy) */ for (j = 0; j < K1; ++j) { /* Do the point-wise multiplication, the bitreverse and the column selection at once. Should help locality (not readibility). */ int ind = ll[old_k][i+K2*j]; mpn_fft_normalize (ap[ind], n); if (!sqr) mpn_fft_normalize (bp[ind], n); mpn_mul_fft_internal (ap[ind], n, ap[ind], n + 1, bp[ind], n + 1, k, Ap, Bp, A, B, nprime2, l, _fft_l, T, 1, 1); BufA[j] = ap[ind]; } /* do the level k1 transform */ mpn_fft_fftR4_twistedNeg(BufA, i, k1, old_k, omega, n, rotbuf); /* copy back (since with the rotating buffer, the pointers have been moved around. */ for (j = 0; j < K1; ++j) ap[ll[old_k][i+K2*j]] = BufA[j]; } for (i = 0; i < 1<= cc; /* rare case where R = B^n */ } r[n - 1] -= cc; } /* A <- A/sqrt(2)^k mod 2^(n*GMP_NUMB_BITS)+1. Assumes 0 < k < 4*n*GMP_NUMB_BITS. FIXME: can we use the trick used in mpn_fft_div_sqrt2exp_modF above? 
*/ static void mpn_fft_div_sqrt2exp_modF (mp_ptr r, mp_srcptr a, unsigned int k, mp_size_t n) { ASSERT (r != a); #ifdef NO_SQRT_2 ASSERT_ALWAYS (k % 2 == 0); #endif ASSERT (0 < k && k < MUL_4GMP_NUMB_BITS((unsigned int) n)); mpn_fft_mul_sqrt2exp_modF (r, a, MUL_4GMP_NUMB_BITS(n) - k, n); /* 1/2^k = 2^(2nL-k) mod 2^(n*GMP_NUMB_BITS)+1 */ /* normalize so that R < 2^(n*GMP_NUMB_BITS)+1 */ mpn_fft_normalize (r, n); } /* {rp,n} <- {ap,an} mod 2^(n*GMP_NUMB_BITS)+b, where b=1 or b=-1. Returns carry out, i.e. 1 iff b=1 and {ap,an}=-1 mod 2^(n*GMP_NUMB_BITS)+1, then {rp,n}=0. No restriction on an, except an >= 1. */ static mp_limb_t mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_srcptr ap, mp_size_t an, int b) { if (an <= n) { #ifdef COUNT_ZEROCOPY printf ("mpn_fft_norm_modF: MPN_FFT_COPY with %d limbs\n", an); #endif MPN_COPY (rp, ap, an); if (an < n) MPN_FFT_ZERO (rp + an, n - an); return 0; } else /* an > n */ { mp_size_t l; mp_limb_t cc; int i; l = (an <= 2 * n) ? an - n : n; if (b == -1) cc = mpn_add (rp, ap, n, ap + n, l); else cc = -mpn_sub (rp, ap, n, ap + n, l); ap += n + l; an -= n + l; for (i = -1; an > 0; i = -b * i) { /* it remains to deal with {ap, an} */ l = (an <= n) ? an : n; if (i == -1) cc += mpn_add (rp, rp, n, ap, l); else cc -= mpn_sub (rp, rp, n, ap, l); ap += l; an -= l; } if (b == 1) { if (cc & GMP_LIMB_HIGHBIT) /* cc < 0 */ cc = mpn_add_1 (rp, rp, n, -cc); cc = mpn_sub_1 (rp, rp, n, cc); } else /* b = -1: necessarily cc >= 0 */ cc = mpn_add_1 (rp, rp, n, cc); return mpn_add_1 (rp, rp, n, cc); } } /* op <- n*m mod 2^N+b with fft of size 2^k where N=pl*GMP_NUMB_BITS n and m have respectively nl and ml limbs op must have space for pl+1 limbs if rec=1 (and pl limbs if rec=0). One must have pl = mpn_fft_next_size (pl, k). T must have space for 2 * (nprime + 1) limbs. If rec=0, then store only the pl low bits of the result, and return the out carry. Assumes b=1 (negacyclic convolution) or b=-1 (cyclic convolution). 
*/ static int mpn_mul_fft_internal (mp_ptr op, mp_size_t pl, mp_srcptr n, mp_size_t nl, mp_srcptr m, mp_size_t ml, int k, mp_ptr *Ap, mp_ptr *Bp, mp_ptr A, mp_ptr B, mp_size_t nprime, mp_size_t l, int **_fft_l, mp_ptr T, int rec, int b) { const int K = 1<> k; int i, sqr, pla, lo, sh, j; mp_ptr p; mp_limb_t cc; mp_ptr rotbufA[1], rotbufB[1]; /* we need two rotating buffers, otherwise some Ap[i] may point to the B[] array, and will be erase since we use the B[] array to store the final result {p,pla} */ mp_ptr bufAptr, bufBptr; /* Remember pointers to free memory */ bufAptr = rotbufA[0] = __GMP_ALLOCATE_FUNC_LIMBS(nprime+1); ASSERT(rotbufA[0] != NULL); bufBptr = rotbufB[0] = __GMP_ALLOCATE_FUNC_LIMBS(nprime+1); ASSERT(rotbufB[0] != NULL); ASSERT(b == 1 || b == -1); sqr = n == m && nl == ml; TRACE (printf ("mpn_mul_fft_internal: pl=%d k=%d K=%d np=%d l=%d Mp=%d " "rec=%d sqr=%d b=%d\n", pl,k,K,nprime,l,Mp,rec,sqr,b)); #define BAILEY_THRESHOLD 9 /* direct fft's */ /* This threshold for Bailey's algorithm has been determined experimentally on an Opteron. */ if (k >= BAILEY_THRESHOLD) { TRACE(printf("Calling mpn_fft_fft_bailey(Ap, %d, %d, %d, T, ...)\n", k,Mp,nprime);) /* decomposition of inputs into arrays Ap[i] and Bp[i] */ mpn_fft_fft_bailey_decompose (A, Ap, k, Mp, nprime, n, nl, l, rotbufA, b); if (!sqr) mpn_fft_fft_bailey_decompose (B, Bp, k, Mp, nprime, m, ml, l, rotbufB, b); } else { TRACE(printf("Calling mpn_fft_fft_radix4(Ap, %d, %d, %d, T, ...)\n", k,Mp,nprime);) /* decomposition of inputs into arrays Ap[i] and Bp[i] */ mpn_mul_fft_decompose (A, Ap, K, 0, 0, nprime, n, nl, l, Mp, T, b); if (sqr == 0) mpn_mul_fft_decompose (B, Bp, K, 0, 0, nprime, m, ml, l, Mp, T, b); mpn_fft_fft_radix4 (Ap, k, Mp, nprime, rotbufA); if (!sqr) mpn_fft_fft_radix4 (Bp, k, Mp, nprime, rotbufB); } /* * We want to multipy the K transformed elements of A and B (or A and A * if we're squaring), with products reduced (mod 2^Nprime+1) * * Then we must do the backward transform. 
* * If we are below Bailey's threshold, we assume that the data fits in * the cache and do those 2 tasks separately. Otherwise we mix them: we * do the point-wise products for the elements of one column, then we * readily do the transform of the column since we have it in cache. * The code becomes messy (especially when you add the bitreverse * stuff), but this saves a bit. */ if (k >= BAILEY_THRESHOLD) { mpn_fft_mul_modF_K_fftInv (Ap, (sqr) ? Ap : Bp, nprime, Mp, k, rotbufA, _fft_l); } else { mpn_fft_mul_modF_K (Ap, (sqr) ? Ap : Bp, nprime, K); TRACE(printf("mpn_mul_fft_internal: Calling mpn_fft_fft_radix4Inv(Ap, %d, " "%d, %d, T, ...)\n", k, Mp, nprime);) mpn_fft_fft_radix4Inv (Ap, k, Mp, nprime, rotbufA, _fft_l); } Bp[0] = T + nprime + 1; /* addition of terms in result p */ MPN_FFT_ZERO (T, nprime + 1); pla = l * (K - 1) + nprime + 1; /* number of required limbs for p */ p = B; /* B has K*(n' + 1) limbs, which is >= pla, i.e. enough */ ASSERT (K * (nprime + 1) >= pla); MPN_FFT_ZERO (p, pla); cc = 0; /* will accumulate the (signed) carry at p[pla] */ for (i = K - 1, lo = l * i + nprime,sh = l * i; i >= 0; i--,lo -= l,sh -= l) { mp_ptr n = p + sh; j = (K - i) & (K - 1); /* Multiply by appropriate root and reorder. We want to divide by the transform length, so divide by sqrt(2)^(2*k) == 2^k */ if (j > 0 && b == 1) mpn_fft_div_sqrt2exp_modF (Bp[0], Ap[K - j], 2 * k + (K - j) * (Mp / 2), nprime); else /* No unweighting to be done, only divide by transform length */ mpn_fft_div_2exp_modF (Bp[0], Ap[(K - j) & (K - 1)], k, nprime); Bp[j] = Bp[0]; if (mpn_add_n (n, n, Bp[j], nprime + 1)) cc += mpn_add_1 (n + nprime + 1, n + nprime + 1, pla - sh - nprime - 1, ONE); T[2 * l] = (b == 1) ? 
i + 1 : K; /* T = (i + 1)*2^(2*M) */ if (mpn_cmp (Bp[j], T, nprime + 1) > 0) { /* subtract 2^N'+1 from {n, nprime} */ cc -= mpn_sub_1 (n, n , pla - sh, ONE); cc -= mpn_sub_1 (p + lo, p + lo, pla - lo, ONE); } } if (cc == -ONE) { if ((cc = mpn_add_1 (p + pla - pl, p + pla - pl, pl, ONE))) { /* p[pla-pl]...p[pla-1] are all zero */ mpn_sub_1 (p + pla - pl - 1, p + pla - pl - 1, pl + 1, ONE); mpn_sub_1 (p + pla - 1, p + pla - 1, 1, ONE); } } else if (cc == ONE) { if (pla >= 2 * pl) { while ((cc = mpn_add_1 (p + pla - 2 * pl, p + pla - 2 * pl, 2 * pl, cc))) ; } else { cc = mpn_sub_1 (p + pla - pl, p + pla - pl, pl, cc); ASSERT (cc == 0); } } else { ASSERT (cc == 0); } /* here p < 2^(2M) [K 2^(M(K-1)) + (K-1) 2^(M(K-2)) + ... ] < K 2^(2M) [2^(M(K-1)) + 2^(M(K-2)) + ... ] < K 2^(2M) 2^(M(K-1))*2 = 2^(M*K+M+k+1) */ i = mpn_fft_norm_modF (op, pl, p, pla, b); if (rec) /* store the carry out */ op[pl] = i; __GMP_FREE_FUNC_LIMBS(bufAptr, nprime+1); __GMP_FREE_FUNC_LIMBS(bufBptr, nprime+1); return i; } /* return the lcm of a and 2^k */ static inline unsigned int mpn_mul_fft_lcm (unsigned int a, unsigned int k) { unsigned int l = k; while ((a & 1) == 0 && k > 0) { a >>= 1; k --; } return a << l; } static int mpn_mul_fft_aux (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int, int); /* put in {op, pl} the low pl limbs of the product {n, nl} * {m, ml} mod (B^pl+1) where B = 2^GMP_NUMB_BITS, and returns the carry bit, which is 1 when {n, nl} * {m, ml} = B^pl, and is 0 otherwise. */ int mpn_mul_fft (mp_ptr op, mp_size_t pl, mp_srcptr n, mp_size_t nl, mp_srcptr m, mp_size_t ml, int k) { return mpn_mul_fft_aux (op, pl, n, nl, m, ml, k, 1); } /* put in {op, pl} the product of {n, nl} * {m, ml} mod (B^pl-1) where B = 2^GMP_NUMB_BITS. 
*/
/* Thin wrapper: forwards all arguments to mpn_mul_fft_aux with b = -1 and
   returns its result unchanged. */
static int
mpn_mul_fft_mersenne (mp_ptr op, mp_size_t pl,
                      mp_srcptr n, mp_size_t nl,
                      mp_srcptr m, mp_size_t ml,
                      int k)
{
  return mpn_mul_fft_aux (op, pl, n, nl, m, ml, k, -1);
}

/* put in {op, pl} + carry out the product {n, nl} * {m, ml}
   modulo 2^(pl*GMP_NUMB_BITS) + b, where b = 1 or b = -1.

   k is the log2 of the transform length (K = 1 << k).  pl must already
   satisfy mpn_fft_next_size (pl, k) == pl; this is enforced with
   ASSERT_ALWAYS.  An operand longer than pl limbs is first reduced with
   mpn_fft_norm_modF into a temporary, which is freed before returning.
   The return value is the one of mpn_mul_fft_internal, called with rec = 0.
   If the main work area A cannot be allocated, a message is printed on
   stderr and the process exits. */
static int
mpn_mul_fft_aux (mp_ptr op, const mp_size_t pl,
                 mp_srcptr n, mp_size_t nl,
                 mp_srcptr m, mp_size_t ml,
                 int k, const int b)
{
  int maxLK, i, c;
  const int K = 1 << k;
  mp_size_t N, Nprime, nprime, M, l;
  mp_ptr *Ap, *Bp, A, T, B;
  int **_fft_l;
  /* sqr is decided on the caller's pointers, before n and m may be replaced
     by reduced temporaries below */
  int sqr = (n == m && nl == ml), use_tmp_n, use_tmp_m;
  TMP_DECL;

  TRACE (printf ("\nmpn_mul_fft_aux: mpn_mul_fft pl=%ld nl=%ld ml=%ld k=%d "
                 "b=%d\n", pl, nl, ml, k, b));
  ASSERT_ALWAYS (mpn_fft_next_size (pl, k) == pl);

  TMP_MARK;

  /* first reduce {n, nl} or {m, ml} if nl > pl or ml > pl */
  if ((use_tmp_n = nl > pl))
    {
      /* one extra limb for b = 1, to hold the carry returned by the
         reduction */
      mp_ptr nn = __GMP_ALLOCATE_FUNC_LIMBS(pl + (b == 1));
      ASSERT(nn != NULL);
      if ((i = mpn_fft_norm_modF (nn, pl, n, nl, b)))
        nn[pl] = 1;
      n = nn;
      nl = pl + i;
    }
  if ((use_tmp_m = ml > pl))
    {
      mp_ptr mm = __GMP_ALLOCATE_FUNC_LIMBS(pl + (b == 1));
      ASSERT(mm != NULL);
      if ((i = mpn_fft_norm_modF (mm, pl, m, ml, b)))
        mm[pl] = 1;
      m = mm;
      ml = pl + i;
    }
  /* now nl,ml <= pl if b=-1, nl,ml <= pl+1 if b=1 */

  N = MUL_GMP_NUMB_BITS(pl); /* The entire integer product will be mod 2^N+b */
  /* one table of 1 << i ints per level i = 0..k, filled by mpn_fft_initl */
  _fft_l = TMP_ALLOC_TYPE (k + 1, int *);
  ASSERT(_fft_l != NULL);
  for (i = 0; i <= k; i++)
    {
      _fft_l[i] = TMP_ALLOC_TYPE (1 << i, int);
      ASSERT(_fft_l[i] != NULL);
    }
  mpn_fft_initl (_fft_l, k);
  M = N >> k; /* The number of bits we need to be able to store in each
                 of the 2^k pieces */

  l = 1 + DIV_GMP_NUMB_BITS(M - 1); /* nb of limbs in each of the 2^k pieces */

  /* Choose maxLK so that an order 4*2^k root of unity exists for the
     negacyclic transform (which needs a root of unity of order twice the
     transform length for the weight signal), or an order 2*2^k root of
     unity for the cyclic transform (which uses no weight signal) */
#ifdef NO_SQRT_2
  c = (b == -1) ? 1 : 0;
#else
  c = (b == -1) ? 2 : 1;
#endif
  ASSERT(k >= c);
  maxLK = LCM_GMP_NUMB_BITS (k - c);
  /* maxLK = lcm (GMP_NUMB_BITS, 2^(k-1) for b=1, 2^(k-2) for b=-1) */
  /* When we do the transforms with elements (mod 2^Nprime+1), we need
     GMP_NUMB_BITS|Nprime so that shifts are fast, and
     transformlength|2*c*Nprime so that transformlength|ord(2) for b==1 or
     transformlength|ord(sqrt(2)) for b==-1 */

  Nprime = 2 * M + k + 2; /* make Nprime large enough so that the coefficients
                             in the product polynomial are not affected by
                             reduction (mod 2^Nprime+1).
                             FIXME is the +2 necessary? */
  Nprime = (Nprime / maxLK + 1) * maxLK; /* Round up Nprime to multiple of
                                            both GMP_NUMB_BITS and 2^(k-1) */

  nprime = DIV_GMP_NUMB_BITS(Nprime); /* number of limbs in poly coefficient */
  TRACE(printf ("mpn_mul_fft_aux: N=%d K=%d, M=%d, l=%d, maxLK=%d, Np=%d, "
                "np=%d\n", N, K, M, l, maxLK, Nprime, nprime);)
  /* we should ensure that recursively, nprime is a multiple of the next K */
  if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD))
    {
      unsigned long K2;
      for (;;)
        {
          K2 = 1L << mpn_fft_best_k (nprime, sqr);
          if ((nprime & (K2 - 1)) == 0)
            break;
          nprime = (nprime + K2 - 1) & -K2; /* round up nprime to multiple of K2 */
          Nprime = nprime * GMP_LIMB_BITS;
          /* warning: since nprime changed, K2 may change too! */
        }
      TRACE (printf ("mpn_mul_fft_aux: new maxLK=%d, Np=%d, np=%d\n", maxLK, Nprime, nprime));
    }
  ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */

  T = __GMP_ALLOCATE_FUNC_LIMBS (2 * (nprime + 1));
  ASSERT(T != NULL);

  TRACE (printf ("mpn_mul_fft_aux: %dx%d limbs -> %d times %dx%d limbs (%1.2f)\n", pl, pl, K, nprime, nprime, 2.0 * (double) N / Nprime / K);
         printf (" temp space %ld\n", 2 * K * (nprime + 1));)

  /* A and B are the two halves of one allocation of 2 * K * (nprime + 1)
     limbs */
  A = __GMP_ALLOCATE_FUNC_LIMBS (2 * K * (nprime + 1));
  if (A == NULL)
    {
      fprintf (stderr, "Cannot allocate memory, please use -maxmem\n");
      exit (EXIT_FAILURE);
    }
  B = A + K * (nprime + 1);
  Ap = TMP_ALLOC_MP_PTRS (K);
  ASSERT(Ap != NULL);
  Bp = TMP_ALLOC_MP_PTRS (K);
  ASSERT(Bp != NULL);

  i = mpn_mul_fft_internal (op, pl, n, nl, m, ml, k, Ap, Bp, A, B, nprime, l,
                            _fft_l, T, 0, b);

  /* release everything, with the same sizes that were used to allocate */
  TMP_FREE;
  __GMP_FREE_FUNC_LIMBS (T, 2 * (nprime + 1));
  __GMP_FREE_FUNC_LIMBS (A, 2 * K * (nprime + 1));
  if (use_tmp_n)
    __GMP_FREE_FUNC_LIMBS ((mp_ptr) n, pl + (b == 1));
  if (use_tmp_m)
    __GMP_FREE_FUNC_LIMBS ((mp_ptr) m, pl + (b == 1));

  return i;
}

/* multiply {n, nl} by {m, ml}, and put the result in {op, nl+ml},
   using one modular product mod 2^N-1 and one mod 2^(aN)+1,
   with a >= 1.

   The two residues are obtained with mpn_mul_fft (mod 2^(aN)+1) and
   mpn_mul_fft_mersenne (mod 2^N-1) and recombined as described in the
   comment inside the body.  A temporary of l limbs is allocated for the
   second residue and freed before returning. */
static void
mpn_mul_fft_full_a (mp_ptr op,
                    mp_srcptr n, mp_size_t nl,
                    mp_srcptr m, mp_size_t ml, int a)
{
  mp_size_t pl = nl + ml; /* total number of limbs of the result */
  int sqr = n == m && nl == ml;
  mp_size_t l, h;
  mp_limb_t muh, cc;
  int k1, k2, i;
  mp_ptr tp;

  l = (pl + a + (a > 1)) / (a + 1); /* ceil(pl/(a+1)) */
  /* Warning: for a > 1, the product may be larger than (2^N-1) * (2^(aN)+1),
     thus we take one extra limb. */

  k1 = mpn_fft_best_k (l, 2 + sqr); /* for 2^N-1 */
  k2 = mpn_fft_best_k (a * l, sqr); /* for 2^(aN)+1 */

  /* we must have l multiple of 2^k1 and a*l multiple of 2^k2.
     FIXME: the optimal k1 and k2 values might change in the while loop. */
  while (1)
    {
      h = mpn_fft_next_size (l, k1);
      if (h != l)
        l = h;
      else
        {
          h = mpn_fft_next_size (a * l, k2);
          if (h != a * l)
            l = (h + a - 1) / a; /* ceil(h/a) */
          else
            break;
        }
    }

  h = a * l;
  /* now mpn_fft_next_size (l, k1) = l and mpn_fft_next_size (h, k2) = h
     with h = a * l */

  /* we perform one FFT mod 2^(aN)+1 and one mod 2^N-1.
     Let P = n * m. Assume P = lambda * (2^(aN)+1) + mu,
     with 0 <= mu < 2^(aN)+1, and 0 <= lambda < 2^N-1.
     Then P = mu mod (2^(aN)+1)
     and  P = 2*lambda+mu mod (2^N-1).
     Let A := P mod (2^(aN)+1) and B := P mod (2^N-1),
     with 0 <= A < 2^(aN)+1 and 0 <= B < 2^N-1.
     Then mu = A, and lambda = (B-A)/2 mod (2^N-1). */

  ASSERT_ALWAYS(h < pl);
  muh = mpn_mul_fft (op, h, n, nl, m, ml, k2); /* mu = muh+{op,h} */

  tp = __GMP_ALLOCATE_FUNC_LIMBS (l);
  ASSERT (tp != NULL);
  mpn_mul_fft_mersenne (tp, l, n, nl, m, ml, k1); /* B */

  /* now compute B-A mod 2^N-1, where B = {tp, l}, and A = cc + {op, h} */
  for (cc = muh, i = 0; i < a; i++)
    cc += mpn_sub_n (tp, tp, op + i * l, l);
  /* cc is a borrow at tp[0] */
  while (cc > 0) /* add cc*(2^N-1): if cc=1 after the first loop, then
                    tp[l-1] = 111...111, and cc=0 after the 2nd loop */
    cc = mpn_sub_1 (tp, tp, l, cc);
  /* Check whether {tp,l} = 111...111, in which case we should reduce it
     to 000...000. */
  for (i = 0; i < l && ~tp[i] == 0; i++);
  if (i == l)
    mpn_add_1 (tp, tp, l, 1); /* reduces {tp,l} to 000...000 */

  /* make cc + {tp, l} even, and divide by 2 */
  if (tp[0] & (mp_limb_t) 1)
    cc = 1 - mpn_sub_1 (tp, tp, l, 1); /* add 2^N-1 */
  /* now we have to compute lambda * (2^(aN)+1) + mu,
     where 2*lambda = {tp, l} and mu = muh + {op, h} */
  mpn_rshift (op + h, tp, pl - h, 1); /* divide by 2 to obtain lambda */
  if (pl < l + h) /* i.e. pl - h < l: it remains high limbs in {tp, l} */
    {
      /* since the product is P = lambda * (2^(aN)+1) + mu, if cc=1, the
         product would exceed pl < h+l limbs */
      ASSERT_ALWAYS (cc == 0);
      /* bit shifted out of the first limb that was not copied */
      cc = tp[pl - h] & 1;
    }
  /* cc becomes the top bit of the shifted value */
  op[pl - 1] |= cc << (GMP_NUMB_BITS - 1);
  __GMP_FREE_FUNC_LIMBS (tp, l);

  /* since n * m has at most pl limbs, the high part of lambda should be 0 */
  cc = mpn_add_n (op, op, op + h, pl - h); /* add lambda to mu */
  MPN_INCR_U (op + pl - h, h, cc);
  MPN_INCR_U (op + h, pl - h, muh);
}

/* multiply {n, nl} by {m, ml}, and put the result in {op, nl+ml}.
   Without MUL_FFT_FULL_TABLE2 this always calls mpn_mul_fft_full_a with
   a = 1; otherwise a is chosen by mpn_fft_best_a. */
void
mpn_mul_fft_full (mp_ptr op,
                  mp_srcptr n, mp_size_t nl,
                  mp_srcptr m, mp_size_t ml)
{
#ifndef MUL_FFT_FULL_TABLE2
  mpn_mul_fft_full_a (op, n, nl, m, ml, 1);
#else
  int a = mpn_fft_best_a ((nl + ml) / 2, n == m && nl == ml);
  mpn_mul_fft_full_a (op, n, nl, m, ml, a);
#endif
  return;
}
ecm-6.4.4/test.ecm0000755023561000001540000002351312106741273010672 00000000000000#!/bin/sh # test file for ECM # # Copyright 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2011, 2012 # Jim Fougeron, Alexander Kruppa, Dave Newman, Paul Zimmermann, Cyril Bouvier, # David Cleaver. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License # along with this program; see the file COPYING. If not, see # http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., # 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. ECM="$1" # Call with "checkcode $?
n" to check that return code is n # (see test.pm1 for the explanation of the different return codes) checkcode () { if [ $1 != $2 ] then echo "############### ERROR ###############" echo "Expected return code $2 but got $1" exit 1 fi } # Exit statues returned by GMP-ECM: # 0 Normal program termination, no factor found # 1 Error # 2 Composite factor found, cofactor is composite # 6 Probable prime factor found, cofactor is composite # 8 Input number found # 10 Composite factor found, cofactor is a probable prime # 14 Probable prime factor found, cofactor is a probable prime BATCH0="" # try primes < d in stage 2. Curve with sigma=7, mod 30210181 has order # 2^4 * 3^3 * 29 * 2411 echo 2050449353925555290706354283 | $ECM $BATCH0 -sigma 7 -k 1 30 0-1e6; checkcode $? 14 # check the -treefile option echo 2050449353925555290706354283 | $ECM $BATCH0 -treefile tree -sigma 7 -k 1 30 0-1e6; checkcode $? 14 # Check a stage 2 of length 1. g1=1822795201 g2=968809 g3=567947 echo 212252637915375215854013140804296246361 | $ECM $BATCH0 -sigma 781683988 -go 550232165123 63421 1822795201-1822795201; checkcode $? 8 # tests from Torbjo"rn Granlund echo 137703491 | $ECM $BATCH0 -sigma 6 84 1000; checkcode $? 8 echo 3533000986701102061387017352606588294716061 | $ECM $BATCH0 -sigma 1621 191 225; checkcode $? 14 echo 145152979917007299777325725119 | $ECM $BATCH0 -sigma 711387948 924 117751; checkcode $? 14 # Test a few base 2 numbers. These tests are fairly quick. # Test a 2^n-1 number, factor found in stage 1. Order mod 33554520197234177 # with sigma=262763035 is 2^3*3*5*47*59*241*601*743*937 echo "2^919-1" | $ECM $BATCH0 -sigma 262763035 937 1; checkcode $? 6 # Test a 2^n-1 number, factor found in stage 2. Order mod 33554520197234177 # with sigma=1691973485 is 2^6*3*11*29*59*73*263*283*1709 echo "2^919-1" | $ECM $BATCH0 -sigma 1691973485 283 1709; checkcode $? 6 # Test a 2^n+1 number, factor found in stage 1. 
Order mod 24651922299337 # with sigma=2301432245 is 2^3*3^3*5^2*7^2*17*67*157*521 echo "(2^1033+1)/3" | $ECM $BATCH0 -sigma 2301432245 521 1; checkcode $? 6 # Test a 2^n+1 number, factor found in stage 2. Order mod 24651922299337 # with sigma=2394040080 is 2^2*3^2*13*19*53*127*223*1847 echo "(2^1033+1)/3" | $ECM $BATCH0 -sigma 2301432245 223 1847; checkcode $? 6 # Test another 2^n+1 number, with a larger known factor divided out. # Factor found in stage 1, order mod 114584129081 with sigma=2399424618 # is 2^9*3^2*5^2*7^2*53*383 echo "(2^1063+1)/3/26210488518118323164267329859" | $ECM $BATCH0 -sigma 2399424618 383 1 ; checkcode $? 6 # Like last one, but factor found in stage 2 echo "(2^1063+1)/3/26210488518118323164267329859" | $ECM $BATCH0 -sigma 2399424618 71 500; checkcode $? 6 echo 242668358425701966181147598421249782519178289604307455138484425562807899 | $ECM $BATCH0 -sigma 1417477358 28560 8e7-85507063; checkcode $? 14 # bug found by Jim Fougeron echo 3533000986701102061387017352606588294716061 | $ECM $BATCH0 -sigma 291310394389387 191 225; checkcode $? 14 echo 121279606270805899614487548491773862357 | $ECM $BATCH0 -sigma 1931630101 120; checkcode $? 14 echo 291310394389387 | $ECM $BATCH0 -power 3 -sigma 40 2000; checkcode $? 8 echo 3533000986701102061387017352606588294716061 | $ECM $BATCH0 -sigma 3547 167 211; checkcode $? 14 # test -go option echo 449590253344339769860648131841615148645295989319968106906219761704350259884936939123964073775456979170209297434164627098624602597663490109944575251386017 | $ECM $BATCH0 -sigma 63844855 -go 172969 61843 20658299; checkcode $? 14 echo 17061648125571273329563156588435816942778260706938821014533 | $ECM $BATCH0 -sigma 585928442 174000; checkcode $? 14 echo 89101594496537524661600025466303491594098940711325290746374420963129505171895306244425914080753573576861992127359576789001 | $ECM $BATCH0 -sigma 877655087 -go 325001 157721 1032299; checkcode $? 
14 echo 5394204444759808120647321820789847518754252780933425517607611172590240019087317088600360602042567541009369753816111824690753627535877960715703346991252857 | $ECM $BATCH0 -sigma 805816989 -go 345551 149827; checkcode $? 6 echo 3923385745693995079670229419275984584311007321932374190635656246740175165573932140787529348954892963218868359081838772941945556717 | $ECM $BATCH0 -sigma 876329474 141667 150814537; checkcode $? 14 echo 124539923134619429718018353168641490719788526741873602224103589351798060075728544650990190016536810151633233676972068237330360238752628542584228856301923448951 | $ECM $BATCH0 -sigma 1604840403 -go "1260317*1179109*661883" 96097 24289207; checkcode $? 14 # p49 found by Sean Irvine echo 4983070578699621345648758795946786489699447158923341167929707152021191319057138908604417894224244096909460401007237133698775496719078793168004317119431646035122982915288481052088094940158965731422616671 | $ECM $BATCH0 -sigma 909010734 122861 176711; checkcode $? 6 # bug in ecm-5.0 (overflow in fin_diff_coeff) echo 1408323592065265621229603282020508687 | $ECM $BATCH0 -sigma 1549542516 -go 2169539 531571 29973883000-29973884000; checkcode $? 8 # bug in ecm 5.0 and 5.0.1 (factor found for c110 input, not with p58) echo 3213162276640339413566047915418064969550383692549981333701 | $ECM $BATCH0 -sigma 2735675386 -go 1615843 408997 33631583; checkcode $? 8 echo 39614081257132168796771975177 | $ECM $BATCH0 -sigma 480 1e6; checkcode $? 8 echo 10000286586958753753 | $ECM $BATCH0 -sigma 3956738175 1e6; checkcode $? 8 echo 49672383630046506169472128421 | $ECM $BATCH0 -sigma 2687434659 166669 86778487; checkcode $? 8 echo 216259730493575791390589173296092767511 | $ECM $BATCH0 -sigma 214659179 1124423 20477641; checkcode $? 8 # bug reported by Allan Steel on 14 March 2006 echo 49367108402201032092269771894422156977426293789852367266303146912244441959559870316184237 | $ECM $BATCH0 -sigma 6 5000; checkcode $? 
0 # A test with a larger input number to test modular arithmetic routines not # in mulredc*.asm. This input has 1363 bits so it has 22 64 bit words # (43 32 bit words) and cannot use mulredc which handles only up to 20 limbs echo "10090030271*10^400+696212088699" | $ECM $BATCH0 -sigma 3923937547 1e3 1e6; checkcode $? 14 # To test batch mode 1 # the following test works both on 32- and 64-bit machines # on 32-bit machines it corresponds to d' = 42 # on 64-bit machines it corresponds to d' = 42*2^32 echo 458903930815802071188998938170281707063809443792768383215233 | $ECM -batch -A 103699173453039012668349162616750601868936199904547322268878 10000 checkcode $? 14 # same with batch=2 echo 458903930815802071188998938170281707063809443792768383215233 | $ECM -batch=2 -A 103699173453039012668349162616750601868936199904547322268878 10000 checkcode $? 14 # this test corresponds to d'=13 on 32-bit, 13*2^32 on 64-bit echo "2^349-1" | $ECM -batch -A 13883915733485915535567641090102088744917579395318243004655770450844428217574163575149253565087742 587 29383 checkcode $? 6 # same with batch=2 echo "2^349-1" | $ECM -batch=2 -A 13883915733485915535567641090102088744917579395318243004655770450844428217574163575149253565087742 587 29383 checkcode $? 6 # another batch-mode test (d' = 1097 on 32-bit, 1097*2^32 on 64-bit) echo "2^347-1" | $ECM -batch -A 292897222300654795048417351458499833714895857628156011078988080472621879897670335421898676171177982 3301 229939 checkcode $? 14 # same with batch=2 echo "2^347-1" | $ECM -batch=2 -A 292897222300654795048417351458499833714895857628156011078988080472621879897670335421898676171177982 3301 229939 checkcode $? 14 # To test batch mode 2 echo 911962091 | $ECM -batch=2 -A 440688534 50000 checkcode $? 8 echo 31622776601683791911 | $ECM -batch=1 -A 27063318473587686303 11000 checkcode $? 
0 # non-regression test for bug fixed by changeset r1819 on 64-bit # (this also produces a small d' on 32-bit, thus can be used with batch=1) echo 18446744073709551557 | $ECM -batch -A 312656731337392125 11000 checkcode $? 8 # non-regression test for bug fixed by changeset r1819 on 32-bit echo 4294967291 | $ECM -batch -A 17 1000 checkcode $? 8 # this example would fail with the old Fgw.c when using gwnum (fixed by David Cleaver, r1734) echo "((173^173+1)/174)/471462511391940575680645418941" | $ECM $BATCH0 -sigma 12345 20000 checkcode $? 0 # this test was failing on gcc45.fsffrance.org with 6.4.1 echo "((173^173+1)/174)/471462511391940575680645418941+122" | $ECM $BATCH0 -sigma 77 20000 checkcode $? 6 # the following tests should work on machines which have uint64_t or where # unsigned long long has 64 bits (exercises patch from David Cleaver, r1575) echo "NOTE: NEXT 3 TESTS WILL FAIL ON SOME 32BIT MACHINES, THIS IS EXPECTED." echo 10000000000000000000000000000000000000121 | $ECM $BATCH0 -sigma 61 -go 1195504287780095287 2950307; checkcode $? 8 echo 10000000000000000000000000000000000000121 | $ECM $BATCH0 -sigma 266 -go 218187387944803649 9405629; checkcode $? 8 echo 10000000000000000000000000000000000000121 | $ECM $BATCH0 -sigma 291 -go 5994496018878137 4372759; checkcode $? 8 echo "All ECM tests are ok." ecm-6.4.4/spv.c0000644023561000001540000003211712106741273010176 00000000000000/* spv.c - "small prime vector" functions for arithmetic on vectors of residues modulo a single small prime Copyright 2005, 2006, 2007, 2008, 2009 Dave Newman, Jason Papadopoulos, Brian Gladman, Alexander Kruppa, Paul Zimmermann. The SP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
The SP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the SP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include /* for memset */ #include "sp.h" /* Routines for vectors of integers modulo r common small prime * * These are low-overhead routines that don't do memory allocation, * other than for temporary variables. Unless otherwise specified, any * of the input pointers can be equal. */ /* r = x */ void spv_set (spv_t r, spv_t x, spv_size_t len) { #ifdef HAVE_MEMMOVE /* memmove doesn't rely on the assertion below */ memmove (r, x, len * sizeof (sp_t)); #else spv_size_t i; ASSERT (r >= x + len || x >= r); for (i = 0; i < len; i++) r[i] = x[i]; #endif } /* r[0 ... len - 1] = x[len - 1 ... 0] */ void spv_rev (spv_t r, spv_t x, spv_size_t len) { spv_size_t i; ASSERT (r >= x + len || x >= r + len); for (i = 0; i < len; i++) r[i] = x[len - 1 - i]; } /* r = [y, y, ... ] */ void spv_set_sp (spv_t r, sp_t y, spv_size_t len) { spv_size_t i; for (i = 0; i < len; i++) r[i] = y; } void spv_set_zero (spv_t r, spv_size_t len) { memset (r, 0, len * sizeof (sp_t)); } int spv_cmp (spv_t x, spv_t y, spv_size_t len) { spv_size_t i; for (i = 0; i < len; i++) if (x[i] != y[i]) return 1; return 0; } /* r = x + y */ void spv_add (spv_t r, spv_t x, spv_t y, spv_size_t len, sp_t m) { spv_size_t i; ASSERT (r >= x + len || x >= r); ASSERT (r >= y + len || y >= r); for (i = 0; i < len; i++) r[i] = sp_add (x[i], y[i], m); } /* r = [x[0] + y, x[1] + y, ... 
] */ void spv_add_sp (spv_t r, spv_t x, sp_t c, spv_size_t len, sp_t m) { spv_size_t i; for (i = 0; i < len; i++) r[i] = sp_add (x[i], c, m); } /* r = x - y */ void spv_sub (spv_t r, spv_t x, spv_t y, spv_size_t len, sp_t m) { spv_size_t i; ASSERT (r >= x + len || x >= r); ASSERT (r >= y + len || y >= r); for (i = 0; i < len; i++) r[i] = sp_sub (x[i], y[i], m); } /* r = [x[0] - y, x[1] - y, ... ] */ void spv_sub_sp (spv_t r, spv_t x, sp_t c, spv_size_t len, sp_t m) { spv_size_t i; for (i = 0; i < len; i++) r[i] = sp_sub (x[i], c, m); } /* r = [-x[0], -x[1], ... ] */ void spv_neg (spv_t r, spv_t x, spv_size_t len, sp_t m) { spv_size_t i; for (i = 0; i < len; i++) r[i] = sp_sub (0, x[i], m); } /* Pointwise multiplication * r = [x[0] * y[0], x[1] * y[1], ... ] */ void spv_pwmul (spv_t r, spv_t x, spv_t y, spv_size_t len, sp_t m, sp_t d) { spv_size_t i = 0; ASSERT (r >= x + len || x >= r); ASSERT (r >= y + len || y >= r); #if (defined(__GNUC__) || defined(__ICL)) && \ defined(__i386__) && defined(HAVE_SSE2) asm volatile ( "movd %6, %%xmm6 \n\t" "pshufd $0x44, %%xmm6, %%xmm5 \n\t" "pshufd $0, %%xmm6, %%xmm6 \n\t" "movd %7, %%xmm7 \n\t" "pshufd $0, %%xmm7, %%xmm7 \n\t" "0: \n\t" "movdqa (%1,%4,4), %%xmm0 \n\t" "movdqa (%2,%4,4), %%xmm2 \n\t" "pshufd $0x31, %%xmm0, %%xmm1\n\t" "pshufd $0x31, %%xmm2, %%xmm3\n\t" "pmuludq %%xmm2, %%xmm0 \n\t" "pmuludq %%xmm3, %%xmm1 \n\t" "movdqa %%xmm0, %%xmm2 \n\t" "movdqa %%xmm1, %%xmm3 \n\t" "psrlq $" STRING((2*SP_NUMB_BITS - W_TYPE_SIZE)) ", %%xmm2 \n\t" "pmuludq %%xmm7, %%xmm2 \n\t" "psrlq $" STRING((2*SP_NUMB_BITS - W_TYPE_SIZE)) ", %%xmm3 \n\t" "pmuludq %%xmm7, %%xmm3 \n\t" #if SP_NUMB_BITS < W_TYPE_SIZE - 1 "psrlq $33, %%xmm2 \n\t" "pmuludq %%xmm6, %%xmm2 \n\t" "psrlq $33, %%xmm3 \n\t" "pmuludq %%xmm6, %%xmm3 \n\t" "psubq %%xmm2, %%xmm0 \n\t" "psubq %%xmm3, %%xmm1 \n\t" #else "pshufd $0xf5, %%xmm2, %%xmm2 \n\t" "pmuludq %%xmm6, %%xmm2 \n\t" "pshufd $0xf5, %%xmm3, %%xmm3 \n\t" "pmuludq %%xmm6, %%xmm3 \n\t" "psubq %%xmm2, %%xmm0 
\n\t" "psubq %%xmm3, %%xmm1 \n\t" "psubq %%xmm5, %%xmm0 \n\t" "psubq %%xmm5, %%xmm1 \n\t" "pshufd $0xf5, %%xmm0, %%xmm2 \n\t" "pshufd $0xf5, %%xmm1, %%xmm3 \n\t" "pand %%xmm5, %%xmm2 \n\t" "pand %%xmm5, %%xmm3 \n\t" "paddq %%xmm2, %%xmm0 \n\t" "paddq %%xmm3, %%xmm1 \n\t" #endif "pshufd $0x8, %%xmm0, %%xmm0 \n\t" "pshufd $0x8, %%xmm1, %%xmm1 \n\t" "punpckldq %%xmm1, %%xmm0 \n\t" "psubd %%xmm6, %%xmm0 \n\t" "pxor %%xmm1, %%xmm1 \n\t" "pcmpgtd %%xmm0, %%xmm1 \n\t" "pand %%xmm6, %%xmm1 \n\t" "paddd %%xmm1, %%xmm0 \n\t" "movdqa %%xmm0, (%3,%4,4) \n\t" "addl $4, %4 \n\t" /* INC */ "cmpl %5, %4 \n\t" "jne 0b \n\t" :"=r"(i) :"r"(x), "r"(y), "r"(r), "0"(i), "g"(len & (spv_size_t)(~3)), "g"(m), "g"(d) :"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"); #elif defined( _MSC_VER ) && defined( SSE2) __asm { push esi push edi mov edi, x mov esi, y mov edx, r xor ecx, ecx mov eax, len and eax, ~3 movd xmm6, m pshufd xmm5, xmm6, 0x44 pshufd xmm6, xmm6, 0 movd xmm7, d pshufd xmm7, xmm7, 0 L0: movdqa xmm0, [edi+ecx*4] movdqa xmm2, [esi+ecx*4] pshufd xmm1, xmm0, 0x31 pshufd xmm3, xmm2, 0x31 pmuludq xmm0, xmm2 pmuludq xmm1, xmm3 movdqa xmm2, xmm0 movdqa xmm3, xmm1 psrlq xmm2, 2*SP_NUMB_BITS - W_TYPE_SIZE pmuludq xmm2, xmm7 psrlq xmm3, 2*SP_NUMB_BITS - W_TYPE_SIZE pmuludq xmm3, xmm7 #if SP_NUMB_BITS < W_TYPE_SIZE - 1 psrlq xmm2, 33 pmuludq xmm2, xmm6 psrlq xmm3, 33 pmuludq xmm3, xmm6 psubq xmm0, xmm2 psubq xmm1, xmm3 #else pshufd xmm2, xmm2, 0xf5 pmuludq xmm2, xmm6 pshufd xmm3, xmm3, 0xf5 pmuludq xmm3, xmm6 psubq xmm0, xmm2 psubq xmm1, xmm3 psubq xmm0, xmm5 psubq xmm1, xmm5 pshufd xmm2, xmm0, 0xf5 pshufd xmm3, xmm1, 0xf5 pand xmm2, xmm5 pand xmm3, xmm5 paddq xmm0, xmm2 paddq xmm1, xmm3 #endif pshufd xmm0, xmm0, 0x8 pshufd xmm1, xmm1, 0x8 punpckldq xmm0, xmm1 psubd xmm0, xmm6 pxor xmm1, xmm1 pcmpgtd xmm1, xmm0 pand xmm1, xmm6 paddd xmm0, xmm1 movdqa [edx+ecx*4], xmm0 add ecx, 4 cmp eax, ecx jne L0 mov i, ecx pop edi pop esi } #endif for (; i < len; i++) r[i] 
= sp_mul (x[i], y[i], m, d); } /* Pointwise multiplication, second input is read in reverse * r = [x[0] * y[len - 1], x[1] * y[len - 2], ... x[len - 1] * y[0]] */ void spv_pwmul_rev (spv_t r, spv_t x, spv_t y, spv_size_t len, sp_t m, sp_t d) { spv_size_t i; ASSERT (r >= x + len || x >= r); ASSERT (r >= y + len || y >= r); for (i = 0; i < len; i++) r[i] = sp_mul (x[i], y[len - 1 - i], m, d); } /* dst = src * y */ void spv_mul_sp (spv_t r, spv_t x, sp_t c, spv_size_t len, sp_t m, sp_t d) { spv_size_t i = 0; ASSERT (r >= x + len || x >= r); #if (defined(__GNUC__) || defined(__ICL)) && \ defined(__i386__) && defined(HAVE_SSE2) asm volatile ( "movd %2, %%xmm4 \n\t" "pshufd $0, %%xmm4, %%xmm4 \n\t" "movd %6, %%xmm6 \n\t" "pshufd $0x44, %%xmm6, %%xmm5 \n\t" "pshufd $0, %%xmm6, %%xmm6 \n\t" "movd %7, %%xmm7 \n\t" "pshufd $0, %%xmm7, %%xmm7 \n\t" "0: \n\t" "movdqa (%1,%4,4), %%xmm0 \n\t" "pshufd $0x31, %%xmm0, %%xmm1\n\t" "pshufd $0x31, %%xmm4, %%xmm3\n\t" "pmuludq %%xmm4, %%xmm0 \n\t" "pmuludq %%xmm3, %%xmm1 \n\t" "movdqa %%xmm0, %%xmm2 \n\t" "movdqa %%xmm1, %%xmm3 \n\t" "psrlq $" STRING((2*SP_NUMB_BITS - W_TYPE_SIZE)) ", %%xmm2 \n\t" "pmuludq %%xmm7, %%xmm2 \n\t" "psrlq $" STRING((2*SP_NUMB_BITS - W_TYPE_SIZE)) ", %%xmm3 \n\t" "pmuludq %%xmm7, %%xmm3 \n\t" #if SP_NUMB_BITS < W_TYPE_SIZE - 1 "psrlq $33, %%xmm2 \n\t" "pmuludq %%xmm6, %%xmm2 \n\t" "psrlq $33, %%xmm3 \n\t" "pmuludq %%xmm6, %%xmm3 \n\t" "psubq %%xmm2, %%xmm0 \n\t" "psubq %%xmm3, %%xmm1 \n\t" #else "pshufd $0xf5, %%xmm2, %%xmm2 \n\t" "pmuludq %%xmm6, %%xmm2 \n\t" "pshufd $0xf5, %%xmm3, %%xmm3 \n\t" "pmuludq %%xmm6, %%xmm3 \n\t" "psubq %%xmm2, %%xmm0 \n\t" "psubq %%xmm3, %%xmm1 \n\t" "psubq %%xmm5, %%xmm0 \n\t" "psubq %%xmm5, %%xmm1 \n\t" "pshufd $0xf5, %%xmm0, %%xmm2 \n\t" "pshufd $0xf5, %%xmm1, %%xmm3 \n\t" "pand %%xmm5, %%xmm2 \n\t" "pand %%xmm5, %%xmm3 \n\t" "paddq %%xmm2, %%xmm0 \n\t" "paddq %%xmm3, %%xmm1 \n\t" #endif "pshufd $0x8, %%xmm0, %%xmm0 \n\t" "pshufd $0x8, %%xmm1, %%xmm1 \n\t" "punpckldq %%xmm1, 
%%xmm0 \n\t" "psubd %%xmm6, %%xmm0 \n\t" "pxor %%xmm1, %%xmm1 \n\t" "pcmpgtd %%xmm0, %%xmm1 \n\t" "pand %%xmm6, %%xmm1 \n\t" "paddd %%xmm1, %%xmm0 \n\t" "movdqa %%xmm0, (%3,%4,4) \n\t" "addl $4, %4 \n\t" /* INC */ "cmpl %5, %4 \n\t" "jne 0b \n\t" :"=r"(i) :"r"(x), "g"(c), "r"(r), "0"(i), "g"(len & (spv_size_t)(~3)), "g"(m), "g"(d) :"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"); #elif defined( _MSC_VER ) && defined( SSE2) __asm { push esi push edi xor ecx, ecx mov edi, x mov esi, c mov edx, r mov eax, len and eax, ~3 movd xmm4, esi pshufd xmm4, xmm4, 0 movd xmm6, m pshufd xmm5, xmm6, 0x44 pshufd xmm6, xmm6, 0 movd xmm7, d pshufd xmm7, xmm7, 0 L0: movdqa xmm0, [edi+ecx*4] pshufd xmm1, xmm0, 0x31 pshufd xmm3, xmm4, 0x31 pmuludq xmm0, xmm4 pmuludq xmm1, xmm3 movdqa xmm2, xmm0 movdqa xmm3, xmm1 psrlq xmm2, 2*SP_NUMB_BITS - W_TYPE_SIZE pmuludq xmm2, xmm7 psrlq xmm3, 2*SP_NUMB_BITS - W_TYPE_SIZE pmuludq xmm3, xmm7 #if SP_NUMB_BITS < W_TYPE_SIZE - 1 psrlq xmm2, 33 pmuludq xmm2, xmm6 psrlq xmm3, 33 pmuludq xmm3, xmm6 psubq xmm0, xmm2 psubq xmm1, xmm3 #else pshufd xmm2, xmm2, 0xf5 pmuludq xmm2, xmm6 pshufd xmm3, xmm3, 0xf5 pmuludq xmm3, xmm6 psubq xmm0, xmm2 psubq xmm1, xmm3 psubq xmm0, xmm5 psubq xmm1, xmm5 pshufd xmm2, xmm0, 0xf5 pshufd xmm3, xmm1, 0xf5 pand xmm2, xmm5 pand xmm3, xmm5 paddq xmm0, xmm2 paddq xmm1, xmm3 #endif pshufd xmm0, xmm0, 0x8 pshufd xmm1, xmm1, 0x8 punpckldq xmm0, xmm1 psubd xmm0, xmm6 pxor xmm1, xmm1 pcmpgtd xmm1, xmm0 pand xmm1, xmm6 paddd xmm0, xmm1 movdqa [edx+ecx*4], xmm0 add ecx, 4 cmp eax, ecx jne L0 mov i, ecx pop edi pop esi } #endif for (; i < len; i++) r[i] = sp_mul (x[i], c, m, d); } void spv_random (spv_t x, spv_size_t len, sp_t m) { spv_size_t i; mpn_random (x, len); for (i = 0; i < len; i++) while (x[i] >= m) x[i] -= m; } ecm-6.4.4/install-sh0000755023561000001540000003325612106744312011230 00000000000000#!/bin/sh # install - install a program, script, or datafile scriptversion=2011-01-19.21; # 
UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the # following copyright and license. # # Copyright (C) 1994 X Consortium # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # Except as contained in this notice, the name of the X Consortium shall not # be used in advertising or otherwise to promote the sale, use or other deal- # ings in this Software without prior written authorization from the X Consor- # tium. # # # FSF changes to this file are in the public domain. # # Calling this script install-sh is preferred over install.sh, to prevent # `make' implicit rules from creating a file called install from it # when there is no Makefile. # # This script is compatible with the BSD install script, but was written # from scratch. nl=' ' IFS=" "" $nl" # set DOITPROG to echo to test this script # Don't use :- since 4.3BSD and earlier shells don't like it. 
doit=${DOITPROG-} if test -z "$doit"; then doit_exec=exec else doit_exec=$doit fi # Put in absolute file names if you don't have them in your path; # or use environment vars. chgrpprog=${CHGRPPROG-chgrp} chmodprog=${CHMODPROG-chmod} chownprog=${CHOWNPROG-chown} cmpprog=${CMPPROG-cmp} cpprog=${CPPROG-cp} mkdirprog=${MKDIRPROG-mkdir} mvprog=${MVPROG-mv} rmprog=${RMPROG-rm} stripprog=${STRIPPROG-strip} posix_glob='?' initialize_posix_glob=' test "$posix_glob" != "?" || { if (set -f) 2>/dev/null; then posix_glob= else posix_glob=: fi } ' posix_mkdir= # Desired mode of installed file. mode=0755 chgrpcmd= chmodcmd=$chmodprog chowncmd= mvcmd=$mvprog rmcmd="$rmprog -f" stripcmd= src= dst= dir_arg= dst_arg= copy_on_change=false no_target_directory= usage="\ Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE or: $0 [OPTION]... SRCFILES... DIRECTORY or: $0 [OPTION]... -t DIRECTORY SRCFILES... or: $0 [OPTION]... -d DIRECTORIES... In the 1st form, copy SRCFILE to DSTFILE. In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. In the 4th, create DIRECTORIES. Options: --help display this help and exit. --version display version info and exit. -c (ignored) -C install only if different (preserve the last data modification time) -d create directories instead of installing files. -g GROUP $chgrpprog installed files to GROUP. -m MODE $chmodprog installed files to MODE. -o USER $chownprog installed files to USER. -s $stripprog installed files. -t DIRECTORY install into DIRECTORY. -T report an error if DSTFILE is a directory. 
Environment variables override the default commands: CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG " while test $# -ne 0; do case $1 in -c) ;; -C) copy_on_change=true;; -d) dir_arg=true;; -g) chgrpcmd="$chgrpprog $2" shift;; --help) echo "$usage"; exit $?;; -m) mode=$2 case $mode in *' '* | *' '* | *' '* | *'*'* | *'?'* | *'['*) echo "$0: invalid mode: $mode" >&2 exit 1;; esac shift;; -o) chowncmd="$chownprog $2" shift;; -s) stripcmd=$stripprog;; -t) dst_arg=$2 # Protect names problematic for `test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac shift;; -T) no_target_directory=true;; --version) echo "$0 $scriptversion"; exit $?;; --) shift break;; -*) echo "$0: invalid option: $1" >&2 exit 1;; *) break;; esac shift done if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then # When -d is used, all remaining arguments are directories to create. # When -t is used, the destination is already specified. # Otherwise, the last argument is the destination. Remove it from $@. for arg do if test -n "$dst_arg"; then # $@ is not empty: it contains at least $arg. set fnord "$@" "$dst_arg" shift # fnord fi shift # arg dst_arg=$arg # Protect names problematic for `test' and other utilities. case $dst_arg in -* | [=\(\)!]) dst_arg=./$dst_arg;; esac done fi if test $# -eq 0; then if test -z "$dir_arg"; then echo "$0: no input file specified." >&2 exit 1 fi # It's OK to call `install-sh -d' without argument. # This can happen when creating conditional directories. exit 0 fi if test -z "$dir_arg"; then do_exit='(exit $ret); exit $ret' trap "ret=129; $do_exit" 1 trap "ret=130; $do_exit" 2 trap "ret=141; $do_exit" 13 trap "ret=143; $do_exit" 15 # Set umask so as not to create temps with too-generous modes. # However, 'strip' requires both read and write access to temps. case $mode in # Optimize common cases. 
*644) cp_umask=133;; *755) cp_umask=22;; *[0-7]) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw='% 200' fi cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; *) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw=,u+rw fi cp_umask=$mode$u_plus_rw;; esac fi for src do # Protect names problematic for `test' and other utilities. case $src in -* | [=\(\)!]) src=./$src;; esac if test -n "$dir_arg"; then dst=$src dstdir=$dst test -d "$dstdir" dstdir_status=$? else # Waiting for this to be detected by the "$cpprog $src $dsttmp" command # might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if test ! -f "$src" && test ! -d "$src"; then echo "$0: $src does not exist." >&2 exit 1 fi if test -z "$dst_arg"; then echo "$0: no destination specified." >&2 exit 1 fi dst=$dst_arg # If destination is a directory, append the input filename; won't work # if double slashes aren't ignored. if test -d "$dst"; then if test -n "$no_target_directory"; then echo "$0: $dst_arg: Is a directory" >&2 exit 1 fi dstdir=$dst dst=$dstdir/`basename "$src"` dstdir_status=0 else # Prefer dirname, but fall back on a substitute if dirname fails. dstdir=` (dirname "$dst") 2>/dev/null || expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$dst" : 'X\(//\)[^/]' \| \ X"$dst" : 'X\(//\)$' \| \ X"$dst" : 'X\(/\)' \| . 2>/dev/null || echo X"$dst" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q' ` test -d "$dstdir" dstdir_status=$? fi fi obsolete_mkdir_used=false if test $dstdir_status != 0; then case $posix_mkdir in '') # Create intermediate dirs using mode 755 as modified by the umask. # This is like FreeBSD 'install' as of 1997-10-28. umask=`umask` case $stripcmd.$umask in # Optimize common cases. 
*[2367][2367]) mkdir_umask=$umask;; .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; *[0-7]) mkdir_umask=`expr $umask + 22 \ - $umask % 100 % 40 + $umask % 20 \ - $umask % 10 % 4 + $umask % 2 `;; *) mkdir_umask=$umask,go-w;; esac # With -d, create the new directory with the user-specified mode. # Otherwise, rely on $mkdir_umask. if test -n "$dir_arg"; then mkdir_mode=-m$mode else mkdir_mode= fi posix_mkdir=false case $umask in *[123567][0-7][0-7]) # POSIX mkdir -p sets u+wx bits regardless of umask, which # is incompatible with FreeBSD 'install' when (umask & 300) != 0. ;; *) tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 if (umask $mkdir_umask && exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 then if test -z "$dir_arg" || { # Check for POSIX incompatibilities with -m. # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or # other-writeable bit of parent directory when it shouldn't. # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. ls_ld_tmpdir=`ls -ld "$tmpdir"` case $ls_ld_tmpdir in d????-?r-*) different_mode=700;; d????-?--*) different_mode=755;; *) false;; esac && $mkdirprog -m$different_mode -p -- "$tmpdir" && { ls_ld_tmpdir_1=`ls -ld "$tmpdir"` test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" } } then posix_mkdir=: fi rmdir "$tmpdir/d" "$tmpdir" else # Remove any dirs left behind by ancient mkdir implementations. rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null fi trap '' 0;; esac;; esac if $posix_mkdir && ( umask $mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ) then : else # The umask is ridiculous, or mkdir does not conform to POSIX, # or it failed possibly due to a race condition. Create the # directory the slow way, step by step, checking for races as we go. 
case $dstdir in /*) prefix='/';; [-=\(\)!]*) prefix='./';; *) prefix='';; esac eval "$initialize_posix_glob" oIFS=$IFS IFS=/ $posix_glob set -f set fnord $dstdir shift $posix_glob set +f IFS=$oIFS prefixes= for d do test X"$d" = X && continue prefix=$prefix$d if test -d "$prefix"; then prefixes= else if $posix_mkdir; then (umask=$mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break # Don't fail if two instances are running concurrently. test -d "$prefix" || exit 1 else case $prefix in *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; *) qprefix=$prefix;; esac prefixes="$prefixes '$qprefix'" fi fi prefix=$prefix/ done if test -n "$prefixes"; then # Don't fail if two instances are running concurrently. (umask $mkdir_umask && eval "\$doit_exec \$mkdirprog $prefixes") || test -d "$dstdir" || exit 1 obsolete_mkdir_used=true fi fi fi if test -n "$dir_arg"; then { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 else # Make a couple of temp file names in the proper directory. dsttmp=$dstdir/_inst.$$_ rmtmp=$dstdir/_rm.$$_ # Trap to clean up those temp files at exit. trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 # Copy the file name to the temp name. (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && # and set any options; do chmod last to preserve setuid bits. # # If any of these fail, we abort the whole thing. If we want to # ignore errors from any of these, just make sure not to ignore # errors from the above "$doit $cpprog $src $dsttmp" command. # { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && # If -C, don't bother to copy if it wouldn't change the file. 
if $copy_on_change && old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && eval "$initialize_posix_glob" && $posix_glob set -f && set X $old && old=:$2:$4:$5:$6 && set X $new && new=:$2:$4:$5:$6 && $posix_glob set +f && test "$old" = "$new" && $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 then rm -f "$dsttmp" else # Rename the file to the real destination. $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null || # The rename failed, perhaps because mv can't rename something else # to itself, or perhaps because mv is so ancient that it does not # support -f. { # Now remove or move aside any old file at destination location. # We try this two ways since rm can't unlink itself on some # systems and the destination file might be busy for other # reasons. In this case, the final cleanup might fail but the new # file should still install successfully. { test ! -f "$dst" || $doit $rmcmd -f "$dst" 2>/dev/null || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } } || { echo "$0: cannot unlink or rename $dst" >&2 (exit 1); exit 1 } } && # Now rename the file to the real destination. $doit $mvcmd "$dsttmp" "$dst" } fi || exit 1 trap '' 0 fi done # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-time-zone: "UTC" # time-stamp-end: "; # UTC" # End: ecm-6.4.4/ecm.c0000644023561000001540000012217012106741273010131 00000000000000/* Elliptic Curve Method: toplevel and stage 1 routines. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Paul Zimmermann, Alexander Kruppa, Cyril Bouvier, David Cleaver. This file is part of the ECM Library. 
The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include "ecm-impl.h" #include #ifdef HAVE_LIMITS_H # include #else # define ULONG_MAX __GMP_ULONG_MAX #endif /* the following factor takes into account the smaller expected smoothness for Montgomery's curves (batch mode) with respect to Suyama's curves */ #if GMP_NUMB_BITS >= 64 /* For GMP_NUMB_BITS >= 64 we use A=4d-2 with d a square (see main.c). 
In that case, Cyril Bouvier and Razvan Barbulescu have shown that the average expected torsion is that of a generic Suyama curve multiplied by the constant 2^(1/3)/(3*3^(1/128)) */ #define BATCH1_EXTRA_SMOOTHNESS 0.416384512396064 #else /* For A=4d-2 for d a random integer, the average expected torsion is that of a generic Suyama curve multiplied by the constant 1/(3*3^(1/128)) */ #define BATCH1_EXTRA_SMOOTHNESS 0.330484606500389 #endif /****************************************************************************** * * * Elliptic Curve Method * * * ******************************************************************************/ void duplicate (mpres_t, mpres_t, mpres_t, mpres_t, mpmod_t, mpres_t, mpres_t, mpres_t, mpres_t) ATTRIBUTE_HOT; void add3 (mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpmod_t, mpres_t, mpres_t, mpres_t) ATTRIBUTE_HOT; #define mpz_mulmod5(r,s1,s2,m,t) { mpz_mul(t,s1,s2); mpz_mod(r, t, m); } /* Computes curve parameter A and a starting point (x:1) from a given sigma value. If a factor of n was found during the process, returns ECM_FACTOR_FOUND_STEP1 (and factor in f), returns ECM_NO_FACTOR_FOUND otherwise. 
*/
/* Implementation notes:
     - f     : receives the factor when the inversion of b*z fails
     - A, x  : outputs (curve parameter and x-coordinate of the start point)
     - sigma : input parameter, only read
     - n     : modulus
   All five temporaries are released on both outcomes before returning. */
static int
get_curve_from_sigma (mpz_t f, mpres_t A, mpres_t x, mpz_t sigma, mpmod_t n)
{
  mpres_t t, u, v, b, z;
  int status = ECM_NO_FACTOR_FOUND;

  MEMORY_TAG;
  mpres_init (t, n);
  MEMORY_TAG;
  mpres_init (u, n);
  MEMORY_TAG;
  mpres_init (v, n);
  MEMORY_TAG;
  mpres_init (b, n);
  MEMORY_TAG;
  mpres_init (z, n);
  MEMORY_UNTAG;

  /* v = 4*sigma, u = sigma^2 - 5 */
  mpres_set_z (u, sigma, n);
  mpres_mul_ui (v, u, 4, n);
  mpres_sqr (t, u, n);
  mpres_sub_ui (u, t, 5, n);

  /* x = u^3 */
  mpres_sqr (t, u, n);
  mpres_mul (x, t, u, n);

  /* z = v^3 */
  mpres_sqr (t, v, n);
  mpres_mul (z, t, v, n);

  /* b = 4*x*v */
  mpres_mul (t, x, v, n);
  mpres_mul_ui (b, t, 4, n);

  /* t = 3*u; then u <- v-u and v <- 3*u+v (both in terms of the old u, v) */
  mpres_mul_ui (t, u, 3, n);
  mpres_sub (u, v, u, n);
  mpres_add (v, t, v, n);

  /* u <- (v-u)^3, A = (v-u)^3 * (3*u+v) */
  mpres_sqr (t, u, n);
  mpres_mul (u, t, u, n);
  mpres_mul (A, u, v, n);

  /* Normalize b and z to 1 with a single inversion of their product */
  mpres_mul (v, b, z, n);
  if (mpres_invert (u, v, n))
    {
      /* u = (b*z)^(-1) (mod n) */
      mpres_mul (v, u, b, n);     /* v = z^(-1) (mod n) */
      mpres_mul (x, x, v, n);     /* x = x * z^(-1) */

      mpres_mul (v, u, z, n);     /* v = b^(-1) (mod n) */
      mpres_mul (t, A, v, n);
      mpres_sub_ui (A, t, 2, n);
    }
  else
    {
      /* b*z is not invertible: its gcd with n is the factor we report */
      mpres_gcd (f, v, n);
      status = ECM_FACTOR_FOUND_STEP1;
    }

  mpres_clear (t, n);
  mpres_clear (u, n);
  mpres_clear (v, n);
  mpres_clear (b, n);
  mpres_clear (z, n);

  return status;
}

/* switch from Montgomery's form g*y^2 = x^3 + a*x^2 + x
   to Weierstrass' form          Y^2 = X^3 + A*X + B
   by change of variables x -> g*X-a/3, y -> g*Y.
   We have A = (3-a^2)/(3g^2), X = (3x+a)/(3g), Y = y/g.
   If a factor is found during the modular inverse, returns
   ECM_FACTOR_FOUND_STEP1 and the factor in f, otherwise returns
   ECM_NO_FACTOR_FOUND.
*/
/* Implementation notes:
     - f       : receives the factor when 3g^2 is not invertible mod n
     - x, y, A : updated in place on success (y is pure output: the input
                 point is taken with y = 1)
     - n       : modulus
   The inverse of 3g^2 is computed into a separate temporary rather than in
   place: if the inversion fails, the gcd must be taken of the value that
   could not be inverted, and the contents of the output operand of a
   failed inversion are not guaranteed (GMP documents rop of mpz_invert as
   undefined in that case).  On failure x and A are left untouched and y
   holds 3g^2. */
static int
montgomery_to_weierstrass (mpz_t f, mpres_t x, mpres_t y, mpres_t A, mpmod_t n)
{
  mpres_t g, inv;
  int status = ECM_NO_FACTOR_FOUND;

  MEMORY_TAG;
  mpres_init (g, n);
  MEMORY_TAG;
  mpres_init (inv, n);
  MEMORY_UNTAG;

  mpres_add (g, x, A, n);
  mpres_mul (g, g, x, n);
  mpres_add_ui (g, g, 1, n);
  mpres_mul (g, g, x, n);    /* g = x^3+a*x^2+x (y=1) */
  mpres_mul_ui (y, g, 3, n);
  mpres_mul (y, y, g, n);    /* y = 3g^2 */

  if (!mpres_invert (inv, y, n)) /* inv = 1/(3g^2) */
    {
      mpres_gcd (f, y, n);   /* y still holds 3g^2 here */
      status = ECM_FACTOR_FOUND_STEP1;
    }
  else
    {
      /* update x */
      mpres_mul_ui (x, x, 3, n); /* 3x */
      mpres_add (x, x, A, n);    /* 3x+a */
      mpres_mul (x, x, g, n);    /* (3x+a)*g */
      mpres_mul (x, x, inv, n);  /* (3x+a)/(3g) */

      /* update A */
      mpres_sqr (A, A, n);       /* a^2 */
      mpres_sub_ui (A, A, 3, n);
      mpres_neg (A, A, n);       /* 3-a^2 */
      mpres_mul (A, A, inv, n);  /* (3-a^2)/(3g^2) */

      /* update y */
      mpres_mul_ui (g, g, 3, n); /* 3g */
      mpres_mul (y, inv, g, n);  /* (3g)/(3g^2) = 1/g */
    }

  mpres_clear (inv, n);
  mpres_clear (g, n);

  return status;
}

/* adds Q=(x2:z2) and R=(x1:z1) and puts the result in (x3:z3),
     using 6 muls (4 muls and 2 squares), and 6 add/sub.
   One assumes that Q-R=P or R-Q=P where P=(x:z).
     - n : number to factor
     - u, v, w : auxiliary variables
   Modifies: x3, z3, u, v, w.
(x3,z3) may be identical to (x2,z2) and to (x,z)
*/
void
add3 (mpres_t x3, mpres_t z3, mpres_t x2, mpres_t z2, mpres_t x1, mpres_t z1,
      mpres_t x, mpres_t z, mpmod_t n, mpres_t u, mpres_t v, mpres_t w)
{
  /* All inputs (x1,z1) and (x2,z2) are consumed into u, v, w before x3 or
     z3 is written, which is what allows (x3,z3) to alias (x2,z2). */
  mpres_sub (u, x2, z2, n);
  mpres_add (v, x1, z1, n);      /* u = x2-z2, v = x1+z1 */

  mpres_mul (u, u, v, n);        /* u = (x2-z2)*(x1+z1) */

  mpres_add (w, x2, z2, n);
  mpres_sub (v, x1, z1, n);      /* w = x2+z2, v = x1-z1 */

  mpres_mul (v, w, v, n);        /* v = (x2+z2)*(x1-z1) */

  mpres_add (w, u, v, n);        /* w = 2*(x1*x2-z1*z2) */
  mpres_sub (v, u, v, n);        /* v = 2*(x2*z1-x1*z2) */

  mpres_sqr (w, w, n);           /* w = 4*(x1*x2-z1*z2)^2 */
  mpres_sqr (v, v, n);           /* v = 4*(x2*z1-x1*z2)^2 */

  if (x == x3) /* same variable: in-place variant */
    {
      /* We want x3 <- w * z mod n and z3 <- x * v mod n, but writing x3
         first would destroy x, which the second product still needs.
         So the two products are stored crosswise and then exchanged. */
      mpres_mul (z3, w, z, n);
      mpres_mul (x3, x, v, n);
      mpres_swap (x3, z3, n);
    }
  else
    {
      mpres_mul (x3, w, z, n);   /* x3 = 4*z*(x1*x2-z1*z2)^2 mod n */
      mpres_mul (z3, x, v, n);   /* z3 = 4*x*(x2*z1-x1*z2)^2 mod n */
    }
  /* mul += 6; */
}

/* computes 2P=(x2:z2) from P=(x1:z1), with 5 muls (3 muls and 2 squares)
   and 4 add/sub.
     - n : number to factor
     - b : (a+2)/4 mod n
     - u, v, w : auxiliary variables
   Modifies: x2, z2, u, v, w.
   (x2,z2) may be identical to (x1,z1): x1 and z1 are no longer read once
   u and v have been computed.
*/
void
duplicate (mpres_t x2, mpres_t z2, mpres_t x1, mpres_t z1, mpmod_t n,
           mpres_t b, mpres_t u, mpres_t v, mpres_t w)
{
  mpres_add (u, x1, z1, n);
  mpres_sqr (u, u, n);      /* u = (x1+z1)^2 mod n */
  mpres_sub (v, x1, z1, n);
  mpres_sqr (v, v, n);      /* v = (x1-z1)^2 mod n */
  mpres_mul (x2, u, v, n);  /* x2 = u*v = (x1^2 - z1^2)^2 mod n */
  mpres_sub (w, u, v, n);   /* w = u-v = 4*x1*z1 */
  mpres_mul (u, w, b, n);   /* u = w*b = ((A+2)/4*(4*x1*z1)) mod n */
  mpres_add (u, u, v, n);   /* u = (x1-z1)^2+(A+2)/4*(4*x1*z1) */
  mpres_mul (z2, w, u, n);  /* z2 = ((4*x1*z1)*((x1-z1)^2+(A+2)/4*(4*x1*z1))) mod n */
}

/* multiply P=(x:z) by e and puts the result in (x:z).
*/ void ecm_mul (mpres_t x, mpres_t z, mpz_t e, mpmod_t n, mpres_t b) { size_t l; int negated = 0; mpres_t x0, z0, x1, z1, u, v, w; /* In Montgomery coordinates, the point at infinity is (0::0) */ if (mpz_sgn (e) == 0) { mpz_set_ui (x, 0); mpz_set_ui (z, 0); return; } /* The negative of a point (x:y:z) is (x:-y:u). Since we do not compute y, e*(x::z) == (-e)*(x::z). */ if (mpz_sgn (e) < 0) { negated = 1; mpz_neg (e, e); } if (mpz_cmp_ui (e, 1) == 0) goto ecm_mul_end; MEMORY_TAG; mpres_init (x0, n); MEMORY_TAG; mpres_init (z0, n); MEMORY_TAG; mpres_init (x1, n); MEMORY_TAG; mpres_init (z1, n); MEMORY_TAG; mpres_init (u, n); MEMORY_TAG; mpres_init (v, n); MEMORY_TAG; mpres_init (w, n); MEMORY_UNTAG; l = mpz_sizeinbase (e, 2) - 1; /* l >= 1 */ mpres_set (x0, x, n); mpres_set (z0, z, n); duplicate (x1, z1, x0, z0, n, b, u, v, w); /* invariant: (P1,P0) = ((k+1)P, kP) where k = floor(e/2^l) */ while (l-- > 0) { if (mpz_tstbit (e, l)) /* k, k+1 -> 2k+1, 2k+2 */ { add3 (x0, z0, x0, z0, x1, z1, x, z, n, u, v, w); /* 2k+1 */ duplicate (x1, z1, x1, z1, n, b, u, v, w); /* 2k+2 */ } else /* k, k+1 -> 2k, 2k+1 */ { add3 (x1, z1, x1, z1, x0, z0, x, z, n, u, v, w); /* 2k+1 */ duplicate (x0, z0, x0, z0, n, b, u, v, w); /* 2k */ } } mpres_set (x, x0, n); mpres_set (z, z0, n); mpres_clear (x0, n); mpres_clear (z0, n); mpres_clear (x1, n); mpres_clear (z1, n); mpres_clear (u, n); mpres_clear (v, n); mpres_clear (w, n); ecm_mul_end: /* Undo negation to avoid changing the caller's e value */ if (negated) mpz_neg (e, e); } #define ADD 6.0 /* number of multiplications in an addition */ #define DUP 5.0 /* number of multiplications in a duplicate */ /* returns the number of modular multiplications for computing V_n from V_r * V_{n-r} - V_{n-2r}. 
ADD is the cost of an addition
   DUP is the cost of a duplicate
*/
static double
lucas_cost (ecm_uint n, double v)
{
  ecm_uint d, e, r;
  double c; /* cost */

  /* r = n*v rounded to nearest; v is the ratio that selects the chain */
  d = n;
  r = (ecm_uint) ((double) d * v + 0.5);
  if (r >= n)
    return (ADD * (double) n); /* no usable split: charge n additions */
  d = n - r;
  e = 2 * r - n;
  c = DUP + ADD; /* initial duplicate and final addition */
  /* Reduce the pair (d, e) until both are equal, adding up the cost of the
     step chosen at each round.  The first condition that holds wins, so
     the order of the tests below is significant.
     NOTE(review): once e is 0 while d > 0 (this happens e.g. for n == 2)
     none of the steps can make d == e and the loop does not terminate;
     prac() documents k > 2 -- confirm that all callers respect this. */
  while (d != e)
    {
      /* keep d >= e */
      if (d < e)
        {
          r = d;
          d = e;
          e = r;
        }
      if (d - e <= e / 4 && ((d + e) % 3) == 0)
        { /* condition 1 */
          d = (2 * d - e) / 3;
          e = (e - d) / 2; /* uses the value of d just computed */
          c += 3.0 * ADD; /* 3 additions */
        }
      else if (d - e <= e / 4 && (d - e) % 6 == 0)
        { /* condition 2 */
          d = (d - e) / 2;
          c += ADD + DUP; /* one addition, one duplicate */
        }
      else if ((d + 3) / 4 <= e)
        { /* condition 3 */
          d -= e;
          c += ADD; /* one addition */
        }
      else if ((d + e) % 2 == 0)
        { /* condition 4 */
          d = (d - e) / 2;
          c += ADD + DUP; /* one addition, one duplicate */
        }
      /* now d+e is odd */
      else if (d % 2 == 0)
        { /* condition 5 */
          d /= 2;
          c += ADD + DUP; /* one addition, one duplicate */
        }
      /* now d is odd and e is even */
      else if (d % 3 == 0)
        { /* condition 6 */
          d = d / 3 - e;
          c += 3.0 * ADD + DUP; /* three additions, one duplicate */
        }
      else if ((d + e) % 3 == 0)
        { /* condition 7 */
          d = (d - 2 * e) / 3;
          c += 3.0 * ADD + DUP; /* three additions, one duplicate */
        }
      else if ((d - e) % 3 == 0)
        { /* condition 8 */
          d = (d - e) / 3;
          c += 3.0 * ADD + DUP; /* three additions, one duplicate */
        }
      else /* necessarily e is even: catches all cases */
        { /* condition 9 */
          e /= 2;
          c += ADD + DUP; /* one addition, one duplicate */
        }
    }

  return c;
}

/* computes kP from P=(xA:zA) and puts the result in (xA:zA). Assumes k>2.
   WARNING! The calls to add3() assume that the two input points are
   distinct, which is not necessarily satisfied. The result can be that
   in rare cases the point at infinity (z==0) results when it shouldn't.
   A test case is
   echo 33554520197234177 | ./ecm -sigma 2046841451 373 1
   which finds the prime even though it shouldn't (23^2=529 divides order).
   This is not a problem for ECM since at worst we'll find a factor we
   shouldn't have found. For other purposes (i.e. primality proving) this
   would have to be fixed first.
*/
/* Parameters, as used by the body below:
     xA, zA    input point, overwritten with the result
     k         the multiplier
     n         modulus passed to every mpres_ / add3 / duplicate call
     b         curve constant handed to duplicate()
     u, v, w   scratch passed through to add3() and duplicate()
     xB..zT2   scratch points B, C, T, T2; their contents are overwritten */
static void
prac (mpres_t xA, mpres_t zA, ecm_uint k, mpmod_t n, mpres_t b,
      mpres_t u, mpres_t v, mpres_t w, mpres_t xB, mpres_t zB, mpres_t xC,
      mpres_t zC, mpres_t xT, mpres_t zT, mpres_t xT2, mpres_t zT2)
{
  ecm_uint d, e, r, i = 0, nv;
  double c, cmin;
  __mpz_struct *tmp; /* used to rotate the point parameters without copying */
#define NV 10
  /* 1/val[0] = the golden ratio (1+sqrt(5))/2, and 1/val[i] for i>0
     is the real number whose continued fraction expansion is all 1s
     except for a 2 in i+1-st place */
  static double val[NV] =
    { 0.61803398874989485, 0.72360679774997897, 0.58017872829546410,
      0.63283980608870629, 0.61242994950949500, 0.62018198080741576,
      0.61721461653440386, 0.61834711965622806, 0.61791440652881789,
      0.61807966846989581};

  /* for small n, it makes no sense to try 10 different Lucas chains */
  /* NOTE(review): the cast reads the mpmod_t as an mpz; presumably the
     modulus value is its first member -- confirm against ecm-impl.h */
  nv = mpz_size ((mpz_ptr) n);
  if (nv > NV)
    nv = NV;

  if (nv > 1)
    {
      /* chooses the best value of v */
      /* d is reused as the loop index here; i keeps the index of the
         cheapest chain and stays 0 when no candidate beats ADD * k */
      for (d = 0, cmin = ADD * (double) k; d < nv; d++)
        {
          c = lucas_cost (k, val[d]);
          if (c < cmin)
            {
              cmin = c;
              i = d;
            }
        }
    }

  d = k;
  r = (ecm_uint) ((double) d * val[i] + 0.5);

  /* first iteration always begins by Condition 3, then a swap */
  d = k - r;
  e = 2 * r - k;
  mpres_set (xB, xA, n);
  mpres_set (zB, zA, n); /* B=A */
  mpres_set (xC, xA, n);
  mpres_set (zC, zA, n); /* C=A */
  duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */
  while (d != e)
    {
      if (d < e)
        {
          /* keep d >= e, swapping A and B along with the scalars */
          r = d;
          d = e;
          e = r;
          mpres_swap (xA, xB, n);
          mpres_swap (zA, zB, n);
        }
      /* do the first line of Table 4 whose condition qualifies */
      if (d - e <= e / 4 && ((d + e) % 3) == 0)
        { /* condition 1 */
          d = (2 * d - e) / 3;
          e = (e - d) / 2;
          add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */
          add3 (xT2, zT2, xT, zT, xA, zA, xB, zB, n, u, v, w); /* T2 = f(T,A,B) */
          add3 (xB, zB, xB, zB, xT, zT, xA, zA, n, u, v, w); /* B = f(B,T,A) */
          mpres_swap (xA, xT2, n);
          mpres_swap (zA, zT2, n); /* swap A and T2 */
        }
      else if (d - e <= e / 4 && (d - e) % 6 == 0)
        { /* condition 2 */
          d = (d - e) / 2;
          add3 (xB, zB, xA, zA, xB, zB, xC, zC, n, u, v, w); /* B = f(A,B,C) */
          duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */
        }
      else if ((d + 3) / 4 <= e)
        { /* condition 3 */
          d -= e;
          add3 (xT, zT, xB, zB, xA, zA, xC, zC, n, u, v, w); /* T = f(B,A,C) */
          /* circular permutation (B,T,C) */
          tmp = xB;
          xB = xT;
          xT = xC;
          xC = tmp;
          tmp = zB;
          zB = zT;
          zT = zC;
          zC = tmp;
        }
      else if ((d + e) % 2 == 0)
        { /* condition 4 */
          d = (d - e) / 2;
          add3 (xB, zB, xB, zB, xA, zA, xC, zC, n, u, v, w); /* B = f(B,A,C) */
          duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */
        }
      /* now d+e is odd */
      else if (d % 2 == 0)
        { /* condition 5 */
          d /= 2;
          add3 (xC, zC, xC, zC, xA, zA, xB, zB, n, u, v, w); /* C = f(C,A,B) */
          duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */
        }
      /* now d is odd, e is even */
      else if (d % 3 == 0)
        { /* condition 6 */
          d = d / 3 - e;
          duplicate (xT, zT, xA, zA, n, b, u, v, w); /* T = 2*A */
          add3 (xT2, zT2, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T2 = f(A,B,C) */
          add3 (xA, zA, xT, zT, xA, zA, xA, zA, n, u, v, w); /* A = f(T,A,A) */
          add3 (xT, zT, xT, zT, xT2, zT2, xC, zC, n, u, v, w); /* T = f(T,T2,C) */
          /* circular permutation (C,B,T) */
          tmp = xC;
          xC = xB;
          xB = xT;
          xT = tmp;
          tmp = zC;
          zC = zB;
          zB = zT;
          zT = tmp;
        }
      else if ((d + e) % 3 == 0)
        { /* condition 7 */
          d = (d - 2 * e) / 3;
          add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */
          add3 (xB, zB, xT, zT, xA, zA, xB, zB, n, u, v, w); /* B = f(T,A,B) */
          duplicate (xT, zT, xA, zA, n, b, u, v, w);
          add3 (xA, zA, xA, zA, xT, zT, xA, zA, n, u, v, w); /* A = 3*A */
        }
      else if ((d - e) % 3 == 0)
        { /* condition 8 */
          d = (d - e) / 3;
          add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */
          add3 (xC, zC, xC, zC, xA, zA, xB, zB, n, u, v, w); /* C = f(A,C,B) */
          mpres_swap (xB, xT, n);
          mpres_swap (zB, zT, n); /* swap B and T */
          duplicate (xT, zT, xA, zA, n, b, u, v, w);
          add3 (xA, zA, xA, zA, xT, zT, xA, zA, n, u, v, w); /* A = 3*A */
        }
      else /* necessarily e is even here */
        { /* condition 9 */
          e /= 2;
          add3 (xC, zC, xC, zC, xB, zB, xA, zA, n, u, v, w); /* C = f(C,B,A) */
          duplicate (xB, zB, xB, zB, n, b, u, v, w); /* B = 2*B */
        }
    }

  /* final addition combining A and B (difference C) into the result */
  add3 (xA, zA, xA, zA, xB, zB, xC, zC, n, u, v, w);

  ASSERT(d == 1);
}

/* Input: x is initial point
          A is curve parameter in Montgomery's form:
          g*y^2*z = x^3 + a*x^2*z + x*z^2
          n is the number to factor
          B1 is the stage 1 bound
   Output: If a factor is found, it is returned in x.
           Otherwise, x contains the x-coordinate of the point computed
           in stage 1 (with z coordinate normalized to 1).
           B1done is set to B1 if stage 1 completed normally,
           or to the largest prime processed if interrupted, but never
           to a smaller value than B1done was upon function entry.
   Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise
           ECM_NO_FACTOR_FOUND
   NOTE(review): in the code below the gcd is stored in f (mpres_gcd (f, z, n))
   when the inversion of z fails, not in x -- the "returned in x" sentence
   above looks outdated; confirm.
*/
static int
ecm_stage1 (mpz_t f, mpres_t x, mpres_t A, mpmod_t n, double B1,
            double *B1done, mpz_t go, int (*stop_asap)(void),
            char *chkfilename)
{
  mpres_t b, z, u, v, w, xB, zB, xC, zC, xT, zT, xT2, zT2;
  double p, r, last_chkpnt_p;
  int ret = ECM_NO_FACTOR_FOUND;
  long last_chkpnt_time;

  MEMORY_TAG;
  mpres_init (b, n);
  MEMORY_TAG;
  mpres_init (z, n);
  MEMORY_TAG;
  mpres_init (u, n);
  MEMORY_TAG;
  mpres_init (v, n);
  MEMORY_TAG;
  mpres_init (w, n);
  MEMORY_TAG;
  mpres_init (xB, n);
  MEMORY_TAG;
  mpres_init (zB, n);
  MEMORY_TAG;
  mpres_init (xC, n);
  MEMORY_TAG;
  mpres_init (zC, n);
  MEMORY_TAG;
  mpres_init (xT, n);
  MEMORY_TAG;
  mpres_init (zT, n);
  MEMORY_TAG;
  mpres_init (xT2, n);
  MEMORY_TAG;
  mpres_init (zT2, n);
  MEMORY_UNTAG;

  last_chkpnt_time = cputime ();

  mpres_set_ui (z, 1, n);

  mpres_add_ui (b, A, 2, n);
  mpres_div_2exp (b, b, 2, n); /* b == (A0+2)*B/4, where B=2^(k*GMP_NUMB_BITS)
                                  for MODMULN or REDC, B=1 otherwise */

  /* preload group order */
  if (go != NULL)
    ecm_mul (x, z, go, n, b);

  /* prac() wants multiplicands > 2 */
  /* powers of 2 already covered by a previous run (r <= *B1done) are skipped */
  for (r = 2.0; r <= B1; r *= 2.0)
    if (r > *B1done)
      duplicate (x, z, x, z, n, b, u, v, w);

  /* We'll do 3 manually, too (that's what ecm4 did..) */
  for (r = 3.0; r <= B1; r *= 3.0)
    if (r > *B1done)
      {
        duplicate (xB, zB, x, z, n, b, u, v, w);
        add3 (x, z, x, z, xB, zB, x, z, n, u, v, w);
      }

  last_chkpnt_p = 3.;
  p = getprime (); /* Puts 3.0 into p. Next call gives 5.0 */
  for (p = getprime (); p <= B1; p = getprime ())
    {
      /* multiply by p once for every power of p that is <= B1 and
         has not been processed before */
      for (r = p; r <= B1; r *= p)
        if (r > *B1done)
          prac (x, z, (ecm_uint) p, n, b, u, v, w, xB, zB, xC, zC, xT,
                zT, xT2, zT2);

      if (mpres_is_zero (z, n))
        {
          outputf (OUTPUT_VERBOSE, "Reached point at infinity, %.0f divides "
                   "group order\n", p);
          break;
        }

      if (stop_asap != NULL && (*stop_asap) ())
        {
          outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p);
          break;
        }

      /* checkpoint only when both enough primes and enough time have passed */
      if (chkfilename != NULL && p > last_chkpnt_p + 10000. &&
          elltime (last_chkpnt_time, cputime ()) > CHKPNT_PERIOD)
        {
          writechkfile (chkfilename, ECM_ECM, MAX(p, *B1done), n, A, x, z);
          last_chkpnt_p = p;
          last_chkpnt_time = cputime ();
        }
    }

  /* If stage 1 finished normally, p is the smallest prime >B1 here.
     In that case, set to B1 */
  if (p > B1)
      p = B1;

  if (p > *B1done)
    *B1done = p;

  if (chkfilename != NULL)
    writechkfile (chkfilename, ECM_ECM, *B1done, n, A, x, z);
  getprime_clear (); /* free the prime tables, and reinitialize */

  if (!mpres_invert (u, z, n)) /* Factor found? */
    {
      mpres_gcd (f, z, n);
      ret = ECM_FACTOR_FOUND_STEP1;
    }
  /* normalize x by 1/z; NOTE(review): this also runs when the inversion
     failed, so x then depends on whatever mpres_invert left in u */
  mpres_mul (x, x, u, n);

  mpres_clear (zT2, n);
  mpres_clear (xT2, n);
  mpres_clear (zT, n);
  mpres_clear (xT, n);
  mpres_clear (zC, n);
  mpres_clear (xC, n);
  mpres_clear (zB, n);
  mpres_clear (xB, n);
  mpres_clear (w, n);
  mpres_clear (v, n);
  mpres_clear (u, n);
  mpres_clear (z, n);
  mpres_clear (b, n);

  return ret;
}

/* choose "optimal" S according to step 2 range B2 */
/* Positive return values are used as a power x^S and negative ones as a
   Dickson polynomial of degree -S (see the per-branch comments). */
int
choose_S (mpz_t B2len)
{
  if (mpz_cmp_d (B2len, 1e7) < 0)
    return 1;   /* x^1 */
  else if (mpz_cmp_d (B2len, 1e8) < 0)
    return 2;   /* x^2 */
  else if (mpz_cmp_d (B2len, 1e9) < 0)
    return -3;  /* Dickson(3) */
  else if (mpz_cmp_d (B2len, 1e10) < 0)
    return -6;  /* Dickson(6) */
  else if (mpz_cmp_d (B2len, 3e11) < 0)
    return -12; /* Dickson(12) */
  else
    return -30; /* Dickson(30) */
}

/* Digit sizes tabulated by print_expcurves() and print_exptime():
   DIGITS_START, DIGITS_START + DIGITS_INCR, ..., DIGITS_END */
#define DIGITS_START 35
#define DIGITS_INCR 5
#define DIGITS_END 80

/* Prints, at OUTPUT_VERBOSE level, a header row of digit sizes followed by
   one row with the reciprocal of the probability returned by ecmprob() for
   each size ("Inf" when that probability is not positive). */
static void
print_expcurves (double B1, const mpz_t B2, unsigned long dF, unsigned long k,
                 int S, int batch)
{
  double prob;
  int i, j;
  char sep, outs[128];

  /* each header entry is two digits plus a separator, hence j += 3 */
  for (i = DIGITS_START, j = 0; i <= DIGITS_END; i += DIGITS_INCR, j += 3)
    sprintf (outs + j, "%2u%c", i, (i < DIGITS_END) ? '\t' : '\n');
  outs[j] = '\0';
  outputf (OUTPUT_VERBOSE, "Expected number of curves to find a factor "
           "of n digits:\n%s", outs);
  for (i = DIGITS_START; i <= DIGITS_END; i += DIGITS_INCR)
    {
      sep = (i < DIGITS_END) ? '\t' : '\n';
      prob = ecmprob (B1, mpz_get_d (B2),
                      /* in batch mode, the extra smoothness is smaller */
                      pow (10., i - .5) /
                      ((batch == 1) ? BATCH1_EXTRA_SMOOTHNESS : 1.0),
                      (double) dF * dF * k, S);
      if (prob > 1. / 10000000)
        outputf (OUTPUT_VERBOSE, "%.0f%c", floor (1. / prob + .5), sep);
      else if (prob > 0.)
        outputf (OUTPUT_VERBOSE, "%.2g%c", floor (1. / prob + .5), sep);
      else
        outputf (OUTPUT_VERBOSE, "Inf%c", sep);
    }
}

/* Same table layout as print_expcurves(), but each cell is tottime divided
   by the ecmprob() probability, printed with a unit chosen by magnitude.
   The thresholds below imply tottime is in milliseconds. */
static void
print_exptime (double B1, const mpz_t B2, unsigned long dF, unsigned long k,
               int S, double tottime, int batch)
{
  double prob, exptime;
  int i, j;
  char sep, outs[128];

  for (i = DIGITS_START, j = 0; i <= DIGITS_END; i += DIGITS_INCR, j += 3)
    sprintf (outs + j, "%2u%c", i, (i < DIGITS_END) ? '\t' : '\n');
  outs[j] = '\0';
  outputf (OUTPUT_VERBOSE, "Expected time to find a factor of n digits:\n%s",
           outs);
  for (i = DIGITS_START; i <= DIGITS_END; i += DIGITS_INCR)
    {
      sep = (i < DIGITS_END) ? '\t' : '\n';
      prob = ecmprob (B1, mpz_get_d (B2),
                      /* in batch mode, the extra smoothness is smaller */
                      pow (10., i - .5) /
                      ((batch == 1) ? BATCH1_EXTRA_SMOOTHNESS : 1.0),
                      (double) dF * dF * k, S);
      exptime = (prob > 0.) ? tottime / prob : HUGE_VAL;
      outputf (OUTPUT_TRACE, "Digits: %d, Total time: %.0f, probability: "
               "%g, expected time: %.0f\n", i, tottime, prob, exptime);
      if (exptime < 1000.)
        outputf (OUTPUT_VERBOSE, "%.0fms%c", exptime, sep);
      else if (exptime < 60000.) /* One minute */
        outputf (OUTPUT_VERBOSE, "%.2fs%c", exptime / 1000., sep);
      else if (exptime < 3600000.) /* One hour */
        outputf (OUTPUT_VERBOSE, "%.2fm%c", exptime / 60000., sep);
      else if (exptime < 86400000.) /* One day */
        outputf (OUTPUT_VERBOSE, "%.2fh%c", exptime / 3600000., sep);
      else if (exptime < 31536000000.) /* One year */
        outputf (OUTPUT_VERBOSE, "%.2fd%c", exptime / 86400000., sep);
      else if (exptime < 31536000000000.) /* One thousand years */
        outputf (OUTPUT_VERBOSE, "%.2fy%c", exptime / 31536000000., sep);
      else if (exptime < 31536000000000000.) /* One million years */
        outputf (OUTPUT_VERBOSE, "%.0fy%c", exptime / 31536000000., sep);
      else if (prob > 0.)
        outputf (OUTPUT_VERBOSE, "%.1gy%c", exptime / 31536000000., sep);
      else
        outputf (OUTPUT_VERBOSE, "Inf%c", sep);
    }
}

/* go should be NULL for P+1, and P-1, it contains the y coordinate for the
   Weierstrass form for ECM (when sigma_is_A = -1).
*/
/* Prints the "Using B1=..., B2=..., polynomial ..., <curve/start point>" line
   at the given verbosity level; prints nothing when test_verbose (verbosity)
   is false.

   verbosity    output level passed to test_verbose() and outputf()
   method       factoring method; curve parameters are printed for ECM_ECM
   B1, B1done   stage 1 bound, and the bound already done (printed as a
                range unless B1done is the default value)
   B2min_param  caller-supplied B2min; when negative only B2 is printed
   B2min, B2    effective stage 2 range
   S            > 0: printed as x^S, < 0: printed as Dickson(-S), 0: omitted
   x0           A, sigma or starting point, depending on method/sigma_is_A
   sigma_is_A   1: x0 is A, 0: x0 is sigma, -1: Weierstrass form
   go           y coordinate when sigma_is_A = -1 (see the comment above) */
void
print_B1_B2_poly (int verbosity, int method, double B1, double B1done,
                  mpz_t B2min_param, mpz_t B2min, mpz_t B2, int S, mpz_t x0,
                  int sigma_is_A, mpz_t go)
{
  ASSERT ((method == ECM_ECM) || (go == NULL));
  ASSERT ((-1 <= sigma_is_A) && (sigma_is_A <= 1));

  if (test_verbose (verbosity))
  {
      outputf (verbosity, "Using ");
      if (ECM_IS_DEFAULT_B1_DONE(B1done))
          outputf (verbosity, "B1=%1.0f, ", B1);
      else
          outputf (verbosity, "B1=%1.0f-%1.0f, ", B1done, B1);
      if (mpz_sgn (B2min_param) < 0)
          outputf (verbosity, "B2=%Zd", B2);
      else
          outputf (verbosity, "B2=%Zd-%Zd", B2min, B2);

      if (S > 0)
          outputf (verbosity, ", polynomial x^%u", S);
      else if (S < 0)
          outputf (verbosity, ", polynomial Dickson(%u)", -S);

      /* don't print in resume case, since x0 is saved in resume file */
      if (method == ECM_ECM)
        {
            if (sigma_is_A == 1)
                outputf (verbosity, ", A=%Zd", x0);
            else if (sigma_is_A == 0)
                outputf (verbosity, ", sigma=%Zd", x0);
            else if (go != NULL)
              /* sigma_is_A = -1: curve was given in Weierstrass form.
                 The second conversion used to read "y=Zd" (no '%'), which
                 printed the literal text and silently dropped go. */
                outputf (verbosity, ", Weierstrass(A=%Zd,y=%Zd)", x0, go);
            else
              /* no y coordinate supplied: avoid handing NULL to %Zd */
                outputf (verbosity, ", Weierstrass(A=%Zd)", x0);
        }
      else if (ECM_IS_DEFAULT_B1_DONE(B1done))
        outputf (verbosity, ", x0=%Zd", x0);

      outputf (verbosity, "\n");
  }
}

/* Input: x is starting point or zero
          sigma is sigma value (if x is set to zero) or
            A parameter (if x is non-zero) of curve
          n is the number to factor
          go is the initial group order to preload
          B1, B2 are the stage 1/stage 2 bounds, respectively
          B2min the lower bound for stage 2
          B2scale is the stage 2 scale factor
          k is the number of blocks to do in stage 2
          S is the degree of the Suyama-Brent extension for stage 2
          verbose is verbosity level: 0 no output, 1 normal output,
            2 diagnostic output.
          sigma_is_a: If true, the sigma parameter contains the curve's A value
   Output: f is the factor found.
   Return value: ECM_FACTOR_FOUND_STEPn if a factor was found,
                 ECM_NO_FACTOR_FOUND if no factor was found,
                 ECM_ERROR in case of error.
*/
int
ecm (mpz_t f, mpz_t x, mpz_t sigma, mpz_t n, mpz_t go, double *B1done,
     double B1, mpz_t B2min_parm, mpz_t B2_parm, double B2scale,
     unsigned long k, const int S, int verbose, int repr, int nobase2step2,
     int use_ntt, int sigma_is_A, FILE *os, FILE* es, char *chkfilename,
     char *TreeFilename, double maxmem, double stage1time,
     gmp_randstate_t rng, int (*stop_asap)(void), int batch, mpz_t batch_s,
     ATTRIBUTE_UNUSED double gw_k, ATTRIBUTE_UNUSED unsigned long gw_b,
     ATTRIBUTE_UNUSED unsigned long gw_n, ATTRIBUTE_UNUSED signed long gw_c)
{
  int youpi = ECM_NO_FACTOR_FOUND;
  int base2 = 0;  /* If n is of form 2^n[+-]1, set base to [+-]n */
  int Fermat = 0; /* If base2 > 0 is a power of 2, set Fermat to base2 */
  int po2 = 0;    /* Whether we should use power-of-2 poly degree */
  long st;
  mpmod_t modulus;
  curve P;
  mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */
  unsigned long dF;
  root_params_t root_params;

  /*  1: sigma contains A from Montgomery form By^2 = x^3 + Ax^2 + x
      0: sigma contains 'sigma' from Suyama's parametrization
     -1: sigma contains A from Weierstrass form y^2 = x^3 + Ax + B,
         and go contains B */
  ASSERT((-1 <= sigma_is_A) && (sigma_is_A <= 1));

  set_verbose (verbose);
  ECM_STDOUT = (os == NULL) ? stdout : os;
  /* NOTE(review): the fallback for the error stream is stdout, not stderr;
     confirm this is intended */
  ECM_STDERR = (es == NULL) ? stdout : es;

#ifdef MPRESN_NO_ADJUSTMENT
  /* When no adjustment is made in mpresn_ functions, N should be smaller
     than B^n/16 */
  if (mpz_sizeinbase (n, 2) > mpz_size (n) * GMP_NUMB_BITS - 4)
    {
      outputf (OUTPUT_ERROR, "Error, N should be smaller than B^n/16\n");
      return ECM_ERROR;
    }
#endif

  /* In batch mode, we force MODMULN */
  if (batch)
    repr = ECM_MOD_MODMULN;

  /* if n is even, return 2 */
  if (mpz_divisible_2exp_p (n, 1))
    {
      mpz_set_ui (f, 2);
      return ECM_FACTOR_FOUND_STEP1;
    }

  /* now n is odd */

  /* check that B1 is not too large */
  if (B1 > (double) ECM_UINT_MAX)
    {
      outputf (OUTPUT_ERROR, "Error, maximal step 1 bound for ECM is %lu.\n",
               ECM_UINT_MAX);
      return ECM_ERROR;
    }

  st = cputime ();

  /* nothing has been allocated yet, so the plain returns above and here
     need no cleanup */
  if (mpmod_init (modulus, n, repr) != 0)
    return ECM_ERROR;

  /* See what kind of number we have as that may influence optimal parameter
     selection. Test for base 2 number. Note: this was already done by
     mpmod_init. */

  if (modulus->repr == ECM_MOD_BASE2)
    base2 = modulus->bits;

  /* For a Fermat number (base2 a positive power of 2) */
  /* the empty loop strips trailing zero bits; Fermat == 1 afterwards
     exactly when base2 is a positive power of 2 */
  for (Fermat = base2; Fermat > 0 && (Fermat & 1) == 0; Fermat >>= 1);
  if (Fermat == 1)
    {
      Fermat = base2;
      po2 = 1;
    }
  else
      Fermat = 0;

  MEMORY_TAG;
  mpres_init (P.x, modulus);
  MEMORY_TAG;
  mpres_init (P.y, modulus);
  MEMORY_TAG;
  mpres_init (P.A, modulus);

  mpres_set_z (P.x, x, modulus);
  mpres_set_ui (P.y, 1, modulus);

  MEMORY_TAG;
  mpz_init_set (B2min, B2min_parm);
  MEMORY_TAG;
  mpz_init_set (B2, B2_parm);
  MEMORY_TAG;
  mpz_init (root_params.i0);
  MEMORY_UNTAG;

  /* set second stage bound B2: when using polynomial multiplication of
     complexity n^alpha, stage 2 has complexity about B2^(alpha/2), and
     we want stage 2 to take about half of stage 1, thus we choose
     B2 = (c*B1)^(2/alpha). Experimentally, c=1/4 seems to work well.
     For Toom-Cook 3, this gives alpha=log(5)/log(3), and B2 ~ (c*B1)^1.365.
     For Toom-Cook 4, this gives alpha=log(7)/log(4), and B2 ~ (c*B1)^1.424. */

  /* We take the cost of P+1 stage 1 to be about twice that of P-1.
     Since naive P+1 and ECM cost respectively 2 and 11 multiplies per
     addition and duplicate, and both are optimized with PRAC, we can
     assume the ratio remains about 11/2. */

  /* Also scale B2 by what the user said (or by the default scaling of 1.0) */

  if (ECM_IS_DEFAULT_B2(B2))
    mpz_set_d (B2, B2scale * pow (ECM_COST * B1, DEFAULT_B2_EXPONENT));

  /* set B2min */
  if (mpz_sgn (B2min) < 0)
    mpz_set_d (B2min, B1);

  /* Let bestD determine parameters for root generation and the
     effective B2 */

  if (use_ntt)
    po2 = 1;

  root_params.d2 = 0; /* Enable automatic choice of d2 */
  if (bestD (&root_params, &k, &dF, B2min, B2, po2, use_ntt, maxmem,
             (TreeFilename != NULL), modulus) == ECM_ERROR)
    {
      youpi = ECM_ERROR;
      goto end_of_ecm;
    }

  /* Set default degree for Brent-Suyama extension */
  /* We try to keep the time used by the Brent-Suyama extension
     at about 10% of the stage 2 time */
  /* Degree S Dickson polys and x^S are equally fast for ECM, so we go for
     the better Dickson polys whenever possible. For S == 1, 2, they behave
     identically. */

  root_params.S = S;
  if (root_params.S == ECM_DEFAULT_S)
    {
      if (Fermat > 0)
        {
          /* For Fermat numbers, default is 1 (no Brent-Suyama) */
          root_params.S = 1;
        }
      else
        {
          mpz_t t;
          MEMORY_TAG;
          mpz_init (t);
          MEMORY_UNTAG;
          mpz_sub (t, B2, B2min);
          root_params.S = choose_S (t);
          mpz_clear (t);
        }
    }

  /* Curve selection: one branch per (sigma_is_A, batch) combination */
  if (sigma_is_A == 0)
    {
      /* if sigma=0, generate it at random */
      if (mpz_sgn (sigma) == 0)
        {
          mpz_urandomb (sigma, rng, 32);
          mpz_add_ui (sigma, sigma, 6);
        }

      /* sigma contains sigma value, A and x values must be computed */
      youpi = get_curve_from_sigma (f, P.A, P.x, sigma, modulus);
      if (youpi != ECM_NO_FACTOR_FOUND)
        goto end_of_ecm;
    }
  else if (sigma_is_A == 1 && batch == 1)
    {
      if (mpz_sgn (sigma) == 0)
        {
          int i;
          /* We choose a positive integer d' smaller than B=2^GMP_NUMB_BITS
             and consider d = d'/B and A = 4d-2 */
          do
            mpz_urandomb (sigma, rng, 32); /* generates d' <> 0 */
          while (mpz_sgn (sigma) == 0);
          ASSERT((GMP_NUMB_BITS % 2) == 0);
          if (GMP_NUMB_BITS >= 64)
            mpz_mul (sigma, sigma, sigma); /* ensures d' (and thus d) is
                                              a square, which increases the
                                              success probability */
          /* divide d' by B to get d */
          /* one bit per iteration: make the value even by adding n (n is
             odd here), then halve it */
          for (i = 0; i < GMP_NUMB_BITS; i++)
            {
              if (mpz_tstbit (sigma, 0) == 1)
                mpz_add (sigma, sigma, n);
              mpz_div_2exp (sigma, sigma, 1);
            }
          mpz_mul_2exp (sigma, sigma, 2); /* 4d */
          mpz_sub_ui (sigma, sigma, 2);   /* 4d-2 */
        }

      mpres_set_z (P.A, sigma, modulus);
    }
  else if (sigma_is_A == 1 && batch == 2)
    {
      if (mpz_sgn (sigma) == 0)
        {
          mpz_urandomb (sigma, rng, 32);
          mpz_add_ui (sigma, sigma, 2);
          youpi = get_curve_from_ell_parametrization (f, P.A, sigma, modulus);
          /* sigma is overwritten with the resulting A value */
          mpres_get_z (sigma, P.A, modulus);
          if (youpi != ECM_NO_FACTOR_FOUND)
            goto end_of_ecm;
        }
      else /* sigma contains the A value */
        mpres_set_z (P.A, sigma, modulus);
    }
  else if (sigma_is_A == 1)
    {
      /* sigma contains the A value */
      mpres_set_z (P.A, sigma, modulus);
      /* TODO: make a valid, random starting point in case none was given */
      /* Problem: this may be as hard as factoring as we'd need to determine
         whether x^3 + a*x^2 + x is a quadratic residue or not */
      /* For now, we'll just chicken out. */
      if (mpz_sgn (x) == 0)
        {
          outputf (OUTPUT_ERROR,
                   "Error, -A requires a starting point (-x0 x).\n");
          youpi = ECM_ERROR;
          goto end_of_ecm;
        }
    }

  /* If a nonzero value is given in x, then we use it as the starting point,
     overwriting the one computing from sigma for sigma_is_A=0. */
  if (mpz_sgn (x) != 0)
      mpres_set_z (P.x, x, modulus);

  /* Print B1, B2, polynomial and sigma */
  print_B1_B2_poly (OUTPUT_NORMAL, ECM_ECM, B1, *B1done, B2min_parm, B2min,
                    B2, root_params.S, sigma, sigma_is_A, go);

#if 0
  outputf (OUTPUT_VERBOSE, "b2=%1.0f, dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n",
           b2, dF, k, root_params.d1, root_params.d2, root_params.i0);
#else
  outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n",
           dF, k, root_params.d1, root_params.d2, root_params.i0);
#endif

  if (sigma_is_A == -1) /* Weierstrass form: we perform only Stage 2,
                           since all curves in Weierstrass form do not
                           admit a Montgomery form. */
    {
      mpres_set_z (P.A, sigma, modulus); /* sigma contains A */
      /* NOTE(review): go is used here before the validity check just
         below, and neither statement tests go for NULL */
      mpres_set_z (P.y, go, modulus);    /* go contains y */
      if (mpz_sgn (x) == 0 || mpz_sgn (go) == 0)
        {
          outputf (OUTPUT_ERROR, "Error, sigma_is_A=-1 requires x and y.\n");
          youpi = ECM_ERROR;
          goto end_of_ecm;
        }
      goto hecm;
    }

  if (test_verbose (OUTPUT_RESVERBOSE))
    {
      mpz_t t;

      MEMORY_TAG;
      mpz_init (t);
      MEMORY_UNTAG;
      mpres_get_z (t, P.A, modulus);
      outputf (OUTPUT_RESVERBOSE, "A=%Zd\n", t);
      mpres_get_z (t, P.x, modulus);
      outputf (OUTPUT_RESVERBOSE, "starting point: x0=%Zd\n", t);
      mpz_clear (t);
    }

  if (go != NULL && mpz_cmp_ui (go, 1) > 0)
    outputf (OUTPUT_VERBOSE, "initial group order: %Zd\n", go);

  if (test_verbose (OUTPUT_VERBOSE))
    {
      if (mpz_cmp_d (B2min, B1) != 0)
        {
          outputf (OUTPUT_VERBOSE, "Can't compute success probabilities for B1 <> B2min\n");
        }
      else
        {
          /* the table set up here is released by rhoinit (1, 0) near the
             end_of_ecm_rhotable label */
          rhoinit (256, 10);
          print_expcurves (B1, B2, dF, k, root_params.S, batch);
        }
    }

#ifdef HAVE_GWNUM
  /* We will only use GWNUM for numbers of the form k*b^n+c */

  if (gw_b != 0 && B1 >= *B1done && batch == 0)
      youpi = gw_ecm_stage1 (f, &P, modulus, B1, B1done, go, gw_k, gw_b, gw_n, gw_c);

  /* At this point B1 == *B1done unless interrupted, or no GWNUM ecm_stage1
     is available */

  if (youpi != ECM_NO_FACTOR_FOUND)
    goto end_of_ecm_rhotable;
#endif

  /* stage 1 runs only when there is work left above *B1done */
  if (B1 > *B1done)
    {
        if (batch != 0)
        /* FIXME: go, stop_asap and chkfilename are ignored in batch mode */
            youpi = ecm_stage1_batch (f, P.x, P.A, modulus, B1, B1done,
                                      batch, batch_s);
        else
            youpi = ecm_stage1 (f, P.x, P.A, modulus, B1, B1done, go,
                                stop_asap, chkfilename);
    }

  if (stage1time > 0.)
    {
      /* stage1time is scaled by 1000 to match the millisecond figures */
      const long st2 = elltime (st, cputime ());
      const long s1t = (long) (stage1time * 1000.);
      outputf (OUTPUT_NORMAL,
               "Step 1 took %ldms (%ld in this run, %ld from previous runs)\n",
               st2 + s1t, st2, s1t);
    }
  else
    outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", elltime (st, cputime ()));

  /* Store end-of-stage-1 residue in x in case we write it to a save file,
     before P.x is converted to Weierstrass form */

  mpres_get_z (x, P.x, modulus);

  if (youpi != ECM_NO_FACTOR_FOUND)
    goto end_of_ecm_rhotable;

  if (test_verbose (OUTPUT_RESVERBOSE))
    {
      mpz_t t;

      MEMORY_TAG;
      mpz_init (t);
      MEMORY_UNTAG;
      mpres_get_z (t, P.x, modulus);
      outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", t);
      mpz_clear (t);
    }

  /* In case of a signal, we'll exit after the residue is printed. If no save
     file is specified, the user may still resume from the residue */
  if (stop_asap != NULL && (*stop_asap) ())
    goto end_of_ecm_rhotable;

  /* If using 2^k +/-1 modulus and 'nobase2step2' flag is set,
     set default (-nobase2) modular method and remap P.x, P.y, and P.A */
  if (modulus->repr == ECM_MOD_BASE2 && nobase2step2)
    {
      mpz_t x_t, y_t, A_t;

      MEMORY_TAG;
      mpz_init (x_t);
      MEMORY_UNTAG;
      MEMORY_TAG;
      mpz_init (y_t);
      MEMORY_UNTAG;
      MEMORY_TAG;
      mpz_init (A_t);
      MEMORY_UNTAG;

      /* the residues are passed directly as mpz operands here */
      mpz_mod (x_t, P.x, modulus->orig_modulus);
      mpz_mod (y_t, P.y, modulus->orig_modulus);
      mpz_mod (A_t, P.A, modulus->orig_modulus);

      mpmod_clear (modulus);

      repr = ECM_MOD_NOBASE2;
      /* NOTE(review): this return bypasses both cleanup labels, so
         x_t, y_t, A_t, P.x, P.y, P.A, B2, B2min and root_params.i0 are
         never cleared on this path */
      if (mpmod_init (modulus, n, repr) != 0) /* reset modulus for nobase2 */
        return ECM_ERROR;

      /* remap x, y, and A for new modular method */
      mpres_set_z (P.x, x_t, modulus);
      mpres_set_z (P.y, y_t, modulus);
      mpres_set_z (P.A, A_t, modulus);

      mpz_clear (x_t);
      mpz_clear (y_t);
      mpz_clear (A_t);
    }

  youpi = montgomery_to_weierstrass (f, P.x, P.y, P.A, modulus);

  /* entry point for curves given directly in Weierstrass form
     (sigma_is_A == -1), which skip stage 1 and the conversion above */
hecm:

  if (test_verbose (OUTPUT_RESVERBOSE) && youpi == ECM_NO_FACTOR_FOUND &&
      mpz_cmp (B2, B2min) >= 0)
    {
      mpz_t t;

      MEMORY_TAG;
      mpz_init (t);
      MEMORY_UNTAG;
      mpres_get_z (t, P.x, modulus);
      outputf (OUTPUT_RESVERBOSE, "After switch to Weierstrass form, "
               "P=(%Zd", t);
      mpres_get_z (t, P.y, modulus);
      outputf (OUTPUT_RESVERBOSE, ", %Zd)\n", t);
      mpres_get_z (t, P.A, modulus);
      outputf (OUTPUT_RESVERBOSE, "on curve Y^2 = X^3 + %Zd * X + b\n", t);
      mpz_clear (t);
    }

  /* stage 2 is skipped when a factor/error is pending or the range is empty */
  if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0)
    youpi = stage2 (f, &P, modulus, dF, k, &root_params, ECM_ECM,
                    use_ntt, TreeFilename, stop_asap);

end_of_ecm_rhotable:
  if (test_verbose (OUTPUT_VERBOSE))
    {
      if (mpz_cmp_d (B2min, B1) == 0)
        {
          if (youpi == ECM_NO_FACTOR_FOUND &&
              (stop_asap == NULL || !(*stop_asap)()))
            print_exptime (B1, B2, dF, k, root_params.S,
                           (long) (stage1time * 1000.) +
                           elltime (st, cputime ()), batch);
          rhoinit (1, 0); /* Free memory of rhotable */
        }
    }

end_of_ecm:
  mpres_clear (P.A, modulus);
  mpres_clear (P.y, modulus);
  mpres_clear (P.x, modulus);
  mpmod_clear (modulus);
  mpz_clear (root_params.i0);
  mpz_clear (B2);
  mpz_clear (B2min);

  return youpi;
}
ecm-6.4.4/ntt_gfp.c0000644023561000001540000004576112106741273011040 00000000000000/* ntt_gfp.c - low-level radix-2 dif/dit ntt routines over GF(p) Copyright 2005, 2006, 2007, 2008, 2009 Dave Newman, Jason Papadopoulos, Brian Gladman, Alexander Kruppa, Paul Zimmermann. The SP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The SP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the SP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/ #include "sp.h" #include "ecm-impl.h" /*--------------------------- FORWARD NTT --------------------------------*/ static void bfly_dif(spv_t x0, spv_t x1, spv_t w, spv_size_t len, sp_t p, sp_t d) { spv_size_t i = 0; #if (defined(__GNUC__) || defined(__ICL)) && \ defined(__i386__) && defined(HAVE_SSE2) asm volatile ( "movd %6, %%xmm6 \n\t" "pshufd $0x44, %%xmm6, %%xmm5 \n\t" "pshufd $0, %%xmm6, %%xmm6 \n\t" "movd %7, %%xmm7 \n\t" "pshufd $0, %%xmm7, %%xmm7 \n\t" "0: \n\t" "movdqa (%1,%4,4), %%xmm0 \n\t" "movdqa (%2,%4,4), %%xmm1 \n\t" "movdqa %%xmm1, %%xmm2 \n\t" "paddd %%xmm0, %%xmm1 \n\t" "psubd %%xmm2, %%xmm0 \n\t" "psubd %%xmm6, %%xmm1 \n\t" "pxor %%xmm2, %%xmm2 \n\t" "pcmpgtd %%xmm1, %%xmm2 \n\t" "pand %%xmm6, %%xmm2 \n\t" "paddd %%xmm2, %%xmm1 \n\t" "movdqa %%xmm1, (%1,%4,4) \n\t" "pxor %%xmm2, %%xmm2 \n\t" "pcmpgtd %%xmm0, %%xmm2 \n\t" "pand %%xmm6, %%xmm2 \n\t" "paddd %%xmm2, %%xmm0 \n\t" "movdqa (%3,%4,4), %%xmm2 \n\t" "addl $4, %4 \n\t" /* INC */ "pshufd $0x31, %%xmm0, %%xmm1\n\t" "pshufd $0x31, %%xmm2, %%xmm3\n\t" "pmuludq %%xmm2, %%xmm0 \n\t" "pmuludq %%xmm3, %%xmm1 \n\t" "movdqa %%xmm0, %%xmm2 \n\t" "movdqa %%xmm1, %%xmm3 \n\t" "psrlq $" STRING((2*SP_NUMB_BITS - W_TYPE_SIZE)) ", %%xmm2 \n\t" "pmuludq %%xmm7, %%xmm2 \n\t" "psrlq $" STRING((2*SP_NUMB_BITS - W_TYPE_SIZE)) ", %%xmm3 \n\t" "pmuludq %%xmm7, %%xmm3 \n\t" #if SP_NUMB_BITS < W_TYPE_SIZE - 1 "psrlq $33, %%xmm2 \n\t" "pmuludq %%xmm6, %%xmm2 \n\t" "psrlq $33, %%xmm3 \n\t" "pmuludq %%xmm6, %%xmm3 \n\t" "psubq %%xmm2, %%xmm0 \n\t" "psubq %%xmm3, %%xmm1 \n\t" #else "pshufd $0xf5, %%xmm2, %%xmm2 \n\t" "pmuludq %%xmm6, %%xmm2 \n\t" "pshufd $0xf5, %%xmm3, %%xmm3 \n\t" "pmuludq %%xmm6, %%xmm3 \n\t" "psubq %%xmm2, %%xmm0 \n\t" "psubq %%xmm3, %%xmm1 \n\t" "psubq %%xmm5, %%xmm0 \n\t" "psubq %%xmm5, %%xmm1 \n\t" "pshufd $0xf5, %%xmm0, %%xmm2 \n\t" "pshufd $0xf5, %%xmm1, %%xmm3 \n\t" "pand %%xmm5, %%xmm2 \n\t" "pand %%xmm5, %%xmm3 \n\t" "paddq %%xmm2, %%xmm0 \n\t" "paddq %%xmm3, %%xmm1 \n\t" #endif "pshufd 
$0x8, %%xmm0, %%xmm0 \n\t" "pshufd $0x8, %%xmm1, %%xmm1 \n\t" "punpckldq %%xmm1, %%xmm0 \n\t" "psubd %%xmm6, %%xmm0 \n\t" "pxor %%xmm1, %%xmm1 \n\t" "pcmpgtd %%xmm0, %%xmm1 \n\t" "pand %%xmm6, %%xmm1 \n\t" "paddd %%xmm1, %%xmm0 \n\t" "movdqa %%xmm0, -16(%2,%4,4) \n\t" "cmpl %5, %4 \n\t" "jne 0b \n\t" :"=r"(i) :"r"(x0), "r"(x1), "r"(w), "0"(i), "g"(len), "g"(p), "g"(d) :"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm5", "%xmm6", "%xmm7", "cc", "memory"); #elif defined( _MSC_VER ) && defined( SSE2) __asm { push esi push edi mov edi, x0 mov esi, x1 mov edx, w xor ecx, ecx mov eax, len movd xmm6, p pshufd xmm5, xmm6, 0x44 pshufd xmm6, xmm6, 0 movd xmm7, d pshufd xmm7, xmm7, 0 L0: movdqa xmm0, [edi+ecx*4] movdqa xmm1, [esi+ecx*4] movdqa xmm2, xmm1 paddd xmm1, xmm0 psubd xmm0, xmm2 psubd xmm1, xmm6 pxor xmm2, xmm2 pcmpgtd xmm2, xmm1 pand xmm2, xmm6 paddd xmm1, xmm2 movdqa [edi+ecx*4], xmm1 pxor xmm2, xmm2 pcmpgtd xmm2, xmm0 pand xmm2, xmm6 paddd xmm0, xmm2 movdqa xmm2, [edx+ecx*4] add ecx, 4 pshufd xmm1, xmm0, 0x31 pshufd xmm3, xmm2, 0x31 pmuludq xmm0, xmm2 pmuludq xmm1, xmm3 movdqa xmm2, xmm0 movdqa xmm3, xmm1 psrlq xmm2, 2*SP_NUMB_BITS - W_TYPE_SIZE pmuludq xmm2, xmm7 psrlq xmm3, 2*SP_NUMB_BITS - W_TYPE_SIZE pmuludq xmm3, xmm7 #if SP_NUMB_BITS < W_TYPE_SIZE - 1 psrlq xmm2, 33 pmuludq xmm2, xmm6 psrlq xmm3, 33 pmuludq xmm3, xmm6 psubq xmm0, xmm2 psubq xmm1, xmm3 #else pshufd xmm2, xmm2, 0xf5 pmuludq xmm2, xmm6 pshufd xmm3, xmm3, 0xf5 pmuludq xmm3, xmm6 psubq xmm0, xmm2 psubq xmm1, xmm3 psubq xmm0, xmm5 psubq xmm1, xmm5 pshufd xmm2, xmm0, 0xf5 pshufd xmm3, xmm1, 0xf5 pand xmm2, xmm5 pand xmm3, xmm5 paddq xmm0, xmm2 paddq xmm1, xmm3 #endif pshufd xmm0, xmm0, 0x8 pshufd xmm1, xmm1, 0x8 punpckldq xmm0, xmm1 psubd xmm0, xmm6 pxor xmm1, xmm1 pcmpgtd xmm1, xmm0 pand xmm1, xmm6 paddd xmm0, xmm1 movdqa [esi+ecx*4-16], xmm0 cmp eax, ecx jne L0 pop edi pop esi } #else for (i = 0; i < len; i++) { sp_t w0 = w[i]; sp_t t0 = x0[i]; sp_t t1 = x1[i]; sp_t t2, t3; t2 = sp_add (t0, t1, p); t3 = 
sp_sub (t0, t1, p); t3 = sp_mul (t3, w0, p, d); x0[i] = t2; x1[i] = t3; } #endif } static void spv_ntt_dif_core (spv_t x, spv_t w, spv_size_t log2_len, sp_t p, sp_t d) { spv_size_t len; spv_t x0, x1; /* handle small transforms immediately */ switch (log2_len) { case 0: return; case 1: { sp_t t0 = x[0]; sp_t t1 = x[1]; x[0] = sp_add (t0, t1, p); x[1] = sp_sub (t0, t1, p); return; } case 2: { sp_t t0 = x[0]; sp_t t1 = x[1]; sp_t t2 = x[2]; sp_t t3 = x[3]; sp_t t4, t5, t6, t7; t4 = sp_add (t0, t2, p); t6 = sp_sub (t0, t2, p); t5 = sp_add (t1, t3, p); t7 = sp_sub (t1, t3, p); x[0] = sp_add (t4, t5, p); x[1] = sp_sub (t4, t5, p); t7 = sp_mul (t7, w[1], p, d); x[2] = sp_add (t6, t7, p); x[3] = sp_sub (t6, t7, p); return; } case 3: { sp_t t0 = x[0]; sp_t t1 = x[1]; sp_t t2 = x[2]; sp_t t3 = x[3]; sp_t t4 = x[4]; sp_t t5 = x[5]; sp_t t6 = x[6]; sp_t t7 = x[7]; sp_t t8, t9, t10, t11, t12, t13, t14, t15; t8 = sp_add (t0, t4, p); t12 = sp_sub (t0, t4, p); t9 = sp_add (t1, t5, p); t13 = sp_sub (t1, t5, p); t13 = sp_mul (t13, w[1], p, d); t10 = sp_add (t2, t6, p); t14 = sp_sub (t2, t6, p); t14 = sp_mul (t14, w[2], p, d); t11 = sp_add (t3, t7, p); t15 = sp_sub (t3, t7, p); t15 = sp_mul (t15, w[3], p, d); t0 = sp_add (t8, t10, p); t2 = sp_sub (t8, t10, p); t1 = sp_add (t9, t11, p); t3 = sp_sub (t9, t11, p); t3 = sp_mul (t3, w[2], p, d); x[0] = sp_add (t0, t1, p); x[1] = sp_sub (t0, t1, p); x[2] = sp_add (t2, t3, p); x[3] = sp_sub (t2, t3, p); t0 = sp_add (t12, t14, p); t2 = sp_sub (t12, t14, p); t1 = sp_add (t13, t15, p); t3 = sp_sub (t13, t15, p); t3 = sp_mul (t3, w[2], p, d); x[4] = sp_add (t0, t1, p); x[5] = sp_sub (t0, t1, p); x[6] = sp_add (t2, t3, p); x[7] = sp_sub (t2, t3, p); return; } } len = 1 << (log2_len - 1); x0 = x; x1 = x + len; bfly_dif (x0, x1, w, len, p, d); spv_ntt_dif_core (x0, w + len, log2_len - 1, p, d); spv_ntt_dif_core (x1, w + len, log2_len - 1, p, d); } void spv_ntt_gfp_dif (spv_t x, spv_size_t log2_len, spm_t data) { sp_t p = data->sp; sp_t d = 
data->mul_c; if (log2_len <= NTT_GFP_TWIDDLE_DIF_BREAKOVER) { spv_t w = data->nttdata->twiddle + data->nttdata->twiddle_size - (1 << log2_len); spv_ntt_dif_core (x, w, log2_len, p, d); } else { /* recursive version for data that doesn't fit in the L1 cache */ spv_size_t len = 1 << (log2_len - 1); spv_t x0 = x; spv_t x1 = x + len; spv_t roots = data->nttdata->ntt_roots; { spv_size_t i; spv_size_t block_size = MIN(len, MAX_NTT_BLOCK_SIZE); sp_t root = roots[log2_len]; spv_t w = data->scratch; w[0] = 1; for (i = 1; i < block_size; i++) w[i] = sp_mul (w[i-1], root, p, d); root = sp_pow (root, block_size, p, d); for (i = 0; i < len; i += block_size) { if (i) spv_mul_sp (w, w, root, block_size, p, d); bfly_dif (x0 + i, x1 + i, w, block_size, p, d); } } spv_ntt_gfp_dif (x0, log2_len - 1, data); spv_ntt_gfp_dif (x1, log2_len - 1, data); } }

/*--------------------------- INVERSE NTT --------------------------------*/

/* Decimation-in-time butterfly on two vectors of len elements, in place:

     t     = x1[i] * w[i] mod p
     x0[i] = x0[i] + t    mod p
     x1[i] = x0[i] - t    mod p      (using the old x0[i])

   d is the extra argument handed to sp_mul together with p.

   Three implementations are selected at compile time: GCC/ICC inline
   assembly for 32-bit x86 with SSE2, MSVC inline assembly with SSE2, and
   a portable C loop.  Both SSE2 variants handle four 32-bit elements per
   iteration with aligned loads and stores (movdqa) and test the loop
   condition only after the first pass, so they appear to require
   16-byte aligned vectors and a non-zero len that is a multiple of 4
   -- NOTE(review): confirm that every caller guarantees this. */
static inline void
bfly_dit(spv_t x0, spv_t x1, spv_t w, spv_size_t len, sp_t p, sp_t d)
{
  spv_size_t i = 0;

#if (defined(__GNUC__) || defined(__ICL)) && \
    defined(__i386__) && defined(HAVE_SSE2)

  /* operands: %1 = x0, %2 = x1, %3 = w, %4 = i, %5 = len, %6 = p, %7 = d */
  asm volatile (
       /* xmm6 = p in all four dwords, xmm5 = p in the low dword of each
          qword, xmm7 = d in all four dwords */
       "movd %6, %%xmm6 \n\t"
       "pshufd $0x44, %%xmm6, %%xmm5 \n\t"
       "pshufd $0, %%xmm6, %%xmm6 \n\t"
       "movd %7, %%xmm7 \n\t"
       "pshufd $0, %%xmm7, %%xmm7 \n\t"

       "0: \n\t"
       /* load four elements of x1 and of w; the even-indexed pairs are
          multiplied in xmm0, the odd-indexed pairs in xmm1 */
       "movdqa (%2,%4,4), %%xmm0 \n\t"
       "movdqa (%3,%4,4), %%xmm2 \n\t"
       "pshufd $0x31, %%xmm0, %%xmm1\n\t"
       "pshufd $0x31, %%xmm2, %%xmm3\n\t"
       "pmuludq %%xmm2, %%xmm0 \n\t"
       "pmuludq %%xmm3, %%xmm1 \n\t"

       /* reduce the 64-bit products modulo p using the constant d */
       "movdqa %%xmm0, %%xmm2 \n\t"
       "movdqa %%xmm1, %%xmm3 \n\t"
       "psrlq $" STRING((2*SP_NUMB_BITS - W_TYPE_SIZE)) ", %%xmm2 \n\t"
       "pmuludq %%xmm7, %%xmm2 \n\t"
       "psrlq $" STRING((2*SP_NUMB_BITS - W_TYPE_SIZE)) ", %%xmm3 \n\t"
       "pmuludq %%xmm7, %%xmm3 \n\t"

#if SP_NUMB_BITS < W_TYPE_SIZE - 1
       "psrlq $33, %%xmm2 \n\t"
       "pmuludq %%xmm6, %%xmm2 \n\t"
       "psrlq $33, %%xmm3 \n\t"
       "pmuludq %%xmm6, %%xmm3 \n\t"
       "psubq %%xmm2, %%xmm0 \n\t"
       "psubq %%xmm3, %%xmm1 \n\t"
#else
       "pshufd $0xf5, %%xmm2, %%xmm2 \n\t"
       "pmuludq %%xmm6, %%xmm2 \n\t"
       "pshufd $0xf5, %%xmm3, %%xmm3 \n\t"
       "pmuludq %%xmm6, %%xmm3 \n\t"
       "psubq %%xmm2, %%xmm0 \n\t"
       "psubq %%xmm3, %%xmm1 \n\t"

       "psubq %%xmm5, %%xmm0 \n\t"
       "psubq %%xmm5, %%xmm1 \n\t"
       "pshufd $0xf5, %%xmm0, %%xmm2 \n\t"
       "pshufd $0xf5, %%xmm1, %%xmm3 \n\t"
       "pand %%xmm5, %%xmm2 \n\t"
       "pand %%xmm5, %%xmm3 \n\t"
       "paddq %%xmm2, %%xmm0 \n\t"
       "paddq %%xmm3, %%xmm1 \n\t"
#endif
       /* repack the four 32-bit remainders into one register, then
          subtract p and add it back where the result went negative */
       "pshufd $0x8, %%xmm0, %%xmm0 \n\t"
       "pshufd $0x8, %%xmm1, %%xmm1 \n\t"
       "punpckldq %%xmm1, %%xmm0 \n\t"
       "psubd %%xmm6, %%xmm0 \n\t"

       "pxor %%xmm1, %%xmm1 \n\t"
       "pcmpgtd %%xmm0, %%xmm1 \n\t"
       "pand %%xmm6, %%xmm1 \n\t"
       "paddd %%xmm0, %%xmm1 \n\t"

       /* butterfly: xmm1 = x0 + t, xmm0 = x0 - t, each corrected into
          range with the same compare/mask/add sequence */
       "movdqa (%1,%4,4), %%xmm0 \n\t"
       "movdqa %%xmm1, %%xmm2 \n\t"
       "paddd %%xmm0, %%xmm1 \n\t"
       "psubd %%xmm2, %%xmm0 \n\t"
       "psubd %%xmm6, %%xmm1 \n\t"

       "pxor %%xmm2, %%xmm2 \n\t"
       "pcmpgtd %%xmm1, %%xmm2 \n\t"
       "pand %%xmm6, %%xmm2 \n\t"
       "paddd %%xmm2, %%xmm1 \n\t"
       "movdqa %%xmm1, (%1,%4,4) \n\t"

       "pxor %%xmm2, %%xmm2 \n\t"
       "pcmpgtd %%xmm0, %%xmm2 \n\t"
       "pand %%xmm6, %%xmm2 \n\t"
       "paddd %%xmm2, %%xmm0 \n\t"
       "movdqa %%xmm0, (%2,%4,4) \n\t"

       "addl $4, %4 \n\t" /* INC */
       "cmpl %5, %4 \n\t"
       "jne 0b \n\t"

       :"=r"(i)
       :"r"(x0), "r"(x1), "r"(w), "0"(i), "g"(len), "g"(p), "g"(d)
       :"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm5", "%xmm6", "%xmm7", "cc", "memory");

#elif defined( _MSC_VER ) && defined( SSE2)

  /* same instruction sequence as the GCC variant above, in MSVC syntax;
     edi = x0, esi = x1, edx = w, ecx = element index, eax = len */
  __asm
    {
        push esi
        push edi
        mov edi, x0
        mov esi, x1
        mov edx, w
        xor ecx, ecx
        mov eax, len
        movd xmm6, p
        pshufd xmm5, xmm6, 0x44
        pshufd xmm6, xmm6, 0
        movd xmm7, d
        pshufd xmm7, xmm7, 0

    L0:
        movdqa xmm0, [esi+ecx*4]
        movdqa xmm2, [edx+ecx*4]
        pshufd xmm1, xmm0, 0x31
        pshufd xmm3, xmm2, 0x31
        pmuludq xmm0, xmm2
        pmuludq xmm1, xmm3

        movdqa xmm2, xmm0
        movdqa xmm3, xmm1
        psrlq xmm2, 2*SP_NUMB_BITS - W_TYPE_SIZE
        pmuludq xmm2, xmm7
        psrlq xmm3, 2*SP_NUMB_BITS - W_TYPE_SIZE
        pmuludq xmm3, xmm7

#if SP_NUMB_BITS < W_TYPE_SIZE - 1
        psrlq xmm2, 33
        pmuludq xmm2, xmm6
        psrlq xmm3, 33
        pmuludq xmm3, xmm6
        psubq xmm0, xmm2
        psubq xmm1, xmm3
#else
        pshufd xmm2, xmm2, 0xf5
        pmuludq xmm2, xmm6
        pshufd xmm3, xmm3, 0xf5
        pmuludq xmm3, xmm6
        psubq xmm0, xmm2
        psubq xmm1, xmm3

        psubq xmm0, xmm5
        psubq xmm1, xmm5
        pshufd xmm2, xmm0, 0xf5
        pshufd xmm3, xmm1, 0xf5
        pand xmm2, xmm5
        pand xmm3, xmm5
        paddq xmm0, xmm2
        paddq xmm1, xmm3
#endif
        pshufd xmm0, xmm0, 0x8
        pshufd xmm1, xmm1, 0x8
        punpckldq xmm0, xmm1
        psubd xmm0, xmm6

        pxor xmm1, xmm1
        pcmpgtd xmm1, xmm0
        pand xmm1, xmm6
        paddd xmm1, xmm0

        movdqa xmm0, [edi+ecx*4]
        movdqa xmm2, xmm1
        paddd xmm1, xmm0
        psubd xmm0, xmm2
        psubd xmm1, xmm6

        pxor xmm2, xmm2
        pcmpgtd xmm2, xmm1
        pand xmm2, xmm6
        paddd xmm1, xmm2
        movdqa [edi+ecx*4], xmm1

        pxor xmm2, xmm2
        pcmpgtd xmm2, xmm0
        pand xmm2, xmm6
        paddd xmm0, xmm2
        movdqa [esi+ecx*4], xmm0

        add ecx, 4
        cmp eax, ecx
        jne L0
        pop edi
        pop esi
    }
#else
  /* portable reference implementation, one element at a time */
  for (i = 0; i < len; i++)
    {
      sp_t w0 = w[i];
      sp_t t0 = x0[i];
      sp_t t1 = x1[i];
      t1 = sp_mul (t1, w0, p, d);
      x0[i] = sp_add (t0, t1, p);
      x1[i] = sp_sub (t0, t1, p);
    }
#endif
}

/* In-place decimation-in-time transform of the 2^log2_len elements of x
   modulo p, using a precomputed twiddle table.

   w points to the twiddle factors of the current level (2^(log2_len-1)
   entries, consumed by bfly_dit); the table for the next smaller level
   is expected directly behind it, at w + 2^(log2_len-1).  Transforms of
   length 1, 2, 4 and 8 are fully unrolled; longer ones transform both
   halves recursively and then combine them with one butterfly pass. */
static void
spv_ntt_dit_core (spv_t x, spv_t w,
		  spv_size_t log2_len, sp_t p, sp_t d)
{
  spv_size_t len;
  spv_t x0, x1;

  /* handle small transforms immediately */
  switch (log2_len) {
   case 0:
    /* a single element is its own transform */
    return;
   case 1:
    {
      /* length 2: one butterfly, no twiddle multiplication needed */
      sp_t t0 = x[0];
      sp_t t1 = x[1];
      x[0] = sp_add (t0, t1, p);
      x[1] = sp_sub (t0, t1, p);
      return;
    }
   case 2:
    {
      /* length 4: two length-2 transforms, then combine; only the odd
         output pair needs a multiplication, by w[1] */
      sp_t t0 = x[0];
      sp_t t1 = x[1];
      sp_t t2 = x[2];
      sp_t t3 = x[3];
      sp_t t4, t5, t6, t7;
      t4 = sp_add (t0, t1, p);
      t5 = sp_sub (t0, t1, p);
      t6 = sp_add (t2, t3, p);
      t7 = sp_sub (t2, t3, p);
      x[0] = sp_add (t4, t6, p);
      x[2] = sp_sub (t4, t6, p);
      t7 = sp_mul (t7, w[1], p, d);
      x[1] = sp_add (t5, t7, p);
      x[3] = sp_sub (t5, t7, p);
      return;
    }
   case 3:
    {
      /* length 8: two unrolled length-4 transforms (each uses w[2] as
         its twiddle), followed by the combining pass with w[1..3] */
      sp_t t0 = x[0];
      sp_t t1 = x[1];
      sp_t t2 = x[2];
      sp_t t3 = x[3];
      sp_t t4 = x[4];
      sp_t t5 = x[5];
      sp_t t6 = x[6];
      sp_t t7 = x[7];
      sp_t t8, t9, t10, t11;

      /* first half: x[0..3] */
      t8 = sp_add(t0, t1, p);
      t9 = sp_sub(t0, t1, p);
      t10 = sp_add(t2, t3, p);
      t11 = sp_sub(t2, t3, p);
      t0 = sp_add(t8, t10, p);
      t2 = sp_sub(t8, t10, p);
      t11 = sp_mul (t11, w[2], p, d);
      t1 = sp_add(t9, t11, p);
      t3 = sp_sub(t9, t11, p);

      /* second half: x[4..7] */
      t8 = sp_add(t4, t5, p);
      t9 = sp_sub(t4, t5, p);
      t10 = sp_add(t6, t7, p);
      t11 = sp_sub(t6, t7, p);
      t4 =
sp_add(t8, t10, p); t6 = sp_sub(t8, t10, p); t11 = sp_mul (t11, w[2], p, d); t5 = sp_add(t9, t11, p); t7 = sp_sub(t9, t11, p); x[0] = sp_add(t0, t4, p); x[4] = sp_sub(t0, t4, p); t5 = sp_mul (t5, w[1], p, d); x[1] = sp_add(t1, t5, p); x[5] = sp_sub(t1, t5, p); t6 = sp_mul (t6, w[2], p, d); x[2] = sp_add(t2, t6, p); x[6] = sp_sub(t2, t6, p); t7 = sp_mul (t7, w[3], p, d); x[3] = sp_add(t3, t7, p); x[7] = sp_sub(t3, t7, p); return; } } len = 1 << (log2_len - 1); x0 = x; x1 = x + len; spv_ntt_dit_core (x0, w + len, log2_len - 1, p, d); spv_ntt_dit_core (x1, w + len, log2_len - 1, p, d); bfly_dit (x0, x1, w, len, p, d); } void spv_ntt_gfp_dit (spv_t x, spv_size_t log2_len, spm_t data) { sp_t p = data->sp; sp_t d = data->mul_c; if (log2_len <= NTT_GFP_TWIDDLE_DIT_BREAKOVER) { spv_t w = data->inttdata->twiddle + data->inttdata->twiddle_size - (1 << log2_len); spv_ntt_dit_core (x, w, log2_len, p, d); } else { spv_size_t len = 1 << (log2_len - 1); spv_t x0 = x; spv_t x1 = x + len; spv_t roots = data->inttdata->ntt_roots; spv_ntt_gfp_dit (x0, log2_len - 1, data); spv_ntt_gfp_dit (x1, log2_len - 1, data); { spv_size_t i; spv_size_t block_size = MIN(len, MAX_NTT_BLOCK_SIZE); sp_t root = roots[log2_len]; spv_t w = data->scratch; w[0] = 1; for (i = 1; i < block_size; i++) w[i] = sp_mul (w[i-1], root, p, d); root = sp_pow (root, block_size, p, d); for (i = 0; i < len; i += block_size) { if (i) spv_mul_sp (w, w, root, block_size, p, d); bfly_dit (x0 + i, x1 + i, w, block_size, p, d); } } } } ecm-6.4.4/ks-multiply.c0000644023561000001540000002736712106741273011673 00000000000000/* Polynomial multiplication using GMP's integer multiplication code Copyright 2004, 2005, 2006, 2007, 2008, 2009, 2010 Dave Newman, Paul Zimmermann, Alexander Kruppa. This file is part of the ECM Library. 
The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include "ecm-gmp.h" /* for MPZ_REALLOC and MPN_COPY */ #include "ecm-impl.h" #define FFT_WRAP /* always defined since mpn_mul_fft is included */ /* Puts in R[0..2l-2] the product of A[0..l-1] and B[0..l-1]. T must have as much space as for toomcook4 (it is only used when that function is called). Notes: - this code aligns the coeffs at limb boundaries - if instead we aligned at byte boundaries then we could save up to 3*l bytes in T0 and T1, but tests have shown this doesn't give any significant speed increase, even for large degree polynomials. - this code requires that all coefficients A[] and B[] are nonnegative. */ void kronecker_schonhage (listz_t R, listz_t A, listz_t B, unsigned int l, listz_t T) { unsigned long i; mp_size_t s, t = 0, size_t0, size_tmp; mp_ptr t0_ptr, t1_ptr, t2_ptr, r_ptr; s = mpz_sizeinbase (A[0], 2); if ((double) l * (double) s < KS_MUL_THRESHOLD) { toomcook4 (R, A, B, l, T); return; } for (i = 0; i < l; i++) { if ((s = mpz_sizeinbase (A[i], 2)) > t) t = s; if ((s = mpz_sizeinbase (B[i], 2)) > t) t = s; } /* For n > 0, s = sizeinbase (n, 2) <==> 2^(s-1) <= n < 2^s. For n = 0, s = sizeinbase (n, 2) = 1 ==> n < 2^s. 
Hence all A[i], B[i] < 2^t */ /* Each coeff of A(x)*B(x) < l * 2^(2*t), so max number of bits in a coeff of T[0] * T[1] will be 2 * t + ceil(log_2(l)) */ s = t * 2; for (i = l - 1; i; s++, i >>= 1); /* ceil(log_2(l)) = 1+floor(log_2(l-1)) */ /* work out the corresponding number of limbs */ s = 1 + (s - 1) / GMP_NUMB_BITS; /* Note: s * (l - 1) + ceil(t/GMP_NUMB_BITS) should be faster, but no significant speedup was observed */ size_t0 = s * l; /* allocate one double-buffer to save malloc/MPN_ZERO/free calls */ t0_ptr = (mp_ptr) malloc (2 * size_t0 * sizeof (mp_limb_t)); if (t0_ptr == NULL) { outputf (OUTPUT_ERROR, "Out of memory in kronecker_schonhage()\n"); exit (1); } t1_ptr = t0_ptr + size_t0; MPN_ZERO (t0_ptr, 2 * size_t0); for (i = 0; i < l; i++) { ASSERT(SIZ(A[i]) >= 0); if (SIZ(A[i])) MPN_COPY (t0_ptr + i * s, PTR(A[i]), SIZ(A[i])); ASSERT(SIZ(B[i]) >= 0); if (SIZ(B[i])) MPN_COPY (t1_ptr + i * s, PTR(B[i]), SIZ(B[i])); } t2_ptr = (mp_ptr) malloc (2 * size_t0 * sizeof (mp_limb_t)); if (t2_ptr == NULL) { free (t0_ptr); outputf (OUTPUT_ERROR, "Out of memory in kronecker_schonhage()\n"); exit (1); } /* mpn_mul_fft_full () allocates auxiliary memory of about 8n limbs, thus the total memory allocated by this function is about 12*size_t0. Since size_t0 is about 2*dF*limbs(modulus), this is about 24*dF*limbs(modulus). */ mpn_mul_fft_full (t2_ptr, t0_ptr, size_t0, t1_ptr, size_t0); for (i = 0; i < 2 * l - 1; i++) { size_tmp = s; MPN_NORMALIZE(t2_ptr + i * s, size_tmp); r_ptr = MPZ_REALLOC (R[i], size_tmp); if (size_tmp) MPN_COPY (r_ptr, t2_ptr + i * s, size_tmp); SIZ(R[i]) = size_tmp; } free (t0_ptr); free (t2_ptr); } /* Given a[0..m] and c[0..l], puts in b[0..n] the coefficients of degree m to n+m of rev(a)*c, i.e. b[0] = a[0]*c[0] + ... + a[i]*c[i] with i = min(m, l) ... b[k] = a[0]*c[k] + ... + a[i]*c[i+k] with i = min(m, l-k) ... b[n] = a[0]*c[n] + ... + a[i]*c[i+n] with i = min(m, l-n) [=l-n]. If rev=0, consider a instead of rev(a). Assumes n <= l. 
Return non-zero if an error occurred. */ #undef TEST_OLD_S int TMulKS (listz_t b, unsigned int n, listz_t a, unsigned int m, listz_t c, unsigned int l, mpz_t modulus, int rev) { unsigned long i, s = 0, t, k; mp_ptr ap, bp, cp; mp_size_t an, bn, cn; int ret = 0; /* default return value */ #ifdef TEST_OLD_S unsigned long s_old = 0, k_old; mp_size_t bn_old; #endif #ifdef DEBUG long st = cputime (); fprintf (ECM_STDOUT, "n=%u m=%u l=%u bits=%u n*bits=%u: ", n, m, l, mpz_sizeinbase (modulus, 2), n * mpz_sizeinbase (modulus, 2)); #endif ASSERT (n <= l); /* otherwise the upper coefficients of b are 0 */ if (l > n + m) l = n + m; /* otherwise, c has too many coeffs */ /* compute max bits of a[] and c[] */ for (i = 0; i <= m; i++) { if (mpz_sgn (a[i]) < 0) mpz_mod (a[i], a[i], modulus); if ((t = mpz_sizeinbase (a[i], 2)) > s) s = t; } for (i = 0; i <= l; i++) { if (mpz_sgn (c[i]) < 0) mpz_mod (c[i], c[i], modulus); if ((t = mpz_sizeinbase (c[i], 2)) > s) s = t; } #ifdef FFT_WRAP s ++; /* need one extra bit to determine sign of low(b) - high(b) */ #endif #ifdef TEST_OLD_S /* We used max(m,l) before. We compute the corresponding s for comparison. */ for (s_old = 2 * s, i = (m > l) ? m : l; i; s_old++, i >>= 1); #endif /* max coeff has 2*s+ceil(log2(min(m+1,l+1))) bits, i.e. 2*s + 1 + floor(log2(min(m,l))) */ for (s = 2 * s, i = (m < l) ? m : l; i; s++, i >>= 1); /* corresponding number of limbs */ s = 1 + (s - 1) / GMP_NUMB_BITS; #ifdef TEST_OLD_S s_old = 1 + (s_old - 1) / GMP_NUMB_BITS; #endif an = (m + 1) * s; cn = (l + 1) * s; bn = an + cn; /* a[0..m] needs (m+1) * s limbs */ ap = (mp_ptr) malloc (an * sizeof (mp_limb_t)); if (ap == NULL) { ret = 1; goto TMulKS_end; } cp = (mp_ptr) malloc (cn * sizeof (mp_limb_t)); if (cp == NULL) { ret = 1; goto TMulKS_free_ap; } MPN_ZERO (ap, an); MPN_ZERO (cp, cn); /* a is reverted */ for (i = 0; i <= m; i++) if (SIZ(a[i])) MPN_COPY (ap + ((rev) ? 
(m - i) : i) * s, PTR(a[i]), SIZ(a[i])); for (i = 0; i <= l; i++) if (SIZ(c[i])) MPN_COPY (cp + i * s, PTR(c[i]), SIZ(c[i])); #ifdef FFT_WRAP /* the product rev(a) * c has m+l+1 coefficients. We throw away the first m and the last l-n <= m. If we compute mod (m+n+1) * s limbs, we are ok */ k = mpn_fft_best_k ((m + n + 1) * s, 0); bn = mpn_fft_next_size ((m + n + 1) * s, k); #ifdef TEST_OLD_S k_old = mpn_fft_best_k ((m + n + 1) * s_old, 0); if (k != k_old) outputf (OUTPUT_ERROR, "Got different FFT transform length, k = %lu, k_old : %lu\n", k, k_old); bn_old = mpn_fft_next_size ((m + n + 1) * s_old, k_old); if (bn != bn_old) outputf (OUTPUT_ERROR, "Got different FFT size, bn = %d, bn_old : %d\n", (int) bn, (int) bn_old); #endif bp = (mp_ptr) malloc ((bn + 1) * sizeof (mp_limb_t)); if (bp == NULL) { ret = 1; goto TMulKS_free_cp; } mpn_mul_fft (bp, bn, ap, an, cp, cn, k); if (m && bp[m * s - 1] >> (GMP_NUMB_BITS - 1)) /* lo(b)-hi(b) is negative */ mpn_add_1 (bp + m * s, bp + m * s, (n + 1) * s, (mp_limb_t) 1); #else bp = (mp_ptr) malloc (bn * sizeof (mp_limb_t)); if (bp == NULL) { ret = 1; goto TMulKS_free_cp; } if (an >= cn) mpn_mul (bp, ap, an, cp, cn); else mpn_mul (bp, cp, cn, ap, an); #endif /* recover coefficients of degree m to n+m of product in b[0..n] */ bp += m * s; for (i = 0; i <= n; i++) { t = s; MPN_NORMALIZE(bp, t); MPZ_REALLOC (b[i], (mp_size_t) t); if (t) MPN_COPY (PTR(b[i]), bp, t); SIZ(b[i]) = t; bp += s; } bp -= (m + n + 1) * s; free (bp); TMulKS_free_cp: free (cp); TMulKS_free_ap: free (ap); #ifdef DEBUG fprintf (ECM_STDOUT, "%ums\n", elltime (st, cputime ())); #endif TMulKS_end: return ret; } #ifdef DEBUG void mpn_print (mp_ptr np, mp_size_t nn) { mp_size_t i; for (i = 0; i < nn; i++) fprintf (ECM_STDOUT, "+%lu*B^%u", np[i], i); fprintf (ECM_STDOUT, "\n"); } #endif unsigned int ks_wrapmul_m (unsigned int m0, unsigned int k, mpz_t n) { mp_size_t t, s; unsigned long i, fft_k, m; t = mpz_sizeinbase (n, 2); s = t * 2 + 1; for (i = k - 1; i; s++, i >>= 
1); s = 1 + (s - 1) / GMP_NUMB_BITS; fft_k = mpn_fft_best_k (m0 * s, 0); i = mpn_fft_next_size (m0 * s, fft_k); while (i % s) i = mpn_fft_next_size (i + 1, fft_k); m = i / s; return m; } /* multiply in R[] A[0]+A[1]*x+...+A[k-1]*x^(k-1) by B[0]+B[1]*x+...+B[l-1]*x^(l-1) modulo n, wrapping around coefficients of the product up from degree m >= m0. Assumes k >= l. R is assumed to have 2*m0-3+list_mul_mem(m0-1) allocated cells. Return m (or 0 if an error occurred). */ unsigned int ks_wrapmul (listz_t R, unsigned int m0, listz_t A, unsigned int k, listz_t B, unsigned int l, mpz_t n) { unsigned long i, fft_k, m, t; mp_size_t s, size_t0, size_t1, size_tmp; mp_ptr t0_ptr, t1_ptr, t2_ptr, r_ptr, tp; int negative; ASSERT(k >= l); t = mpz_sizeinbase (n, 2); for (i = 0; i < k; i++) if (mpz_sgn (A[i]) < 0 || mpz_sizeinbase (A[i], 2) > t) mpz_mod (A[i], A[i], n); for (i = 0; i < l; i++) if (mpz_sgn (B[i]) < 0 || mpz_sizeinbase (B[i], 2) > t) mpz_mod (B[i], B[i], n); s = t * 2 + 1; /* one extra sign bit */ for (i = k - 1; i; s++, i >>= 1); s = 1 + (s - 1) / GMP_NUMB_BITS; size_t0 = s * k; size_t1 = s * l; /* allocate one double-buffer to save malloc/MPN_ZERO/free calls */ t0_ptr = (mp_ptr) malloc (size_t0 * sizeof (mp_limb_t)); if (t0_ptr == NULL) return 0; t1_ptr = (mp_ptr) malloc (size_t1 * sizeof (mp_limb_t)); if (t1_ptr == NULL) { free (t0_ptr); return 0; } MPN_ZERO (t0_ptr, size_t0); MPN_ZERO (t1_ptr, size_t1); for (i = 0; i < k; i++) if (SIZ(A[i])) MPN_COPY (t0_ptr + i * s, PTR(A[i]), SIZ(A[i])); for (i = 0; i < l; i++) if (SIZ(B[i])) MPN_COPY (t1_ptr + i * s, PTR(B[i]), SIZ(B[i])); fft_k = mpn_fft_best_k (m0 * s, 0); i = mpn_fft_next_size (m0 * s, fft_k); /* the following loop ensures we don't cut in the middle of a coefficient */ while (i % s) i = mpn_fft_next_size (i + 1, fft_k); m = i / s; ASSERT(m <= 2 * m0 - 3 + list_mul_mem (m0 - 1)); t2_ptr = (mp_ptr) malloc ((i + 1) * sizeof (mp_limb_t)); if (t2_ptr == NULL) { free (t0_ptr); free (t1_ptr); return 0; } mpn_mul_fft 
(t2_ptr, i, t0_ptr, size_t0, t1_ptr, size_t1, fft_k); for (i = 0, tp = t2_ptr, negative = 0; i < m; i++) { size_tmp = s; if (negative) /* previous was negative, add 1 */ mpn_add_1 (tp, tp, s, (mp_limb_t) 1); /* no need to check return value of mpn_add_1: if 1, then {tp, s} is now identically 0, and should remain so */ MPN_NORMALIZE(tp, size_tmp); if ((size_tmp == s) && (tp[s - 1] >> (GMP_NUMB_BITS - 1))) { negative = 1; mpn_com_n (tp, tp, s); mpn_add_1 (tp, tp, s, (mp_limb_t) 1); } else negative = 0; r_ptr = MPZ_REALLOC (R[i], size_tmp); if (size_tmp) MPN_COPY (r_ptr, tp, size_tmp); SIZ(R[i]) = (negative) ? -size_tmp : size_tmp; tp += s; } free (t0_ptr); free (t1_ptr); free (t2_ptr); return m; } ecm-6.4.4/Makefile.in0000644023561000001540000041450512113421165011265 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ EXTRA_PROGRAMS = rho$(EXEEXT) batch$(EXEEXT) $(am__EXEEXT_1) \ $(am__EXEEXT_2) @ENABLE_ASM_REDC_TRUE@am__append_1 = bench_mulredc test_mulredc @ENABLE_ASM_REDC_FALSE@am__append_2 = bench_mulredc bin_PROGRAMS = ecm$(EXEEXT) noinst_PROGRAMS = tune$(EXEEXT) ecmfactor$(EXEEXT) \ bench_mulredc$(EXEEXT) @MEMORY_DEBUG_TRUE@am__append_3 = memory.c @MEMORY_DEBUG_TRUE@am__append_4 = memory.c @WITH_GWNUM_TRUE@am__append_5 = Fgw.c @WITH_GWNUM_FALSE@ecm_DEPENDENCIES = libecm.la $(am__DEPENDENCIES_1) subdir = . 
DIST_COMMON = README $(am__configure_deps) $(dist_man_MANS) \ $(include_HEADERS) $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in $(srcdir)/config.h.in \ $(top_srcdir)/configure AUTHORS COPYING COPYING.LIB ChangeLog \ INSTALL NEWS TODO compile config.guess config.sub depcomp \ install-sh ltmain.sh missing ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d CONFIG_HEADER = config.h CONFIG_CLEAN_FILES = ecm-params.h mul_fft-params.h CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \ "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(includedir)" LTLIBRARIES = $(lib_LTLIBRARIES) libecm_la_DEPENDENCIES = $(MULREDCLIBRARY) am__libecm_la_SOURCES_DIST = ecm.c ecm2.c pm1.c pp1.c getprime.c \ listz.c lucas.c stage2.c toomcook.c mpmod.c mul_lo.c \ polyeval.c median.c schoen_strass.c ks-multiply.c rho.c \ bestd.c auxlib.c random.c factor.c sp.c spv.c spm.c mpzspm.c \ mpzspv.c ntt_gfp.c ecm_ntt.c pm1fs2.c mul_fft.c sets_long.c \ auxarith.c batch.c ellparam_batch.c memory.c Fgw.c @MEMORY_DEBUG_TRUE@am__objects_1 = libecm_la-memory.lo @WITH_GWNUM_TRUE@am__objects_2 = libecm_la-Fgw.lo am_libecm_la_OBJECTS = libecm_la-ecm.lo libecm_la-ecm2.lo \ libecm_la-pm1.lo libecm_la-pp1.lo libecm_la-getprime.lo \ libecm_la-listz.lo libecm_la-lucas.lo libecm_la-stage2.lo \ libecm_la-toomcook.lo libecm_la-mpmod.lo libecm_la-mul_lo.lo \ libecm_la-polyeval.lo libecm_la-median.lo \ libecm_la-schoen_strass.lo libecm_la-ks-multiply.lo \ libecm_la-rho.lo libecm_la-bestd.lo libecm_la-auxlib.lo \ libecm_la-random.lo libecm_la-factor.lo libecm_la-sp.lo \ libecm_la-spv.lo libecm_la-spm.lo libecm_la-mpzspm.lo \ libecm_la-mpzspv.lo libecm_la-ntt_gfp.lo libecm_la-ecm_ntt.lo \ libecm_la-pm1fs2.lo libecm_la-mul_fft.lo \ libecm_la-sets_long.lo libecm_la-auxarith.lo \ libecm_la-batch.lo libecm_la-ellparam_batch.lo \ $(am__objects_1) $(am__objects_2) libecm_la_OBJECTS = $(am_libecm_la_OBJECTS) libecm_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(libecm_la_CFLAGS) \ $(CFLAGS) $(libecm_la_LDFLAGS) $(LDFLAGS) -o $@ @ENABLE_ASM_REDC_TRUE@am__EXEEXT_1 = bench_mulredc$(EXEEXT) \ @ENABLE_ASM_REDC_TRUE@ test_mulredc$(EXEEXT) @ENABLE_ASM_REDC_FALSE@am__EXEEXT_2 = bench_mulredc$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS) am_batch_OBJECTS = batch.$(OBJEXT) batch_OBJECTS = $(am_batch_OBJECTS) 
am__DEPENDENCIES_1 = batch_DEPENDENCIES = $(am__DEPENDENCIES_1) bench_mulredc_SOURCES = bench_mulredc.c bench_mulredc_OBJECTS = bench_mulredc-bench_mulredc.$(OBJEXT) @ENABLE_ASM_REDC_FALSE@bench_mulredc_DEPENDENCIES = \ @ENABLE_ASM_REDC_FALSE@ $(am__DEPENDENCIES_1) @ENABLE_ASM_REDC_TRUE@bench_mulredc_DEPENDENCIES = $(MULREDCLIBRARY) \ @ENABLE_ASM_REDC_TRUE@ $(am__DEPENDENCIES_1) am_ecm_OBJECTS = ecm-auxi.$(OBJEXT) ecm-b1_ainc.$(OBJEXT) \ ecm-candi.$(OBJEXT) ecm-eval.$(OBJEXT) ecm-random.$(OBJEXT) \ ecm-main.$(OBJEXT) ecm-resume.$(OBJEXT) ecm-getprime.$(OBJEXT) ecm_OBJECTS = $(am_ecm_OBJECTS) ecm_LDADD = $(LDADD) ecm_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(ecm_CFLAGS) $(CFLAGS) $(ecm_LDFLAGS) \ $(LDFLAGS) -o $@ ecmfactor_SOURCES = ecmfactor.c ecmfactor_OBJECTS = ecmfactor-ecmfactor.$(OBJEXT) ecmfactor_LDADD = $(LDADD) ecmfactor_DEPENDENCIES = libecm.la $(am__DEPENDENCIES_1) ecmfactor_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(ecmfactor_CFLAGS) \ $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ am_rho_OBJECTS = rho-rho.$(OBJEXT) rho_OBJECTS = $(am_rho_OBJECTS) rho_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) test_mulredc_SOURCES = test_mulredc.c test_mulredc_OBJECTS = test_mulredc-test_mulredc.$(OBJEXT) @ENABLE_ASM_REDC_TRUE@test_mulredc_DEPENDENCIES = $(MULREDCLIBRARY) \ @ENABLE_ASM_REDC_TRUE@ $(am__DEPENDENCIES_1) am__tune_SOURCES_DIST = mpmod.c tune.c mul_lo.c listz.c auxlib.c \ ks-multiply.c toomcook.c schoen_strass.c polyeval.c median.c \ ecm_ntt.c ntt_gfp.c mpzspv.c mpzspm.c sp.c spv.c spm.c \ random.c mul_fft.c auxarith.c memory.c @MEMORY_DEBUG_TRUE@am__objects_3 = tune-memory.$(OBJEXT) am_tune_OBJECTS = tune-mpmod.$(OBJEXT) tune-tune.$(OBJEXT) \ tune-mul_lo.$(OBJEXT) tune-listz.$(OBJEXT) \ tune-auxlib.$(OBJEXT) tune-ks-multiply.$(OBJEXT) \ tune-toomcook.$(OBJEXT) tune-schoen_strass.$(OBJEXT) \ tune-polyeval.$(OBJEXT) tune-median.$(OBJEXT) \ tune-ecm_ntt.$(OBJEXT) 
tune-ntt_gfp.$(OBJEXT) \ tune-mpzspv.$(OBJEXT) tune-mpzspm.$(OBJEXT) tune-sp.$(OBJEXT) \ tune-spv.$(OBJEXT) tune-spm.$(OBJEXT) tune-random.$(OBJEXT) \ tune-mul_fft.$(OBJEXT) tune-auxarith.$(OBJEXT) \ $(am__objects_3) tune_OBJECTS = $(am_tune_OBJECTS) tune_DEPENDENCIES = $(MULREDCLIBRARY) $(am__DEPENDENCIES_1) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(libecm_la_SOURCES) $(batch_SOURCES) bench_mulredc.c \ $(ecm_SOURCES) ecmfactor.c $(rho_SOURCES) test_mulredc.c \ $(tune_SOURCES) DIST_SOURCES = $(am__libecm_la_SOURCES_DIST) $(batch_SOURCES) \ bench_mulredc.c $(ecm_SOURCES) ecmfactor.c $(rho_SOURCES) \ test_mulredc.c $(am__tune_SOURCES_DIST) RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ html-recursive info-recursive install-data-recursive \ install-dvi-recursive install-exec-recursive \ install-html-recursive install-info-recursive \ install-pdf-recursive install-ps-recursive install-recursive \ installcheck-recursive installdirs-recursive pdf-recursive \ ps-recursive uninstall-recursive man1dir = $(mandir)/man1 NROFF = nroff MANS = $(dist_man_MANS) HEADERS = $(include_HEADERS) $(noinst_HEADERS) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \ $(RECURSIVE_CLEAN_TARGETS:-recursive=) tags TAGS ctags CTAGS \ distdir dist dist-all distcheck ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) 
$(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) am__remove_distdir = \ if test -d "$(distdir)"; then \ find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \ && rm -rf "$(distdir)" \ || { sleep 5 && rm -rf "$(distdir)"; }; \ else :; fi am__relativize = \ dir0=`pwd`; \ sed_first='s,^\([^/]*\)/.*$$,\1,'; \ sed_rest='s,^[^/]*/*,,'; \ sed_last='s,^.*/\([^/]*\)$$,\1,'; \ sed_butlast='s,/*[^/]*$$,,'; \ while test -n "$$dir1"; do \ first=`echo "$$dir1" | sed -e "$$sed_first"`; \ if test "$$first" != "."; then \ if test "$$first" = ".."; then \ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \ else \ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \ if test "$$first2" = "$$first"; then \ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \ else \ dir2="../$$dir2"; \ fi; \ dir0="$$dir0"/"$$first"; \ fi; \ fi; \ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \ done; \ reldir="$$dir2" DIST_ARCHIVES = $(distdir).tar.gz GZIP_ENV = --best distuninstallcheck_listfiles = find . -type f -print am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \ | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$' distcleancheck_listfiles = find . 
-type f -print ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = 
@build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ ACLOCAL_AMFLAGS = -I m4 # to not install libecm.la, we could write noinst_LTLIBRARIES instead of # lib_LTLIBRARIES below, however then libecm.a is not installed either # (see http://www.gnu.org/software/automake/manual/html_node/Libtool-Convenience-Libraries.html) lib_LTLIBRARIES = libecm.la # If we want assembly mulredc code, recurse into the right subdirectory # and set up variables to include the mulredc library from that subdir @ENABLE_ASM_REDC_TRUE@SUBDIRS = $(ASMPATH) @ENABLE_ASM_REDC_TRUE@MULREDCINCPATH = -I$(srcdir)/$(ASMPATH) @ENABLE_ASM_REDC_TRUE@MULREDCLIBRARY = $(builddir)/$(ASMPATH)/libmulredc.la @ENABLE_ASM_REDC_FALSE@CLEANFILES = bench_mulredc @ENABLE_ASM_REDC_TRUE@CLEANFILES = bench_mulredc test_mulredc @ENABLE_ASM_REDC_TRUE@bench_mulredc_CPPFLAGS = $(MULREDCINCPATH) @ENABLE_ASM_REDC_FALSE@bench_mulredc_LDADD = $(GMPLIB) @ENABLE_ASM_REDC_TRUE@bench_mulredc_LDADD = $(MULREDCLIBRARY) $(GMPLIB) @ENABLE_ASM_REDC_TRUE@test_mulredc_CPPFLAGS = $(MULREDCINCPATH) @ENABLE_ASM_REDC_TRUE@test_mulredc_LDADD = $(MULREDCLIBRARY) $(GMPLIB) libecm_la_SOURCES = ecm.c ecm2.c pm1.c pp1.c 
getprime.c listz.c \ lucas.c stage2.c toomcook.c mpmod.c mul_lo.c polyeval.c \ median.c schoen_strass.c ks-multiply.c rho.c bestd.c auxlib.c \ random.c factor.c sp.c spv.c spm.c mpzspm.c mpzspv.c ntt_gfp.c \ ecm_ntt.c pm1fs2.c mul_fft.c sets_long.c auxarith.c batch.c \ ellparam_batch.c $(am__append_3) $(am__append_5) # Link the asm redc code (if we use it) into libecm.la libecm_la_CPPFLAGS = $(MULREDCINCPATH) libecm_la_CFLAGS = $(OPENMP_CFLAGS) libecm_la_LDFLAGS = '-version-info 0:0:0' libecm_la_LIBADD = $(MULREDCLIBRARY) # Most binaries want to link libecm.la, and the ones which don't will # have their own _LDADD which overrides the default LDADD here LDADD = libecm.la $(GMPLIB) ecm_CPPFLAGS = -DOUTSIDE_LIBECM ecm_CFLAGS = $(OPENMP_CFLAGS) ecm_SOURCES = auxi.c b1_ainc.c candi.c eval.c random.c main.c \ resume.c getprime.c champions.h tune_SOURCES = mpmod.c tune.c mul_lo.c listz.c auxlib.c ks-multiply.c \ toomcook.c schoen_strass.c polyeval.c median.c ecm_ntt.c \ ntt_gfp.c mpzspv.c mpzspm.c sp.c spv.c spm.c random.c \ mul_fft.c auxarith.c $(am__append_4) tune_CPPFLAGS = -DTUNE $(MULREDCINCPATH) tune_LDADD = $(MULREDCLIBRARY) $(GMPLIB) ecmfactor_CFLAGS = $(OPENMP_CFLAGS) rho_SOURCES = rho.c rho_CPPFLAGS = -DTESTDRIVE rho_LDADD = -lprimegen $(GMPLIB) $(GSL_LD_FLAGS) batch_SOURCES = batch.c batch_LDADD = $(GMPLIB) @WITH_GWNUM_TRUE@ecm_DEPENDENCIES = gwdata.ld @WITH_GWNUM_TRUE@ecm_LDFLAGS = $(AM_LDFLAGS) -Wl,gwdata.ld include_HEADERS = ecm.h noinst_HEADERS = ecm-impl.h ecm-gmp.h ecm-ecm.h sp.h longlong.h ecm-params.h \ mpmod.h EXTRA_DIST = test.pm1 test.pp1 test.ecm README.lib INSTALL-ecm ecm.xml \ ecm-params.h.alpha-ev5 ecm-params.h.athlon64 \ ecm-params.h.default ecm-params.h.alpha-ev6 \ ecm-params.h.athlon ecm-params.h.powerpc7450 \ ecm-params.h.pentium3 ecm-params.h.pentium4 \ ecm-params.h.pentium-m ecm-params.h.powerpc970 \ ecm-params.h.mips64el ecm-params.h.armv5tel \ ecm-params.h.sparc64 ecm-params.h.ia64 \ ecm-params.h.hppa2.0 ecm-params.h.alpha-ev56 \ 
ecm-params.h.core2 ecm-params.h.corei5 \ mul_fft-params.h.athlon64 mul_fft-params.h.pentium3 \ mul_fft-params.h.default mul_fft-params.h.pentium4 DIST_SUBDIRS = athlon pentium4 x86_64 powerpc64 build.vc10 DISTCLEANFILES = config.m4 dist_man_MANS = ecm.1 all: config.h $(MAKE) $(AM_MAKEFLAGS) all-recursive .SUFFIXES: .SUFFIXES: .c .lo .o .obj am--refresh: Makefile @: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ echo ' cd $(srcdir) && $(AUTOMAKE) --gnu'; \ $(am__cd) $(srcdir) && $(AUTOMAKE) --gnu \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ echo ' $(SHELL) ./config.status'; \ $(SHELL) ./config.status;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck $(top_srcdir)/configure: $(am__configure_deps) $(am__cd) $(srcdir) && $(AUTOCONF) $(ACLOCAL_M4): $(am__aclocal_m4_deps) $(am__cd) $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) $(am__aclocal_m4_deps): config.h: stamp-h1 @if test ! -f $@; then rm -f stamp-h1; else :; fi @if test ! 
-f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status @rm -f stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status config.h $(srcdir)/config.h.in: $(am__configure_deps) ($(am__cd) $(top_srcdir) && $(AUTOHEADER)) rm -f stamp-h1 touch $@ distclean-hdr: -rm -f config.h stamp-h1 install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ list2=; for p in $$list; do \ if test -f $$p; then \ list2="$$list2 $$p"; \ else :; fi; \ done; \ test -z "$$list2" || { \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ } uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ for p in $$list; do \ $(am__strip_dir) \ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ test "$$dir" != "$$p" || dir=.; \ echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done libecm.la: $(libecm_la_OBJECTS) $(libecm_la_DEPENDENCIES) $(EXTRA_libecm_la_DEPENDENCIES) $(libecm_la_LINK) -rpath $(libdir) $(libecm_la_OBJECTS) $(libecm_la_LIBADD) $(LIBS) install-binPROGRAMS: $(bin_PROGRAMS) @$(NORMAL_INSTALL) test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)" @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ for p in $$list; do echo "$$p $$p"; done | \ sed 's/$(EXEEXT)$$//' | \ while read p p1; do if test -f $$p 
|| test -f $$p1; \ then echo "$$p"; echo "$$p"; else :; fi; \ done | \ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ sed 'N;N;N;s,\n, ,g' | \ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ if ($$2 == $$4) files[d] = files[d] " " $$1; \ else { print "f", $$3 "/" $$4, $$1; } } \ END { for (d in files) print "f", d, files[d] }' | \ while read type dir files; do \ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ test -z "$$files" || { \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ } \ ; done uninstall-binPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ files=`for p in $$list; do echo "$$p"; done | \ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ -e 's/$$/$(EXEEXT)/' `; \ test -n "$$list" || exit 0; \ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ cd "$(DESTDIR)$(bindir)" && rm -f $$files clean-binPROGRAMS: @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list clean-noinstPROGRAMS: @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ echo " rm -f" $$list; \ rm -f $$list batch$(EXEEXT): $(batch_OBJECTS) $(batch_DEPENDENCIES) $(EXTRA_batch_DEPENDENCIES) @rm -f batch$(EXEEXT) $(LINK) $(batch_OBJECTS) $(batch_LDADD) $(LIBS) bench_mulredc$(EXEEXT): $(bench_mulredc_OBJECTS) 
$(bench_mulredc_DEPENDENCIES) $(EXTRA_bench_mulredc_DEPENDENCIES) @rm -f bench_mulredc$(EXEEXT) $(LINK) $(bench_mulredc_OBJECTS) $(bench_mulredc_LDADD) $(LIBS) ecm$(EXEEXT): $(ecm_OBJECTS) $(ecm_DEPENDENCIES) $(EXTRA_ecm_DEPENDENCIES) @rm -f ecm$(EXEEXT) $(ecm_LINK) $(ecm_OBJECTS) $(ecm_LDADD) $(LIBS) ecmfactor$(EXEEXT): $(ecmfactor_OBJECTS) $(ecmfactor_DEPENDENCIES) $(EXTRA_ecmfactor_DEPENDENCIES) @rm -f ecmfactor$(EXEEXT) $(ecmfactor_LINK) $(ecmfactor_OBJECTS) $(ecmfactor_LDADD) $(LIBS) rho$(EXEEXT): $(rho_OBJECTS) $(rho_DEPENDENCIES) $(EXTRA_rho_DEPENDENCIES) @rm -f rho$(EXEEXT) $(LINK) $(rho_OBJECTS) $(rho_LDADD) $(LIBS) test_mulredc$(EXEEXT): $(test_mulredc_OBJECTS) $(test_mulredc_DEPENDENCIES) $(EXTRA_test_mulredc_DEPENDENCIES) @rm -f test_mulredc$(EXEEXT) $(LINK) $(test_mulredc_OBJECTS) $(test_mulredc_LDADD) $(LIBS) tune$(EXEEXT): $(tune_OBJECTS) $(tune_DEPENDENCIES) $(EXTRA_tune_DEPENDENCIES) @rm -f tune$(EXEEXT) $(LINK) $(tune_OBJECTS) $(tune_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/batch.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bench_mulredc-bench_mulredc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecm-auxi.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecm-b1_ainc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecm-candi.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecm-eval.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecm-getprime.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecm-main.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecm-random.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecm-resume.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecmfactor-ecmfactor.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-Fgw.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/libecm_la-auxarith.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-auxlib.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-batch.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-bestd.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-ecm.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-ecm2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-ecm_ntt.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-ellparam_batch.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-factor.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-getprime.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-ks-multiply.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-listz.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-lucas.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-median.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-memory.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-mpmod.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-mpzspm.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-mpzspv.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-mul_fft.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-mul_lo.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-ntt_gfp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-pm1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-pm1fs2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-polyeval.Plo@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/libecm_la-pp1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-random.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-rho.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-schoen_strass.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-sets_long.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-sp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-spm.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-spv.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-stage2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libecm_la-toomcook.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rho-rho.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_mulredc-test_mulredc.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-auxarith.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-auxlib.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-ecm_ntt.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-ks-multiply.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-listz.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-median.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-memory.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-mpmod.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-mpzspm.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-mpzspv.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-mul_fft.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-mul_lo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-ntt_gfp.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-polyeval.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-random.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-schoen_strass.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-sp.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-spm.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-spv.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-toomcook.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tune-tune.Po@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c $< .c.obj: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< libecm_la-ecm.lo: ecm.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-ecm.lo -MD -MP -MF $(DEPDIR)/libecm_la-ecm.Tpo -c -o libecm_la-ecm.lo `test -f 'ecm.c' 
|| echo '$(srcdir)/'`ecm.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-ecm.Tpo $(DEPDIR)/libecm_la-ecm.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='ecm.c' object='libecm_la-ecm.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-ecm.lo `test -f 'ecm.c' || echo '$(srcdir)/'`ecm.c libecm_la-ecm2.lo: ecm2.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-ecm2.lo -MD -MP -MF $(DEPDIR)/libecm_la-ecm2.Tpo -c -o libecm_la-ecm2.lo `test -f 'ecm2.c' || echo '$(srcdir)/'`ecm2.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-ecm2.Tpo $(DEPDIR)/libecm_la-ecm2.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='ecm2.c' object='libecm_la-ecm2.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-ecm2.lo `test -f 'ecm2.c' || echo '$(srcdir)/'`ecm2.c libecm_la-pm1.lo: pm1.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-pm1.lo -MD -MP -MF $(DEPDIR)/libecm_la-pm1.Tpo -c -o libecm_la-pm1.lo `test -f 'pm1.c' || echo '$(srcdir)/'`pm1.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-pm1.Tpo $(DEPDIR)/libecm_la-pm1.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pm1.c' 
object='libecm_la-pm1.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-pm1.lo `test -f 'pm1.c' || echo '$(srcdir)/'`pm1.c libecm_la-pp1.lo: pp1.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-pp1.lo -MD -MP -MF $(DEPDIR)/libecm_la-pp1.Tpo -c -o libecm_la-pp1.lo `test -f 'pp1.c' || echo '$(srcdir)/'`pp1.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-pp1.Tpo $(DEPDIR)/libecm_la-pp1.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pp1.c' object='libecm_la-pp1.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-pp1.lo `test -f 'pp1.c' || echo '$(srcdir)/'`pp1.c libecm_la-getprime.lo: getprime.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-getprime.lo -MD -MP -MF $(DEPDIR)/libecm_la-getprime.Tpo -c -o libecm_la-getprime.lo `test -f 'getprime.c' || echo '$(srcdir)/'`getprime.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-getprime.Tpo $(DEPDIR)/libecm_la-getprime.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='getprime.c' object='libecm_la-getprime.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-getprime.lo `test -f 'getprime.c' || echo '$(srcdir)/'`getprime.c libecm_la-listz.lo: listz.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-listz.lo -MD -MP -MF $(DEPDIR)/libecm_la-listz.Tpo -c -o libecm_la-listz.lo `test -f 'listz.c' || echo '$(srcdir)/'`listz.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-listz.Tpo $(DEPDIR)/libecm_la-listz.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='listz.c' object='libecm_la-listz.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-listz.lo `test -f 'listz.c' || echo '$(srcdir)/'`listz.c libecm_la-lucas.lo: lucas.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-lucas.lo -MD -MP -MF $(DEPDIR)/libecm_la-lucas.Tpo -c -o libecm_la-lucas.lo `test -f 'lucas.c' || echo '$(srcdir)/'`lucas.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-lucas.Tpo $(DEPDIR)/libecm_la-lucas.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='lucas.c' object='libecm_la-lucas.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-lucas.lo `test -f 'lucas.c' || echo '$(srcdir)/'`lucas.c libecm_la-stage2.lo: stage2.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-stage2.lo -MD -MP -MF $(DEPDIR)/libecm_la-stage2.Tpo -c -o libecm_la-stage2.lo `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-stage2.Tpo $(DEPDIR)/libecm_la-stage2.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='stage2.c' object='libecm_la-stage2.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-stage2.lo `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c libecm_la-toomcook.lo: toomcook.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-toomcook.lo -MD -MP -MF $(DEPDIR)/libecm_la-toomcook.Tpo -c -o libecm_la-toomcook.lo `test -f 'toomcook.c' || echo '$(srcdir)/'`toomcook.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-toomcook.Tpo $(DEPDIR)/libecm_la-toomcook.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='toomcook.c' object='libecm_la-toomcook.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) 
$(CFLAGS) -c -o libecm_la-toomcook.lo `test -f 'toomcook.c' || echo '$(srcdir)/'`toomcook.c libecm_la-mpmod.lo: mpmod.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-mpmod.lo -MD -MP -MF $(DEPDIR)/libecm_la-mpmod.Tpo -c -o libecm_la-mpmod.lo `test -f 'mpmod.c' || echo '$(srcdir)/'`mpmod.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-mpmod.Tpo $(DEPDIR)/libecm_la-mpmod.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='mpmod.c' object='libecm_la-mpmod.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-mpmod.lo `test -f 'mpmod.c' || echo '$(srcdir)/'`mpmod.c libecm_la-mul_lo.lo: mul_lo.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-mul_lo.lo -MD -MP -MF $(DEPDIR)/libecm_la-mul_lo.Tpo -c -o libecm_la-mul_lo.lo `test -f 'mul_lo.c' || echo '$(srcdir)/'`mul_lo.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-mul_lo.Tpo $(DEPDIR)/libecm_la-mul_lo.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='mul_lo.c' object='libecm_la-mul_lo.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-mul_lo.lo `test -f 'mul_lo.c' || echo '$(srcdir)/'`mul_lo.c libecm_la-polyeval.lo: 
polyeval.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-polyeval.lo -MD -MP -MF $(DEPDIR)/libecm_la-polyeval.Tpo -c -o libecm_la-polyeval.lo `test -f 'polyeval.c' || echo '$(srcdir)/'`polyeval.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-polyeval.Tpo $(DEPDIR)/libecm_la-polyeval.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='polyeval.c' object='libecm_la-polyeval.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-polyeval.lo `test -f 'polyeval.c' || echo '$(srcdir)/'`polyeval.c libecm_la-median.lo: median.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-median.lo -MD -MP -MF $(DEPDIR)/libecm_la-median.Tpo -c -o libecm_la-median.lo `test -f 'median.c' || echo '$(srcdir)/'`median.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-median.Tpo $(DEPDIR)/libecm_la-median.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='median.c' object='libecm_la-median.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-median.lo `test -f 'median.c' || echo '$(srcdir)/'`median.c libecm_la-schoen_strass.lo: schoen_strass.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-schoen_strass.lo -MD -MP -MF $(DEPDIR)/libecm_la-schoen_strass.Tpo -c -o libecm_la-schoen_strass.lo `test -f 'schoen_strass.c' || echo '$(srcdir)/'`schoen_strass.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-schoen_strass.Tpo $(DEPDIR)/libecm_la-schoen_strass.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='schoen_strass.c' object='libecm_la-schoen_strass.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-schoen_strass.lo `test -f 'schoen_strass.c' || echo '$(srcdir)/'`schoen_strass.c libecm_la-ks-multiply.lo: ks-multiply.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-ks-multiply.lo -MD -MP -MF $(DEPDIR)/libecm_la-ks-multiply.Tpo -c -o libecm_la-ks-multiply.lo `test -f 'ks-multiply.c' || echo '$(srcdir)/'`ks-multiply.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-ks-multiply.Tpo $(DEPDIR)/libecm_la-ks-multiply.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='ks-multiply.c' object='libecm_la-ks-multiply.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-ks-multiply.lo `test -f 'ks-multiply.c' || echo '$(srcdir)/'`ks-multiply.c libecm_la-rho.lo: 
rho.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-rho.lo -MD -MP -MF $(DEPDIR)/libecm_la-rho.Tpo -c -o libecm_la-rho.lo `test -f 'rho.c' || echo '$(srcdir)/'`rho.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-rho.Tpo $(DEPDIR)/libecm_la-rho.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='rho.c' object='libecm_la-rho.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-rho.lo `test -f 'rho.c' || echo '$(srcdir)/'`rho.c libecm_la-bestd.lo: bestd.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-bestd.lo -MD -MP -MF $(DEPDIR)/libecm_la-bestd.Tpo -c -o libecm_la-bestd.lo `test -f 'bestd.c' || echo '$(srcdir)/'`bestd.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-bestd.Tpo $(DEPDIR)/libecm_la-bestd.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='bestd.c' object='libecm_la-bestd.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-bestd.lo `test -f 'bestd.c' || echo '$(srcdir)/'`bestd.c libecm_la-auxlib.lo: auxlib.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-auxlib.lo -MD -MP -MF $(DEPDIR)/libecm_la-auxlib.Tpo -c -o libecm_la-auxlib.lo `test -f 'auxlib.c' || echo '$(srcdir)/'`auxlib.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-auxlib.Tpo $(DEPDIR)/libecm_la-auxlib.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='auxlib.c' object='libecm_la-auxlib.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-auxlib.lo `test -f 'auxlib.c' || echo '$(srcdir)/'`auxlib.c libecm_la-random.lo: random.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-random.lo -MD -MP -MF $(DEPDIR)/libecm_la-random.Tpo -c -o libecm_la-random.lo `test -f 'random.c' || echo '$(srcdir)/'`random.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-random.Tpo $(DEPDIR)/libecm_la-random.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='random.c' object='libecm_la-random.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-random.lo `test -f 'random.c' || echo '$(srcdir)/'`random.c libecm_la-factor.lo: factor.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-factor.lo -MD -MP -MF 
$(DEPDIR)/libecm_la-factor.Tpo -c -o libecm_la-factor.lo `test -f 'factor.c' || echo '$(srcdir)/'`factor.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-factor.Tpo $(DEPDIR)/libecm_la-factor.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='factor.c' object='libecm_la-factor.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-factor.lo `test -f 'factor.c' || echo '$(srcdir)/'`factor.c libecm_la-sp.lo: sp.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-sp.lo -MD -MP -MF $(DEPDIR)/libecm_la-sp.Tpo -c -o libecm_la-sp.lo `test -f 'sp.c' || echo '$(srcdir)/'`sp.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-sp.Tpo $(DEPDIR)/libecm_la-sp.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sp.c' object='libecm_la-sp.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-sp.lo `test -f 'sp.c' || echo '$(srcdir)/'`sp.c libecm_la-spv.lo: spv.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-spv.lo -MD -MP -MF $(DEPDIR)/libecm_la-spv.Tpo -c -o libecm_la-spv.lo `test -f 'spv.c' || echo '$(srcdir)/'`spv.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-spv.Tpo 
$(DEPDIR)/libecm_la-spv.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='spv.c' object='libecm_la-spv.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-spv.lo `test -f 'spv.c' || echo '$(srcdir)/'`spv.c libecm_la-spm.lo: spm.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-spm.lo -MD -MP -MF $(DEPDIR)/libecm_la-spm.Tpo -c -o libecm_la-spm.lo `test -f 'spm.c' || echo '$(srcdir)/'`spm.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-spm.Tpo $(DEPDIR)/libecm_la-spm.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='spm.c' object='libecm_la-spm.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-spm.lo `test -f 'spm.c' || echo '$(srcdir)/'`spm.c libecm_la-mpzspm.lo: mpzspm.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-mpzspm.lo -MD -MP -MF $(DEPDIR)/libecm_la-mpzspm.Tpo -c -o libecm_la-mpzspm.lo `test -f 'mpzspm.c' || echo '$(srcdir)/'`mpzspm.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-mpzspm.Tpo $(DEPDIR)/libecm_la-mpzspm.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='mpzspm.c' object='libecm_la-mpzspm.lo' libtool=yes @AMDEPBACKSLASH@ 
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-mpzspm.lo `test -f 'mpzspm.c' || echo '$(srcdir)/'`mpzspm.c libecm_la-mpzspv.lo: mpzspv.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-mpzspv.lo -MD -MP -MF $(DEPDIR)/libecm_la-mpzspv.Tpo -c -o libecm_la-mpzspv.lo `test -f 'mpzspv.c' || echo '$(srcdir)/'`mpzspv.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-mpzspv.Tpo $(DEPDIR)/libecm_la-mpzspv.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='mpzspv.c' object='libecm_la-mpzspv.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-mpzspv.lo `test -f 'mpzspv.c' || echo '$(srcdir)/'`mpzspv.c libecm_la-ntt_gfp.lo: ntt_gfp.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-ntt_gfp.lo -MD -MP -MF $(DEPDIR)/libecm_la-ntt_gfp.Tpo -c -o libecm_la-ntt_gfp.lo `test -f 'ntt_gfp.c' || echo '$(srcdir)/'`ntt_gfp.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-ntt_gfp.Tpo $(DEPDIR)/libecm_la-ntt_gfp.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='ntt_gfp.c' object='libecm_la-ntt_gfp.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-ntt_gfp.lo `test -f 'ntt_gfp.c' || echo '$(srcdir)/'`ntt_gfp.c libecm_la-ecm_ntt.lo: ecm_ntt.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-ecm_ntt.lo -MD -MP -MF $(DEPDIR)/libecm_la-ecm_ntt.Tpo -c -o libecm_la-ecm_ntt.lo `test -f 'ecm_ntt.c' || echo '$(srcdir)/'`ecm_ntt.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-ecm_ntt.Tpo $(DEPDIR)/libecm_la-ecm_ntt.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='ecm_ntt.c' object='libecm_la-ecm_ntt.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-ecm_ntt.lo `test -f 'ecm_ntt.c' || echo '$(srcdir)/'`ecm_ntt.c libecm_la-pm1fs2.lo: pm1fs2.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-pm1fs2.lo -MD -MP -MF $(DEPDIR)/libecm_la-pm1fs2.Tpo -c -o libecm_la-pm1fs2.lo `test -f 'pm1fs2.c' || echo '$(srcdir)/'`pm1fs2.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-pm1fs2.Tpo $(DEPDIR)/libecm_la-pm1fs2.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='pm1fs2.c' object='libecm_la-pm1fs2.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-pm1fs2.lo `test -f 'pm1fs2.c' || echo '$(srcdir)/'`pm1fs2.c libecm_la-mul_fft.lo: mul_fft.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-mul_fft.lo -MD -MP -MF $(DEPDIR)/libecm_la-mul_fft.Tpo -c -o libecm_la-mul_fft.lo `test -f 'mul_fft.c' || echo '$(srcdir)/'`mul_fft.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-mul_fft.Tpo $(DEPDIR)/libecm_la-mul_fft.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='mul_fft.c' object='libecm_la-mul_fft.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-mul_fft.lo `test -f 'mul_fft.c' || echo '$(srcdir)/'`mul_fft.c libecm_la-sets_long.lo: sets_long.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-sets_long.lo -MD -MP -MF $(DEPDIR)/libecm_la-sets_long.Tpo -c -o libecm_la-sets_long.lo `test -f 'sets_long.c' || echo '$(srcdir)/'`sets_long.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-sets_long.Tpo $(DEPDIR)/libecm_la-sets_long.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='sets_long.c' object='libecm_la-sets_long.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-sets_long.lo `test -f 'sets_long.c' || echo '$(srcdir)/'`sets_long.c libecm_la-auxarith.lo: auxarith.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-auxarith.lo -MD -MP -MF $(DEPDIR)/libecm_la-auxarith.Tpo -c -o libecm_la-auxarith.lo `test -f 'auxarith.c' || echo '$(srcdir)/'`auxarith.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-auxarith.Tpo $(DEPDIR)/libecm_la-auxarith.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='auxarith.c' object='libecm_la-auxarith.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-auxarith.lo `test -f 'auxarith.c' || echo '$(srcdir)/'`auxarith.c libecm_la-batch.lo: batch.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-batch.lo -MD -MP -MF $(DEPDIR)/libecm_la-batch.Tpo -c -o libecm_la-batch.lo `test -f 'batch.c' || echo '$(srcdir)/'`batch.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-batch.Tpo $(DEPDIR)/libecm_la-batch.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='batch.c' object='libecm_la-batch.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c 
-o libecm_la-batch.lo `test -f 'batch.c' || echo '$(srcdir)/'`batch.c libecm_la-ellparam_batch.lo: ellparam_batch.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-ellparam_batch.lo -MD -MP -MF $(DEPDIR)/libecm_la-ellparam_batch.Tpo -c -o libecm_la-ellparam_batch.lo `test -f 'ellparam_batch.c' || echo '$(srcdir)/'`ellparam_batch.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-ellparam_batch.Tpo $(DEPDIR)/libecm_la-ellparam_batch.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='ellparam_batch.c' object='libecm_la-ellparam_batch.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-ellparam_batch.lo `test -f 'ellparam_batch.c' || echo '$(srcdir)/'`ellparam_batch.c libecm_la-memory.lo: memory.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-memory.lo -MD -MP -MF $(DEPDIR)/libecm_la-memory.Tpo -c -o libecm_la-memory.lo `test -f 'memory.c' || echo '$(srcdir)/'`memory.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-memory.Tpo $(DEPDIR)/libecm_la-memory.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='memory.c' object='libecm_la-memory.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) 
-c -o libecm_la-memory.lo `test -f 'memory.c' || echo '$(srcdir)/'`memory.c libecm_la-Fgw.lo: Fgw.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -MT libecm_la-Fgw.lo -MD -MP -MF $(DEPDIR)/libecm_la-Fgw.Tpo -c -o libecm_la-Fgw.lo `test -f 'Fgw.c' || echo '$(srcdir)/'`Fgw.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libecm_la-Fgw.Tpo $(DEPDIR)/libecm_la-Fgw.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='Fgw.c' object='libecm_la-Fgw.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libecm_la_CPPFLAGS) $(CPPFLAGS) $(libecm_la_CFLAGS) $(CFLAGS) -c -o libecm_la-Fgw.lo `test -f 'Fgw.c' || echo '$(srcdir)/'`Fgw.c bench_mulredc-bench_mulredc.o: bench_mulredc.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(bench_mulredc_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT bench_mulredc-bench_mulredc.o -MD -MP -MF $(DEPDIR)/bench_mulredc-bench_mulredc.Tpo -c -o bench_mulredc-bench_mulredc.o `test -f 'bench_mulredc.c' || echo '$(srcdir)/'`bench_mulredc.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/bench_mulredc-bench_mulredc.Tpo $(DEPDIR)/bench_mulredc-bench_mulredc.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='bench_mulredc.c' object='bench_mulredc-bench_mulredc.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(bench_mulredc_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o bench_mulredc-bench_mulredc.o `test -f 'bench_mulredc.c' || echo '$(srcdir)/'`bench_mulredc.c bench_mulredc-bench_mulredc.obj: bench_mulredc.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(bench_mulredc_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT bench_mulredc-bench_mulredc.obj -MD -MP -MF $(DEPDIR)/bench_mulredc-bench_mulredc.Tpo -c -o bench_mulredc-bench_mulredc.obj `if test -f 'bench_mulredc.c'; then $(CYGPATH_W) 'bench_mulredc.c'; else $(CYGPATH_W) '$(srcdir)/bench_mulredc.c'; fi` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/bench_mulredc-bench_mulredc.Tpo $(DEPDIR)/bench_mulredc-bench_mulredc.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='bench_mulredc.c' object='bench_mulredc-bench_mulredc.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(bench_mulredc_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o bench_mulredc-bench_mulredc.obj `if test -f 'bench_mulredc.c'; then $(CYGPATH_W) 'bench_mulredc.c'; else $(CYGPATH_W) '$(srcdir)/bench_mulredc.c'; fi` ecm-auxi.o: auxi.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-auxi.o -MD -MP -MF $(DEPDIR)/ecm-auxi.Tpo -c -o ecm-auxi.o `test -f 'auxi.c' || echo '$(srcdir)/'`auxi.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-auxi.Tpo $(DEPDIR)/ecm-auxi.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='auxi.c' object='ecm-auxi.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-auxi.o `test -f 'auxi.c' || echo '$(srcdir)/'`auxi.c ecm-auxi.obj: auxi.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-auxi.obj -MD -MP -MF $(DEPDIR)/ecm-auxi.Tpo -c -o ecm-auxi.obj `if test -f 'auxi.c'; then $(CYGPATH_W) 'auxi.c'; else $(CYGPATH_W) '$(srcdir)/auxi.c'; fi` @am__fastdepCC_TRUE@ $(am__mv) 
$(DEPDIR)/ecm-auxi.Tpo $(DEPDIR)/ecm-auxi.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='auxi.c' object='ecm-auxi.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-auxi.obj `if test -f 'auxi.c'; then $(CYGPATH_W) 'auxi.c'; else $(CYGPATH_W) '$(srcdir)/auxi.c'; fi` ecm-b1_ainc.o: b1_ainc.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-b1_ainc.o -MD -MP -MF $(DEPDIR)/ecm-b1_ainc.Tpo -c -o ecm-b1_ainc.o `test -f 'b1_ainc.c' || echo '$(srcdir)/'`b1_ainc.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-b1_ainc.Tpo $(DEPDIR)/ecm-b1_ainc.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='b1_ainc.c' object='ecm-b1_ainc.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-b1_ainc.o `test -f 'b1_ainc.c' || echo '$(srcdir)/'`b1_ainc.c ecm-b1_ainc.obj: b1_ainc.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-b1_ainc.obj -MD -MP -MF $(DEPDIR)/ecm-b1_ainc.Tpo -c -o ecm-b1_ainc.obj `if test -f 'b1_ainc.c'; then $(CYGPATH_W) 'b1_ainc.c'; else $(CYGPATH_W) '$(srcdir)/b1_ainc.c'; fi` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-b1_ainc.Tpo $(DEPDIR)/ecm-b1_ainc.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='b1_ainc.c' object='ecm-b1_ainc.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-b1_ainc.obj `if test -f 'b1_ainc.c'; then 
$(CYGPATH_W) 'b1_ainc.c'; else $(CYGPATH_W) '$(srcdir)/b1_ainc.c'; fi` ecm-candi.o: candi.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-candi.o -MD -MP -MF $(DEPDIR)/ecm-candi.Tpo -c -o ecm-candi.o `test -f 'candi.c' || echo '$(srcdir)/'`candi.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-candi.Tpo $(DEPDIR)/ecm-candi.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='candi.c' object='ecm-candi.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-candi.o `test -f 'candi.c' || echo '$(srcdir)/'`candi.c ecm-candi.obj: candi.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-candi.obj -MD -MP -MF $(DEPDIR)/ecm-candi.Tpo -c -o ecm-candi.obj `if test -f 'candi.c'; then $(CYGPATH_W) 'candi.c'; else $(CYGPATH_W) '$(srcdir)/candi.c'; fi` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-candi.Tpo $(DEPDIR)/ecm-candi.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='candi.c' object='ecm-candi.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-candi.obj `if test -f 'candi.c'; then $(CYGPATH_W) 'candi.c'; else $(CYGPATH_W) '$(srcdir)/candi.c'; fi` ecm-eval.o: eval.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-eval.o -MD -MP -MF $(DEPDIR)/ecm-eval.Tpo -c -o ecm-eval.o `test -f 'eval.c' || echo '$(srcdir)/'`eval.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-eval.Tpo $(DEPDIR)/ecm-eval.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='eval.c' object='ecm-eval.o' 
libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-eval.o `test -f 'eval.c' || echo '$(srcdir)/'`eval.c ecm-eval.obj: eval.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-eval.obj -MD -MP -MF $(DEPDIR)/ecm-eval.Tpo -c -o ecm-eval.obj `if test -f 'eval.c'; then $(CYGPATH_W) 'eval.c'; else $(CYGPATH_W) '$(srcdir)/eval.c'; fi` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-eval.Tpo $(DEPDIR)/ecm-eval.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='eval.c' object='ecm-eval.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-eval.obj `if test -f 'eval.c'; then $(CYGPATH_W) 'eval.c'; else $(CYGPATH_W) '$(srcdir)/eval.c'; fi` ecm-random.o: random.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-random.o -MD -MP -MF $(DEPDIR)/ecm-random.Tpo -c -o ecm-random.o `test -f 'random.c' || echo '$(srcdir)/'`random.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-random.Tpo $(DEPDIR)/ecm-random.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='random.c' object='ecm-random.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-random.o `test -f 'random.c' || echo '$(srcdir)/'`random.c ecm-random.obj: random.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-random.obj -MD 
-MP -MF $(DEPDIR)/ecm-random.Tpo -c -o ecm-random.obj `if test -f 'random.c'; then $(CYGPATH_W) 'random.c'; else $(CYGPATH_W) '$(srcdir)/random.c'; fi` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-random.Tpo $(DEPDIR)/ecm-random.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='random.c' object='ecm-random.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-random.obj `if test -f 'random.c'; then $(CYGPATH_W) 'random.c'; else $(CYGPATH_W) '$(srcdir)/random.c'; fi` ecm-main.o: main.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-main.o -MD -MP -MF $(DEPDIR)/ecm-main.Tpo -c -o ecm-main.o `test -f 'main.c' || echo '$(srcdir)/'`main.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-main.Tpo $(DEPDIR)/ecm-main.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='main.c' object='ecm-main.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-main.o `test -f 'main.c' || echo '$(srcdir)/'`main.c ecm-main.obj: main.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-main.obj -MD -MP -MF $(DEPDIR)/ecm-main.Tpo -c -o ecm-main.obj `if test -f 'main.c'; then $(CYGPATH_W) 'main.c'; else $(CYGPATH_W) '$(srcdir)/main.c'; fi` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/ecm-main.Tpo $(DEPDIR)/ecm-main.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='main.c' object='ecm-main.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-main.obj `if test -f 'main.c'; then $(CYGPATH_W) 'main.c'; else $(CYGPATH_W) '$(srcdir)/main.c'; fi`

# ---------------------------------------------------------------------------
# Per-target C compile rules.
#
# Every source file below gets two rules with the same five-line recipe:
#   * a ".o" rule, which compiles the file from the current directory if
#     `test -f` finds it there and from $(srcdir) otherwise;
#   * a ".obj" rule, which does the same lookup but passes the chosen path
#     through $(CYGPATH_W) (defined elsewhere; presumably a path converter
#     for Windows-style toolchains -- confirm against the configure output).
#
# The recipe lines are guarded by the am__fastdepCC_TRUE, am__fastdepCC_FALSE
# and AMDEP_TRUE placeholders (presumably substituted at configure time so
# that only one branch stays active -- confirm):
#   * am__fastdepCC_TRUE branch: compile with -MT/-MD/-MP/-MF so the compiler
#     writes $(DEPDIR)/<object>.Tpo, then $(am__mv) renames it to .Po.
#   * am__fastdepCC_FALSE branch: a plain "-c -o" compile; when AMDEP_TRUE is
#     also active it is preceded by source=/object=/libtool= settings and
#     $(depcomp), joined via the AMDEPBACKSLASH placeholder (presumably a
#     line-continuation backslash -- confirm).
# ---------------------------------------------------------------------------

# resume.c -> ecm-resume.o (uses ecm_CPPFLAGS and ecm_CFLAGS).
ecm-resume.o: resume.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-resume.o -MD -MP -MF $(DEPDIR)/ecm-resume.Tpo -c -o ecm-resume.o `test -f 'resume.c' || echo '$(srcdir)/'`resume.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/ecm-resume.Tpo $(DEPDIR)/ecm-resume.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='resume.c' object='ecm-resume.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-resume.o `test -f 'resume.c' || echo '$(srcdir)/'`resume.c

# resume.c -> ecm-resume.obj (CYGPATH_W variant of the rule above).
ecm-resume.obj: resume.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-resume.obj -MD -MP -MF $(DEPDIR)/ecm-resume.Tpo -c -o ecm-resume.obj `if test -f 'resume.c'; then $(CYGPATH_W) 'resume.c'; else $(CYGPATH_W) '$(srcdir)/resume.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/ecm-resume.Tpo $(DEPDIR)/ecm-resume.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='resume.c' object='ecm-resume.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-resume.obj `if test -f 'resume.c'; then $(CYGPATH_W) 'resume.c'; else $(CYGPATH_W) '$(srcdir)/resume.c'; fi`

# getprime.c -> ecm-getprime.o (uses ecm_CPPFLAGS and ecm_CFLAGS).
ecm-getprime.o: getprime.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-getprime.o -MD -MP -MF $(DEPDIR)/ecm-getprime.Tpo -c -o ecm-getprime.o `test -f 'getprime.c' || echo '$(srcdir)/'`getprime.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/ecm-getprime.Tpo $(DEPDIR)/ecm-getprime.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='getprime.c' object='ecm-getprime.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-getprime.o `test -f 'getprime.c' || echo '$(srcdir)/'`getprime.c

# getprime.c -> ecm-getprime.obj (CYGPATH_W variant).
ecm-getprime.obj: getprime.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -MT ecm-getprime.obj -MD -MP -MF $(DEPDIR)/ecm-getprime.Tpo -c -o ecm-getprime.obj `if test -f 'getprime.c'; then $(CYGPATH_W) 'getprime.c'; else $(CYGPATH_W) '$(srcdir)/getprime.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/ecm-getprime.Tpo $(DEPDIR)/ecm-getprime.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='getprime.c' object='ecm-getprime.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(ecm_CPPFLAGS) $(CPPFLAGS) $(ecm_CFLAGS) $(CFLAGS) -c -o ecm-getprime.obj `if test -f 'getprime.c'; then $(CYGPATH_W) 'getprime.c'; else $(CYGPATH_W) '$(srcdir)/getprime.c'; fi`

# ecmfactor.c -> ecmfactor-ecmfactor.o (uses AM_CPPFLAGS and ecmfactor_CFLAGS).
ecmfactor-ecmfactor.o: ecmfactor.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ecmfactor_CFLAGS) $(CFLAGS) -MT ecmfactor-ecmfactor.o -MD -MP -MF $(DEPDIR)/ecmfactor-ecmfactor.Tpo -c -o ecmfactor-ecmfactor.o `test -f 'ecmfactor.c' || echo '$(srcdir)/'`ecmfactor.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/ecmfactor-ecmfactor.Tpo $(DEPDIR)/ecmfactor-ecmfactor.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='ecmfactor.c' object='ecmfactor-ecmfactor.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ecmfactor_CFLAGS) $(CFLAGS) -c -o ecmfactor-ecmfactor.o `test -f 'ecmfactor.c' || echo '$(srcdir)/'`ecmfactor.c

# ecmfactor.c -> ecmfactor-ecmfactor.obj (CYGPATH_W variant).
ecmfactor-ecmfactor.obj: ecmfactor.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ecmfactor_CFLAGS) $(CFLAGS) -MT ecmfactor-ecmfactor.obj -MD -MP -MF $(DEPDIR)/ecmfactor-ecmfactor.Tpo -c -o ecmfactor-ecmfactor.obj `if test -f 'ecmfactor.c'; then $(CYGPATH_W) 'ecmfactor.c'; else $(CYGPATH_W) '$(srcdir)/ecmfactor.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/ecmfactor-ecmfactor.Tpo $(DEPDIR)/ecmfactor-ecmfactor.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='ecmfactor.c' object='ecmfactor-ecmfactor.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ecmfactor_CFLAGS) $(CFLAGS) -c -o ecmfactor-ecmfactor.obj `if test -f 'ecmfactor.c'; then $(CYGPATH_W) 'ecmfactor.c'; else $(CYGPATH_W) '$(srcdir)/ecmfactor.c'; fi`

# rho.c -> rho-rho.o (uses rho_CPPFLAGS and AM_CFLAGS).
rho-rho.o: rho.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(rho_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT rho-rho.o -MD -MP -MF $(DEPDIR)/rho-rho.Tpo -c -o rho-rho.o `test -f 'rho.c' || echo '$(srcdir)/'`rho.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/rho-rho.Tpo $(DEPDIR)/rho-rho.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='rho.c' object='rho-rho.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(rho_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o rho-rho.o `test -f 'rho.c' || echo '$(srcdir)/'`rho.c

# rho.c -> rho-rho.obj (CYGPATH_W variant).
rho-rho.obj: rho.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(rho_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT rho-rho.obj -MD -MP -MF $(DEPDIR)/rho-rho.Tpo -c -o rho-rho.obj `if test -f 'rho.c'; then $(CYGPATH_W) 'rho.c'; else $(CYGPATH_W) '$(srcdir)/rho.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/rho-rho.Tpo $(DEPDIR)/rho-rho.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='rho.c' object='rho-rho.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(rho_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o rho-rho.obj `if test -f 'rho.c'; then $(CYGPATH_W) 'rho.c'; else $(CYGPATH_W) '$(srcdir)/rho.c'; fi`

# test_mulredc.c -> test_mulredc-test_mulredc.o (uses test_mulredc_CPPFLAGS and AM_CFLAGS).
test_mulredc-test_mulredc.o: test_mulredc.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_mulredc_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT test_mulredc-test_mulredc.o -MD -MP -MF $(DEPDIR)/test_mulredc-test_mulredc.Tpo -c -o test_mulredc-test_mulredc.o `test -f 'test_mulredc.c' || echo '$(srcdir)/'`test_mulredc.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/test_mulredc-test_mulredc.Tpo $(DEPDIR)/test_mulredc-test_mulredc.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='test_mulredc.c' object='test_mulredc-test_mulredc.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_mulredc_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o test_mulredc-test_mulredc.o `test -f 'test_mulredc.c' || echo '$(srcdir)/'`test_mulredc.c

# test_mulredc.c -> test_mulredc-test_mulredc.obj (CYGPATH_W variant).
test_mulredc-test_mulredc.obj: test_mulredc.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_mulredc_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT test_mulredc-test_mulredc.obj -MD -MP -MF $(DEPDIR)/test_mulredc-test_mulredc.Tpo -c -o test_mulredc-test_mulredc.obj `if test -f 'test_mulredc.c'; then $(CYGPATH_W) 'test_mulredc.c'; else $(CYGPATH_W) '$(srcdir)/test_mulredc.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/test_mulredc-test_mulredc.Tpo $(DEPDIR)/test_mulredc-test_mulredc.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='test_mulredc.c' object='test_mulredc-test_mulredc.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(test_mulredc_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o test_mulredc-test_mulredc.obj `if test -f 'test_mulredc.c'; then $(CYGPATH_W) 'test_mulredc.c'; else $(CYGPATH_W) '$(srcdir)/test_mulredc.c'; fi`

# All "tune-" objects below are compiled with tune_CPPFLAGS and AM_CFLAGS.

# mpmod.c -> tune-mpmod.o.
tune-mpmod.o: mpmod.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mpmod.o -MD -MP -MF $(DEPDIR)/tune-mpmod.Tpo -c -o tune-mpmod.o `test -f 'mpmod.c' || echo '$(srcdir)/'`mpmod.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mpmod.Tpo $(DEPDIR)/tune-mpmod.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mpmod.c' object='tune-mpmod.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mpmod.o `test -f 'mpmod.c' || echo '$(srcdir)/'`mpmod.c

# mpmod.c -> tune-mpmod.obj (CYGPATH_W variant).
tune-mpmod.obj: mpmod.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mpmod.obj -MD -MP -MF $(DEPDIR)/tune-mpmod.Tpo -c -o tune-mpmod.obj `if test -f 'mpmod.c'; then $(CYGPATH_W) 'mpmod.c'; else $(CYGPATH_W) '$(srcdir)/mpmod.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mpmod.Tpo $(DEPDIR)/tune-mpmod.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mpmod.c' object='tune-mpmod.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mpmod.obj `if test -f 'mpmod.c'; then $(CYGPATH_W) 'mpmod.c'; else $(CYGPATH_W) '$(srcdir)/mpmod.c'; fi`

# tune.c -> tune-tune.o.
tune-tune.o: tune.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-tune.o -MD -MP -MF $(DEPDIR)/tune-tune.Tpo -c -o tune-tune.o `test -f 'tune.c' || echo '$(srcdir)/'`tune.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-tune.Tpo $(DEPDIR)/tune-tune.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='tune.c' object='tune-tune.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-tune.o `test -f 'tune.c' || echo '$(srcdir)/'`tune.c

# tune.c -> tune-tune.obj (CYGPATH_W variant).
tune-tune.obj: tune.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-tune.obj -MD -MP -MF $(DEPDIR)/tune-tune.Tpo -c -o tune-tune.obj `if test -f 'tune.c'; then $(CYGPATH_W) 'tune.c'; else $(CYGPATH_W) '$(srcdir)/tune.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-tune.Tpo $(DEPDIR)/tune-tune.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='tune.c' object='tune-tune.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-tune.obj `if test -f 'tune.c'; then $(CYGPATH_W) 'tune.c'; else $(CYGPATH_W) '$(srcdir)/tune.c'; fi`

# mul_lo.c -> tune-mul_lo.o.
tune-mul_lo.o: mul_lo.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mul_lo.o -MD -MP -MF $(DEPDIR)/tune-mul_lo.Tpo -c -o tune-mul_lo.o `test -f 'mul_lo.c' || echo '$(srcdir)/'`mul_lo.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mul_lo.Tpo $(DEPDIR)/tune-mul_lo.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mul_lo.c' object='tune-mul_lo.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mul_lo.o `test -f 'mul_lo.c' || echo '$(srcdir)/'`mul_lo.c

# mul_lo.c -> tune-mul_lo.obj (CYGPATH_W variant).
tune-mul_lo.obj: mul_lo.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mul_lo.obj -MD -MP -MF $(DEPDIR)/tune-mul_lo.Tpo -c -o tune-mul_lo.obj `if test -f 'mul_lo.c'; then $(CYGPATH_W) 'mul_lo.c'; else $(CYGPATH_W) '$(srcdir)/mul_lo.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mul_lo.Tpo $(DEPDIR)/tune-mul_lo.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mul_lo.c' object='tune-mul_lo.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mul_lo.obj `if test -f 'mul_lo.c'; then $(CYGPATH_W) 'mul_lo.c'; else $(CYGPATH_W) '$(srcdir)/mul_lo.c'; fi`

# listz.c -> tune-listz.o.
tune-listz.o: listz.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-listz.o -MD -MP -MF $(DEPDIR)/tune-listz.Tpo -c -o tune-listz.o `test -f 'listz.c' || echo '$(srcdir)/'`listz.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-listz.Tpo $(DEPDIR)/tune-listz.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='listz.c' object='tune-listz.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-listz.o `test -f 'listz.c' || echo '$(srcdir)/'`listz.c

# listz.c -> tune-listz.obj (CYGPATH_W variant).
tune-listz.obj: listz.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-listz.obj -MD -MP -MF $(DEPDIR)/tune-listz.Tpo -c -o tune-listz.obj `if test -f 'listz.c'; then $(CYGPATH_W) 'listz.c'; else $(CYGPATH_W) '$(srcdir)/listz.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-listz.Tpo $(DEPDIR)/tune-listz.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='listz.c' object='tune-listz.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-listz.obj `if test -f 'listz.c'; then $(CYGPATH_W) 'listz.c'; else $(CYGPATH_W) '$(srcdir)/listz.c'; fi`

# auxlib.c -> tune-auxlib.o.
tune-auxlib.o: auxlib.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-auxlib.o -MD -MP -MF $(DEPDIR)/tune-auxlib.Tpo -c -o tune-auxlib.o `test -f 'auxlib.c' || echo '$(srcdir)/'`auxlib.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-auxlib.Tpo $(DEPDIR)/tune-auxlib.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='auxlib.c' object='tune-auxlib.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-auxlib.o `test -f 'auxlib.c' || echo '$(srcdir)/'`auxlib.c

# auxlib.c -> tune-auxlib.obj (CYGPATH_W variant).
tune-auxlib.obj: auxlib.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-auxlib.obj -MD -MP -MF $(DEPDIR)/tune-auxlib.Tpo -c -o tune-auxlib.obj `if test -f 'auxlib.c'; then $(CYGPATH_W) 'auxlib.c'; else $(CYGPATH_W) '$(srcdir)/auxlib.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-auxlib.Tpo $(DEPDIR)/tune-auxlib.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='auxlib.c' object='tune-auxlib.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-auxlib.obj `if test -f 'auxlib.c'; then $(CYGPATH_W) 'auxlib.c'; else $(CYGPATH_W) '$(srcdir)/auxlib.c'; fi`

# ks-multiply.c -> tune-ks-multiply.o.
tune-ks-multiply.o: ks-multiply.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-ks-multiply.o -MD -MP -MF $(DEPDIR)/tune-ks-multiply.Tpo -c -o tune-ks-multiply.o `test -f 'ks-multiply.c' || echo '$(srcdir)/'`ks-multiply.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-ks-multiply.Tpo $(DEPDIR)/tune-ks-multiply.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='ks-multiply.c' object='tune-ks-multiply.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-ks-multiply.o `test -f 'ks-multiply.c' || echo '$(srcdir)/'`ks-multiply.c

# ks-multiply.c -> tune-ks-multiply.obj (CYGPATH_W variant).
tune-ks-multiply.obj: ks-multiply.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-ks-multiply.obj -MD -MP -MF $(DEPDIR)/tune-ks-multiply.Tpo -c -o tune-ks-multiply.obj `if test -f 'ks-multiply.c'; then $(CYGPATH_W) 'ks-multiply.c'; else $(CYGPATH_W) '$(srcdir)/ks-multiply.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-ks-multiply.Tpo $(DEPDIR)/tune-ks-multiply.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='ks-multiply.c' object='tune-ks-multiply.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-ks-multiply.obj `if test -f 'ks-multiply.c'; then $(CYGPATH_W) 'ks-multiply.c'; else $(CYGPATH_W) '$(srcdir)/ks-multiply.c'; fi`

# toomcook.c -> tune-toomcook.o.
tune-toomcook.o: toomcook.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-toomcook.o -MD -MP -MF $(DEPDIR)/tune-toomcook.Tpo -c -o tune-toomcook.o `test -f 'toomcook.c' || echo '$(srcdir)/'`toomcook.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-toomcook.Tpo $(DEPDIR)/tune-toomcook.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='toomcook.c' object='tune-toomcook.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-toomcook.o `test -f 'toomcook.c' || echo '$(srcdir)/'`toomcook.c

# toomcook.c -> tune-toomcook.obj (CYGPATH_W variant).
tune-toomcook.obj: toomcook.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-toomcook.obj -MD -MP -MF $(DEPDIR)/tune-toomcook.Tpo -c -o tune-toomcook.obj `if test -f 'toomcook.c'; then $(CYGPATH_W) 'toomcook.c'; else $(CYGPATH_W) '$(srcdir)/toomcook.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-toomcook.Tpo $(DEPDIR)/tune-toomcook.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='toomcook.c' object='tune-toomcook.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-toomcook.obj `if test -f 'toomcook.c'; then $(CYGPATH_W) 'toomcook.c'; else $(CYGPATH_W) '$(srcdir)/toomcook.c'; fi`

# schoen_strass.c -> tune-schoen_strass.o.
tune-schoen_strass.o: schoen_strass.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-schoen_strass.o -MD -MP -MF $(DEPDIR)/tune-schoen_strass.Tpo -c -o tune-schoen_strass.o `test -f 'schoen_strass.c' || echo '$(srcdir)/'`schoen_strass.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-schoen_strass.Tpo $(DEPDIR)/tune-schoen_strass.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='schoen_strass.c' object='tune-schoen_strass.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-schoen_strass.o `test -f 'schoen_strass.c' || echo '$(srcdir)/'`schoen_strass.c

# schoen_strass.c -> tune-schoen_strass.obj (CYGPATH_W variant).
tune-schoen_strass.obj: schoen_strass.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-schoen_strass.obj -MD -MP -MF $(DEPDIR)/tune-schoen_strass.Tpo -c -o tune-schoen_strass.obj `if test -f 'schoen_strass.c'; then $(CYGPATH_W) 'schoen_strass.c'; else $(CYGPATH_W) '$(srcdir)/schoen_strass.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-schoen_strass.Tpo $(DEPDIR)/tune-schoen_strass.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='schoen_strass.c' object='tune-schoen_strass.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-schoen_strass.obj `if test -f 'schoen_strass.c'; then $(CYGPATH_W) 'schoen_strass.c'; else $(CYGPATH_W) '$(srcdir)/schoen_strass.c'; fi`

# polyeval.c -> tune-polyeval.o.
tune-polyeval.o: polyeval.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-polyeval.o -MD -MP -MF $(DEPDIR)/tune-polyeval.Tpo -c -o tune-polyeval.o `test -f 'polyeval.c' || echo '$(srcdir)/'`polyeval.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-polyeval.Tpo $(DEPDIR)/tune-polyeval.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='polyeval.c' object='tune-polyeval.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-polyeval.o `test -f 'polyeval.c' || echo '$(srcdir)/'`polyeval.c

# polyeval.c -> tune-polyeval.obj (CYGPATH_W variant).
tune-polyeval.obj: polyeval.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-polyeval.obj -MD -MP -MF $(DEPDIR)/tune-polyeval.Tpo -c -o tune-polyeval.obj `if test -f 'polyeval.c'; then $(CYGPATH_W) 'polyeval.c'; else $(CYGPATH_W) '$(srcdir)/polyeval.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-polyeval.Tpo $(DEPDIR)/tune-polyeval.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='polyeval.c' object='tune-polyeval.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-polyeval.obj `if test -f 'polyeval.c'; then $(CYGPATH_W) 'polyeval.c'; else $(CYGPATH_W) '$(srcdir)/polyeval.c'; fi`

# median.c -> tune-median.o.
tune-median.o: median.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-median.o -MD -MP -MF $(DEPDIR)/tune-median.Tpo -c -o tune-median.o `test -f 'median.c' || echo '$(srcdir)/'`median.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-median.Tpo $(DEPDIR)/tune-median.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='median.c' object='tune-median.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-median.o `test -f 'median.c' || echo '$(srcdir)/'`median.c

# median.c -> tune-median.obj (CYGPATH_W variant).
tune-median.obj: median.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-median.obj -MD -MP -MF $(DEPDIR)/tune-median.Tpo -c -o tune-median.obj `if test -f 'median.c'; then $(CYGPATH_W) 'median.c'; else $(CYGPATH_W) '$(srcdir)/median.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-median.Tpo $(DEPDIR)/tune-median.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='median.c' object='tune-median.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-median.obj `if test -f 'median.c'; then $(CYGPATH_W) 'median.c'; else $(CYGPATH_W) '$(srcdir)/median.c'; fi`

# ecm_ntt.c -> tune-ecm_ntt.o.
tune-ecm_ntt.o: ecm_ntt.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-ecm_ntt.o -MD -MP -MF $(DEPDIR)/tune-ecm_ntt.Tpo -c -o tune-ecm_ntt.o `test -f 'ecm_ntt.c' || echo '$(srcdir)/'`ecm_ntt.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-ecm_ntt.Tpo $(DEPDIR)/tune-ecm_ntt.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='ecm_ntt.c' object='tune-ecm_ntt.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-ecm_ntt.o `test -f 'ecm_ntt.c' || echo '$(srcdir)/'`ecm_ntt.c

# ecm_ntt.c -> tune-ecm_ntt.obj (CYGPATH_W variant).
tune-ecm_ntt.obj: ecm_ntt.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-ecm_ntt.obj -MD -MP -MF $(DEPDIR)/tune-ecm_ntt.Tpo -c -o tune-ecm_ntt.obj `if test -f 'ecm_ntt.c'; then $(CYGPATH_W) 'ecm_ntt.c'; else $(CYGPATH_W) '$(srcdir)/ecm_ntt.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-ecm_ntt.Tpo $(DEPDIR)/tune-ecm_ntt.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='ecm_ntt.c' object='tune-ecm_ntt.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-ecm_ntt.obj `if test -f 'ecm_ntt.c'; then $(CYGPATH_W) 'ecm_ntt.c'; else $(CYGPATH_W) '$(srcdir)/ecm_ntt.c'; fi`

# ntt_gfp.c -> tune-ntt_gfp.o.
tune-ntt_gfp.o: ntt_gfp.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-ntt_gfp.o -MD -MP -MF $(DEPDIR)/tune-ntt_gfp.Tpo -c -o tune-ntt_gfp.o `test -f 'ntt_gfp.c' || echo '$(srcdir)/'`ntt_gfp.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-ntt_gfp.Tpo $(DEPDIR)/tune-ntt_gfp.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='ntt_gfp.c' object='tune-ntt_gfp.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-ntt_gfp.o `test -f 'ntt_gfp.c' || echo '$(srcdir)/'`ntt_gfp.c

# ntt_gfp.c -> tune-ntt_gfp.obj (CYGPATH_W variant).
tune-ntt_gfp.obj: ntt_gfp.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-ntt_gfp.obj -MD -MP -MF $(DEPDIR)/tune-ntt_gfp.Tpo -c -o tune-ntt_gfp.obj `if test -f 'ntt_gfp.c'; then $(CYGPATH_W) 'ntt_gfp.c'; else $(CYGPATH_W) '$(srcdir)/ntt_gfp.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-ntt_gfp.Tpo $(DEPDIR)/tune-ntt_gfp.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='ntt_gfp.c' object='tune-ntt_gfp.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-ntt_gfp.obj `if test -f 'ntt_gfp.c'; then $(CYGPATH_W) 'ntt_gfp.c'; else $(CYGPATH_W) '$(srcdir)/ntt_gfp.c'; fi`
# ---------------------------------------------------------------------------
# Per-target C compile rules for the "tune-" objects (all compiled with
# tune_CPPFLAGS and AM_CFLAGS).
#
# Each source file gets a ".o" rule and a ".obj" rule sharing one recipe
# shape.  The ".o" rule compiles the file from the current directory when
# `test -f` finds it there and from $(srcdir) otherwise; the ".obj" rule does
# the same lookup but passes the chosen path through $(CYGPATH_W) (defined
# elsewhere; presumably a path converter for Windows-style toolchains --
# confirm).
#
# Recipe lines are guarded by the am__fastdepCC_TRUE, am__fastdepCC_FALSE and
# AMDEP_TRUE placeholders (presumably substituted at configure time so only
# one branch stays active -- confirm).  The am__fastdepCC_TRUE branch compiles
# with -MT/-MD/-MP/-MF into $(DEPDIR)/<object>.Tpo and then renames that file
# to .Po with $(am__mv).  The am__fastdepCC_FALSE branch is a plain "-c -o"
# compile, preceded under AMDEP_TRUE by source=/object=/libtool= settings and
# $(depcomp), joined via the AMDEPBACKSLASH placeholder (presumably a
# line-continuation backslash -- confirm).
# ---------------------------------------------------------------------------

# mpzspv.c -> tune-mpzspv.o.
tune-mpzspv.o: mpzspv.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mpzspv.o -MD -MP -MF $(DEPDIR)/tune-mpzspv.Tpo -c -o tune-mpzspv.o `test -f 'mpzspv.c' || echo '$(srcdir)/'`mpzspv.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mpzspv.Tpo $(DEPDIR)/tune-mpzspv.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mpzspv.c' object='tune-mpzspv.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mpzspv.o `test -f 'mpzspv.c' || echo '$(srcdir)/'`mpzspv.c

# mpzspv.c -> tune-mpzspv.obj (CYGPATH_W variant).
tune-mpzspv.obj: mpzspv.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mpzspv.obj -MD -MP -MF $(DEPDIR)/tune-mpzspv.Tpo -c -o tune-mpzspv.obj `if test -f 'mpzspv.c'; then $(CYGPATH_W) 'mpzspv.c'; else $(CYGPATH_W) '$(srcdir)/mpzspv.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mpzspv.Tpo $(DEPDIR)/tune-mpzspv.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mpzspv.c' object='tune-mpzspv.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mpzspv.obj `if test -f 'mpzspv.c'; then $(CYGPATH_W) 'mpzspv.c'; else $(CYGPATH_W) '$(srcdir)/mpzspv.c'; fi`

# mpzspm.c -> tune-mpzspm.o.
tune-mpzspm.o: mpzspm.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mpzspm.o -MD -MP -MF $(DEPDIR)/tune-mpzspm.Tpo -c -o tune-mpzspm.o `test -f 'mpzspm.c' || echo '$(srcdir)/'`mpzspm.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mpzspm.Tpo $(DEPDIR)/tune-mpzspm.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mpzspm.c' object='tune-mpzspm.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mpzspm.o `test -f 'mpzspm.c' || echo '$(srcdir)/'`mpzspm.c

# mpzspm.c -> tune-mpzspm.obj (CYGPATH_W variant).
tune-mpzspm.obj: mpzspm.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mpzspm.obj -MD -MP -MF $(DEPDIR)/tune-mpzspm.Tpo -c -o tune-mpzspm.obj `if test -f 'mpzspm.c'; then $(CYGPATH_W) 'mpzspm.c'; else $(CYGPATH_W) '$(srcdir)/mpzspm.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mpzspm.Tpo $(DEPDIR)/tune-mpzspm.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mpzspm.c' object='tune-mpzspm.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mpzspm.obj `if test -f 'mpzspm.c'; then $(CYGPATH_W) 'mpzspm.c'; else $(CYGPATH_W) '$(srcdir)/mpzspm.c'; fi`

# sp.c -> tune-sp.o.
tune-sp.o: sp.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-sp.o -MD -MP -MF $(DEPDIR)/tune-sp.Tpo -c -o tune-sp.o `test -f 'sp.c' || echo '$(srcdir)/'`sp.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-sp.Tpo $(DEPDIR)/tune-sp.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='sp.c' object='tune-sp.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-sp.o `test -f 'sp.c' || echo '$(srcdir)/'`sp.c

# sp.c -> tune-sp.obj (CYGPATH_W variant).
tune-sp.obj: sp.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-sp.obj -MD -MP -MF $(DEPDIR)/tune-sp.Tpo -c -o tune-sp.obj `if test -f 'sp.c'; then $(CYGPATH_W) 'sp.c'; else $(CYGPATH_W) '$(srcdir)/sp.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-sp.Tpo $(DEPDIR)/tune-sp.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='sp.c' object='tune-sp.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-sp.obj `if test -f 'sp.c'; then $(CYGPATH_W) 'sp.c'; else $(CYGPATH_W) '$(srcdir)/sp.c'; fi`

# spv.c -> tune-spv.o.
tune-spv.o: spv.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-spv.o -MD -MP -MF $(DEPDIR)/tune-spv.Tpo -c -o tune-spv.o `test -f 'spv.c' || echo '$(srcdir)/'`spv.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-spv.Tpo $(DEPDIR)/tune-spv.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='spv.c' object='tune-spv.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-spv.o `test -f 'spv.c' || echo '$(srcdir)/'`spv.c

# spv.c -> tune-spv.obj (CYGPATH_W variant).
tune-spv.obj: spv.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-spv.obj -MD -MP -MF $(DEPDIR)/tune-spv.Tpo -c -o tune-spv.obj `if test -f 'spv.c'; then $(CYGPATH_W) 'spv.c'; else $(CYGPATH_W) '$(srcdir)/spv.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-spv.Tpo $(DEPDIR)/tune-spv.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='spv.c' object='tune-spv.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-spv.obj `if test -f 'spv.c'; then $(CYGPATH_W) 'spv.c'; else $(CYGPATH_W) '$(srcdir)/spv.c'; fi`

# spm.c -> tune-spm.o.
tune-spm.o: spm.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-spm.o -MD -MP -MF $(DEPDIR)/tune-spm.Tpo -c -o tune-spm.o `test -f 'spm.c' || echo '$(srcdir)/'`spm.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-spm.Tpo $(DEPDIR)/tune-spm.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='spm.c' object='tune-spm.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-spm.o `test -f 'spm.c' || echo '$(srcdir)/'`spm.c

# spm.c -> tune-spm.obj (CYGPATH_W variant).
tune-spm.obj: spm.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-spm.obj -MD -MP -MF $(DEPDIR)/tune-spm.Tpo -c -o tune-spm.obj `if test -f 'spm.c'; then $(CYGPATH_W) 'spm.c'; else $(CYGPATH_W) '$(srcdir)/spm.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-spm.Tpo $(DEPDIR)/tune-spm.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='spm.c' object='tune-spm.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-spm.obj `if test -f 'spm.c'; then $(CYGPATH_W) 'spm.c'; else $(CYGPATH_W) '$(srcdir)/spm.c'; fi`

# random.c -> tune-random.o.
tune-random.o: random.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-random.o -MD -MP -MF $(DEPDIR)/tune-random.Tpo -c -o tune-random.o `test -f 'random.c' || echo '$(srcdir)/'`random.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-random.Tpo $(DEPDIR)/tune-random.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='random.c' object='tune-random.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-random.o `test -f 'random.c' || echo '$(srcdir)/'`random.c

# random.c -> tune-random.obj (CYGPATH_W variant).
tune-random.obj: random.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-random.obj -MD -MP -MF $(DEPDIR)/tune-random.Tpo -c -o tune-random.obj `if test -f 'random.c'; then $(CYGPATH_W) 'random.c'; else $(CYGPATH_W) '$(srcdir)/random.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-random.Tpo $(DEPDIR)/tune-random.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='random.c' object='tune-random.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-random.obj `if test -f 'random.c'; then $(CYGPATH_W) 'random.c'; else $(CYGPATH_W) '$(srcdir)/random.c'; fi`

# mul_fft.c -> tune-mul_fft.o.
tune-mul_fft.o: mul_fft.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mul_fft.o -MD -MP -MF $(DEPDIR)/tune-mul_fft.Tpo -c -o tune-mul_fft.o `test -f 'mul_fft.c' || echo '$(srcdir)/'`mul_fft.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mul_fft.Tpo $(DEPDIR)/tune-mul_fft.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mul_fft.c' object='tune-mul_fft.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mul_fft.o `test -f 'mul_fft.c' || echo '$(srcdir)/'`mul_fft.c

# mul_fft.c -> tune-mul_fft.obj (CYGPATH_W variant).
tune-mul_fft.obj: mul_fft.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-mul_fft.obj -MD -MP -MF $(DEPDIR)/tune-mul_fft.Tpo -c -o tune-mul_fft.obj `if test -f 'mul_fft.c'; then $(CYGPATH_W) 'mul_fft.c'; else $(CYGPATH_W) '$(srcdir)/mul_fft.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-mul_fft.Tpo $(DEPDIR)/tune-mul_fft.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='mul_fft.c' object='tune-mul_fft.obj' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-mul_fft.obj `if test -f 'mul_fft.c'; then $(CYGPATH_W) 'mul_fft.c'; else $(CYGPATH_W) '$(srcdir)/mul_fft.c'; fi`

# auxarith.c -> tune-auxarith.o.
tune-auxarith.o: auxarith.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-auxarith.o -MD -MP -MF $(DEPDIR)/tune-auxarith.Tpo -c -o tune-auxarith.o `test -f 'auxarith.c' || echo '$(srcdir)/'`auxarith.c
@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/tune-auxarith.Tpo $(DEPDIR)/tune-auxarith.Po
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='auxarith.c' object='tune-auxarith.o' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-auxarith.o `test -f 'auxarith.c' || echo '$(srcdir)/'`auxarith.c

# auxarith.c -> tune-auxarith.obj (CYGPATH_W variant).
# NOTE(review): this rule is cut off here; the rest of its recipe lies
# beyond the reviewed range and is reproduced only as far as visible.
tune-auxarith.obj: auxarith.c
@am__fastdepCC_TRUE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-auxarith.obj -MD -MP -MF $(DEPDIR)/tune-auxarith.Tpo -c -o tune-auxarith.obj `if test -f 'auxarith.c'; then $(CYGPATH_W) 'auxarith.c'; else $(CYGPATH_W) '$(srcdir)/auxarith.c'; fi`
@am__fastdepCC_TRUE@	$(am__mv)
$(DEPDIR)/tune-auxarith.Tpo $(DEPDIR)/tune-auxarith.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='auxarith.c' object='tune-auxarith.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-auxarith.obj `if test -f 'auxarith.c'; then $(CYGPATH_W) 'auxarith.c'; else $(CYGPATH_W) '$(srcdir)/auxarith.c'; fi` tune-memory.o: memory.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-memory.o -MD -MP -MF $(DEPDIR)/tune-memory.Tpo -c -o tune-memory.o `test -f 'memory.c' || echo '$(srcdir)/'`memory.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/tune-memory.Tpo $(DEPDIR)/tune-memory.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='memory.c' object='tune-memory.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-memory.o `test -f 'memory.c' || echo '$(srcdir)/'`memory.c tune-memory.obj: memory.c @am__fastdepCC_TRUE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT tune-memory.obj -MD -MP -MF $(DEPDIR)/tune-memory.Tpo -c -o tune-memory.obj `if test -f 'memory.c'; then $(CYGPATH_W) 'memory.c'; else $(CYGPATH_W) '$(srcdir)/memory.c'; fi` @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/tune-memory.Tpo $(DEPDIR)/tune-memory.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='memory.c' object='tune-memory.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(tune_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tune-memory.obj `if test -f 
'memory.c'; then $(CYGPATH_W) 'memory.c'; else $(CYGPATH_W) '$(srcdir)/memory.c'; fi` mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs distclean-libtool: -rm -f libtool config.lt install-man1: $(dist_man_MANS) @$(NORMAL_INSTALL) test -z "$(man1dir)" || $(MKDIR_P) "$(DESTDIR)$(man1dir)" @list=''; test -n "$(man1dir)" || exit 0; \ { for i in $$list; do echo "$$i"; done; \ l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ sed -n '/\.1[a-z]*$$/p'; \ } | while read p; do \ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; echo "$$p"; \ done | \ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ sed 'N;N;s,\n, ,g' | { \ list=; while read file base inst; do \ if test "$$base" = "$$inst"; then list="$$list $$file"; else \ echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ fi; \ done; \ for i in $$list; do echo "$$i"; done | $(am__base_list) | \ while read files; do \ test -z "$$files" || { \ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ done; } uninstall-man1: @$(NORMAL_UNINSTALL) @list=''; test -n "$(man1dir)" || exit 0; \ files=`{ for i in $$list; do echo "$$i"; done; \ l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ sed -n '/\.1[a-z]*$$/p'; \ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) install-includeHEADERS: $(include_HEADERS) @$(NORMAL_INSTALL) test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ done | $(am__base_list) | \ while read files; do \ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; 
\ $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \ done uninstall-includeHEADERS: @$(NORMAL_UNINSTALL) @list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir) # This directory's subdirectories are mostly independent; you can cd # into them and run `make' without going through this Makefile. # To change the values of `make' variables: instead of editing Makefiles, # (1) if the variable is set in `config.status', edit `config.status' # (which will cause the Makefiles to be regenerated when you run `make'); # (2) otherwise, pass the desired values on the `make' command line. $(RECURSIVE_TARGETS): @fail= failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ list='$(SUBDIRS)'; for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" $(RECURSIVE_CLEAN_TARGETS): @fail= failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ rev=''; for subdir in $$list; do \ if test "$$subdir" = "."; then :; else \ rev="$$subdir $$rev"; \ fi; \ done; \ rev="$$rev ."; \ target=`echo $@ | sed s/-recursive//`; \ for subdir in $$rev; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ ($(am__cd) $$subdir && 
$(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done && test -z "$$fail" tags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ done ctags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ done ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: tags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! 
-f $$subdir/TAGS || \ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: ctags-recursive $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @list='$(MANS)'; if test -n "$$list"; then \ list=`for p in $$list; do \ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \ if test -n "$$list" && \ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \ echo " typically \`make maintainer-clean' will remove them" >&2; \ exit 1; \ else :; fi; \ else :; fi 
$(am__remove_distdir) test -d "$(distdir)" || mkdir "$(distdir)" @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ fi; \ done @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \ $(am__relativize); \ new_distdir=$$reldir; \ dir1=$$subdir; dir2="$(top_distdir)"; \ $(am__relativize); \ new_top_distdir=$$reldir; \ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \ ($(am__cd) $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$new_top_distdir" \ distdir="$$new_distdir" \ am__remove_distdir=: \ am__skip_length_check=: \ am__skip_mode_fix=: \ distdir) \ || exit 1; \ 
fi; \ done -test -n "$(am__skip_mode_fix)" \ || find "$(distdir)" -type d ! -perm -755 \ -exec chmod u+rwx,go+rx {} \; -o \ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ || chmod -R a+r "$(distdir)" dist-gzip: distdir tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz $(am__remove_distdir) dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2 $(am__remove_distdir) dist-lzip: distdir tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz $(am__remove_distdir) dist-lzma: distdir tardir=$(distdir) && $(am__tar) | lzma -9 -c >$(distdir).tar.lzma $(am__remove_distdir) dist-xz: distdir tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz $(am__remove_distdir) dist-tarZ: distdir tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__remove_distdir) dist-shar: distdir shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz $(am__remove_distdir) dist-zip: distdir -rm -f $(distdir).zip zip -rq $(distdir).zip $(distdir) $(am__remove_distdir) dist dist-all: distdir tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz $(am__remove_distdir) # This target untars the dist file and tries a VPATH configuration. Then # it guarantees that the distribution is self-contained by making another # tarfile. 
distcheck: dist case '$(DIST_ARCHIVES)' in \ *.tar.gz*) \ GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\ *.tar.lzma*) \ lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\ *.tar.lz*) \ lzip -dc $(distdir).tar.lz | $(am__untar) ;;\ *.tar.xz*) \ xz -dc $(distdir).tar.xz | $(am__untar) ;;\ *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\ *.zip*) \ unzip $(distdir).zip ;;\ esac chmod -R a-w $(distdir); chmod a+w $(distdir) mkdir $(distdir)/_build mkdir $(distdir)/_inst chmod a-w $(distdir) test -d $(distdir)/_build || exit 0; \ dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && am__cwd=`pwd` \ && $(am__cd) $(distdir)/_build \ && ../configure --srcdir=.. --prefix="$$dc_install_base" \ $(AM_DISTCHECK_CONFIGURE_FLAGS) \ $(DISTCHECK_CONFIGURE_FLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) install \ && $(MAKE) $(AM_MAKEFLAGS) installcheck \ && $(MAKE) $(AM_MAKEFLAGS) uninstall \ && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ distuninstallcheck \ && chmod -R a-w "$$dc_install_base" \ && ({ \ (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ } || { rm -rf "$$dc_destdir"; exit 1; }) \ && rm -rf "$$dc_destdir" \ && $(MAKE) $(AM_MAKEFLAGS) dist \ && rm -rf $(DIST_ARCHIVES) \ && $(MAKE) $(AM_MAKEFLAGS) distcleancheck \ && cd "$$am__cwd" \ || exit 1 $(am__remove_distdir) @(echo "$(distdir) archives ready for distribution: "; \ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' distuninstallcheck: @test -n '$(distuninstallcheck_dir)' || { \ echo 'ERROR: trying to run $@ with an empty' \ '$$(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ $(am__cd) '$(distuninstallcheck_dir)' || { \ echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \ exit 1; \ }; \ test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left after uninstall:" ; \ if test -n "$(DESTDIR)"; then \ echo " (check DESTDIR support)"; \ fi ; \ $(distuninstallcheck_listfiles) ; \ exit 1; } >&2 distcleancheck: distclean @if test '$(srcdir)' = . 
; then \ echo "ERROR: distcleancheck can only run from a VPATH build" ; \ exit 1 ; \ fi @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left in build directory after distclean:" ; \ $(distcleancheck_listfiles) ; \ exit 1; } >&2 check-am: all-am check: check-recursive all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(MANS) $(HEADERS) \ config.h install-binPROGRAMS: install-libLTLIBRARIES installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(includedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-recursive clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \ clean-libtool clean-noinstPROGRAMS mostlyclean-am distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf ./$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-hdr distclean-libtool distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive html-am: info: info-recursive info-am: install-data-am: install-includeHEADERS install-man install-dvi: install-dvi-recursive install-dvi-am: install-exec-am: install-binPROGRAMS install-libLTLIBRARIES install-html: install-html-recursive install-html-am: install-info: install-info-recursive install-info-am: install-man: install-man1 install-pdf: install-pdf-recursive install-pdf-am: install-ps: install-ps-recursive install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache -rm -rf ./$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-binPROGRAMS uninstall-includeHEADERS \ uninstall-libLTLIBRARIES uninstall-man uninstall-man: uninstall-man1 .MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) all \ ctags-recursive install-am install-strip tags-recursive .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ all all-am am--refresh check check-am clean clean-binPROGRAMS \ clean-generic clean-libLTLIBRARIES clean-libtool \ clean-noinstPROGRAMS ctags ctags-recursive dist dist-all \ dist-bzip2 dist-gzip dist-lzip dist-lzma dist-shar dist-tarZ \ dist-xz dist-zip distcheck distclean distclean-compile \ distclean-generic distclean-hdr distclean-libtool \ distclean-tags distcleancheck distdir distuninstallcheck dvi \ dvi-am html html-am info info-am install 
install-am \ install-binPROGRAMS install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-includeHEADERS install-info \ install-info-am install-libLTLIBRARIES install-man \ install-man1 install-pdf install-pdf-am install-ps \ install-ps-am install-strip installcheck installcheck-am \ installdirs installdirs-am maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-recursive uninstall uninstall-am \ uninstall-binPROGRAMS uninstall-includeHEADERS \ uninstall-libLTLIBRARIES uninstall-man uninstall-man1 @ENABLE_ASM_REDC_TRUE@ # Add a tuning and testing program for the mulredc code @ENABLE_ASM_REDC_FALSE@ # Add a tuning program for the mulredc code @WITH_GWNUM_TRUE@ gwdata.ld : @WITH_GWNUM_TRUE@ echo "SECTIONS { .data : { . = ALIGN(0x20); *(_GWDATA) } }" >gwdata.ld @WITH_GWNUM_TRUE@ # Use ecm_DEPENDENCIES += gwdata.ld instead? Is that possible? @WITH_GWNUM_TRUE@ Fgwtest : Fgw.c gwdata.ld @WITH_GWNUM_TRUE@ $(CC) $(CFLAGS) $(CPPFLAGS) -g -DTESTDRIVE -Wl,gwdata.ld -o Fgwtest Fgw.c libecm.a $(LIBS) ecm-params: tune$(EXEEXT) @echo Optimising parameters for your system, please be patient. 
test -z "ecm-params.h" || rm -f ecm-params.h ./tune > ecm-params.h check: ecm$(EXEEXT) $(srcdir)/test.pp1 ./ecm$(EXEEXT) echo "" $(srcdir)/test.pm1 ./ecm$(EXEEXT) echo "" $(srcdir)/test.ecm ./ecm$(EXEEXT) longcheck: ecm$(EXEEXT) $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT)" $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT) -no-ntt" $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT) -modmuln" $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT) -redc" $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT) -mpzmod" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT)" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT) -no-ntt" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT) -modmuln" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT) -redc" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT) -mpzmod" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT)" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT) -no-ntt" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT) -modmuln" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT) -redc" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT) -mpzmod" @MAKE_MANPAGE_TRUE@ecm.1: $(srcdir)/ecm.xml @MAKE_MANPAGE_TRUE@ xsltproc -o ecm.1 $(XSLDIR)/manpages/docbook.xsl $(srcdir)/ecm.xml # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: ecm-6.4.4/ecm_ntt.c0000644023561000001540000002745412106741274011030 00000000000000/* ecm_ntt.c - high level poly functions to interface between ecm and sp Copyright 2005, 2006, 2007, 2008, 2009, 2011, 2012 Dave Newman, Paul Zimmermann, Alexander Kruppa. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include "sp.h" #include "ecm-impl.h" #ifdef HAVE_UNISTD_H #include /* for unlink */ #endif #define UNUSED 0 /* memory: 4 * len mpspv coeffs */ void ntt_mul (mpzv_t r, mpzv_t x, mpzv_t y, spv_size_t len, mpzv_t t, int monic, mpzspm_t mpzspm) { mpzspv_t u, v; if (len < MUL_NTT_THRESHOLD) { list_mul (r, x, len, monic, y, len, monic, t); return; } u = mpzspv_init (2 * len, mpzspm); v = mpzspv_init (2 * len, mpzspm); mpzspv_from_mpzv (v, 0, y, len, mpzspm); mpzspv_from_mpzv (u, 0, x, len, mpzspm); mpzspv_mul_ntt(u, 0, u, 0, len, v, 0, len, 2 * len, monic, monic ? 
2 * len : 0, mpzspm, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); mpzspv_to_mpzv (u, 0, r, 2 * len - 1 + monic, mpzspm); mpzspv_clear (u, mpzspm); mpzspv_clear (v, mpzspm); } /* memory: 2 * len mpzspv coeffs */ void ntt_PolyFromRoots (mpzv_t r, mpzv_t a, spv_size_t len, mpzv_t t, mpzspm_t mpzspm) { mpzspv_t x; spv_size_t i, m; ASSERT (len == ((spv_size_t)1) << ceil_log2 (len)); if (len <= MUL_NTT_THRESHOLD) { PolyFromRoots (r, a, len, t, mpzspm->modulus); return; } x = mpzspv_init (2 * len, mpzspm); for (i = 0; i < len; i += MUL_NTT_THRESHOLD) { PolyFromRoots (r, a + i, MUL_NTT_THRESHOLD, t, mpzspm->modulus); mpzspv_from_mpzv (x, 2 * i, r, MUL_NTT_THRESHOLD, mpzspm); } for (m = MUL_NTT_THRESHOLD; m < len; m *= 2) { for (i = 0; i < 2 * len; i += 4 * m) { mpzspv_mul_ntt (x, i, x, i, m, x, i + 2 * m, m, 2 * m, 1, 2 * m, mpzspm, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); if (2 * m < len) mpzspv_normalise (x, i, 2 * m, mpzspm); } } mpzspv_to_mpzv (x, 0, r, len, mpzspm); mpzspv_clear (x, mpzspm); } /* memory: 2 * len mpzspv coeffs */ int ntt_PolyFromRoots_Tree (mpzv_t r, mpzv_t a, spv_size_t len, mpzv_t t, int dolvl, mpzspm_t mpzspm, mpzv_t *Tree, FILE *TreeFile) { mpzspv_t x; spv_size_t i, m, m_max; mpzv_t src; mpzv_t *dst = Tree + ceil_log2 (len) - 1; ASSERT (len == ((spv_size_t)1) << ceil_log2 (len)); x = mpzspv_init (2 * len, mpzspm); if (dolvl >= 0) { src = a; dst = &r; } else { /* Copy the roots into the destination level of the tree (negating if so desired), set the source to this level (which now contains the possibly negated roots), and advance the destination level of the tree to the next level */ src = *dst; /* we consider x + root[i], which means we consider negated roots */ list_set (*dst--, a, len); } m = (dolvl == -1) ? 1 : 1 << (ceil_log2 (len) - 1 - dolvl); m_max = (dolvl == -1) ? 
len : 2 * m; for (; m < m_max && m < MUL_NTT_THRESHOLD; m *= 2) { /* dst = &r anyway for dolvl != -1 */ if (m == len / 2) dst = &r; if (TreeFile && list_out_raw (TreeFile, src, len) == ECM_ERROR) { outputf (OUTPUT_ERROR, "Error writing product tree of F\n"); return ECM_ERROR; } for (i = 0; i < len; i += 2 * m) list_mul (t + i, src + i, m, 1, src + i + m, m, 1, t + len); list_mod (*dst, t, len, mpzspm->modulus); src = *dst--; } for (; m < m_max; m *= 2) { ASSERT (m > 1); /* This code does not do the sign change. Let's assume MUL_NTT_THRESHOLD is always large enough that the degree 1 product are done in the above loop */ /* dst = &r anyway for dolvl != -1 */ if (m == len / 2) dst = &r; for (i = 0; i < 2 * len; i += 4 * m) { if (TreeFile && list_out_raw (TreeFile, src + i / 2, 2 * m) == ECM_ERROR) return ECM_ERROR; mpzspv_from_mpzv (x, i, src + i / 2, m, mpzspm); mpzspv_from_mpzv (x, i + 2 * m, src + i / 2 + m, m, mpzspm); mpzspv_mul_ntt (x, i, x, i, m, x, i + 2 * m, m, 2 * m, 1, 2 * m, mpzspm, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); mpzspv_to_mpzv (x, i, *dst + i / 2, 2 * m, mpzspm); /* we only do the mod reduction to reduce the file size a bit */ if (TreeFile) list_mod (*dst + i / 2, *dst + i / 2, 2 * m, mpzspm->modulus); } src = *dst--; } mpzspv_clear (x, mpzspm); return 0; } /* 2 NTTs of size 2 * len * 2 NTTs of size len * * memory: 2 * len mpzspv coeffs */ void ntt_PrerevertDivision (mpzv_t a, mpzv_t b, mpzv_t invb, mpzspv_t sp_b, mpzspv_t sp_invb, spv_size_t len, mpzv_t t, mpzspm_t mpzspm) { mpzspv_t x; if (len < PREREVERTDIVISION_NTT_THRESHOLD) { PrerevertDivision (a, b, invb, len, t, mpzspm->modulus); return; } x = mpzspv_init (2 * len, mpzspm); /* y = TOP (TOP (a) * invb) */ mpzspv_set_sp (x, 0, 0, len + 1, mpzspm); mpzspv_from_mpzv (x, len + 1, a + len, len - 1, mpzspm); mpzspv_mul_ntt (x, 0, x, 0, 2 * len, sp_invb, 0, UNUSED, 2 * len, 0, 0, mpzspm, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); 
mpzspv_normalise (x, 0, len, mpzspm);

  /* second product: the normalised low len coefficients of x times sp_b
     (again without the FFT2 step), result converted into t */
  mpzspv_mul_ntt (x, 0, x, 0, len, sp_b, 0, UNUSED, len, 0, 0, mpzspm,
      NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
  mpzspv_to_mpzv (x, 0, t, len, mpzspm);

  mpzspv_clear (x, mpzspm);

  /* t[0..len-2] -= a[len..2*len-2], then a[0..len-1] -= t[0..len-1] */
  list_sub (t, t, a + len, len - 1);
  list_sub (a, a, t, len);

  /* can we avoid this mod without risking overflow later? */
  list_mod (a, a, len, mpzspm->modulus);
}

/* NTT counterpart of PolyInvert, which is called directly when
   len < POLYINVERT_NTT_THRESHOLD.  Otherwise the top
   k = POLYINVERT_NTT_THRESHOLD / 2 coefficients of q are computed with
   PolyInvert (t as scratch) and the loop below doubles k until it
   reaches len, using two mpzspv_mul_ntt calls per pass.  The result is
   written to q.

   memory: 7/2 * len mpzspv coeffs (w: len / 2; x, y, z: len each) */
void
ntt_PolyInvert (mpzv_t q, mpzv_t b, spv_size_t len, mpzv_t t,
    mpzspm_t mpzspm)
{
  spv_size_t k = POLYINVERT_NTT_THRESHOLD / 2;
  mpzspv_t w, x, y, z;

  if (len < POLYINVERT_NTT_THRESHOLD)
    {
      PolyInvert (q, b, len, t, mpzspm->modulus);
      return;
    }

  /* starting values: classical inversion on the top k coefficients */
  PolyInvert (q + len - k, b + len - k, k, t, mpzspm->modulus);

  w = mpzspv_init (len / 2, mpzspm);
  x = mpzspv_init (len, mpzspm);
  y = mpzspv_init (len, mpzspm);
  z = mpzspv_init (len, mpzspm);

  /* x receives k + 1 coefficients starting at q[len - k - 1],
     y receives b[0..len-2] */
  mpzspv_from_mpzv (x, 0, q + len - k - 1, k + 1, mpzspm);
  mpzspv_from_mpzv (y, 0, b, len - 1, mpzspm);

  for (; k < len; k *= 2)
    {
      /* save x[1..k] in w; it is copied back at the end of the pass */
      mpzspv_set (w, 0, x, 1, k, mpzspm);
      /* z = 2 * k - 1 coefficients of y starting at index len - 2 * k */
      mpzspv_set (z, 0, y, len - 2 * k, 2 * k - 1, mpzspm);
      mpzspv_mul_ntt (z, 0, z, 0, 2 * k - 1, x, 0, k + 1, 2 * k, 0, 0,
          mpzspm,
          NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL +
          NTT_MUL_STEP_IFFT);
      mpzspv_normalise (z, k, k, mpzspm);
      /* z[0..k-1] = -z[k..2k-1] */
      mpzspv_neg (z, 0, z, k, k, mpzspm);

      /* No FFT1 step and length 0 for x here: presumably x still holds
         the transform computed by the FFT2 step of the call above --
         TODO confirm against mpzspv_mul_ntt */
      mpzspv_mul_ntt (x, 0, x, 0, 0, z, 0, k, 2 * k, 0, 0, mpzspm,
          NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT);
      /* skipped on the last pass, after which x is converted to mpz_t */
      if (2 * k < len)
        mpzspv_normalise (x, k, k, mpzspm);
      mpzspv_set (x, 1, x, k, k, mpzspm); /* legal overlap */
      /* put the saved coefficients back after the k new ones */
      mpzspv_set (x, k + 1, w, 0, MIN(k, len / 2 - 1), mpzspm);
    }

  /* q[0 .. len - POLYINVERT_NTT_THRESHOLD / 2 - 1] come from x[1..]; the
     remaining top coefficients of q were set by PolyInvert above */
  mpzspv_to_mpzv (x, 1, q, len - POLYINVERT_NTT_THRESHOLD / 2, mpzspm);

#if defined DEBUG
  /* self-check: coefficients len-1 .. 2*len-3 of q * b must be 0 and
     coefficient 2*len-2 must be 1 */
  ntt_mul (t, q, b, len, NULL, 0, mpzspm);
  list_mod (t, t, 2 * len - 1, mpzspm->modulus);

  spv_size_t i;
  for (i = len - 1; i < 2 * len - 2; i++)
    if (mpz_cmp_ui (t[i], 0))
      printf ("error in ntt_PolyInvert\n");
  if (mpz_cmp_ui (t[2 * len - 2], 1))
    printf ("error in ntt_PolyInvert-\n");
#endif

  mpzspv_clear (w, mpzspm);
mpzspv_clear (x, mpzspm); mpzspv_clear (y, mpzspm); mpzspv_clear (z, mpzspm); } /* memory: 4 * len mpzspv coeffs */ int ntt_polyevalT (mpzv_t b, spv_size_t len, mpzv_t *Tree, mpzv_t T, mpzspv_t sp_invF, mpzspm_t mpzspm, char *TreeFilenameStem) { spv_size_t m, i; FILE *TreeFile = NULL; /* assume this "small" malloc will not fail in normal usage */ char *TreeFilename = NULL; mpzv_t *Tree_orig = Tree; int level = 0; /* = ceil_log2 (len / m) - 1 */ mpzspv_t x = mpzspv_init (2 * len, mpzspm); mpzspv_t y = mpzspv_init (2 * len, mpzspm); if (TreeFilenameStem) { TreeFilename = (char *) malloc (strlen (TreeFilenameStem) + 1 + 2 + 1); if (TreeFilename == NULL) { fprintf (stderr, "Cannot allocate memory in ntt_polyevalT\n"); exit (1); } } mpzspv_from_mpzv (x, 0, b, len, mpzspm); mpzspv_mul_ntt(x, 0, x, 0, len, sp_invF, 0, UNUSED, 2 * len, 0, 0, mpzspm, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); mpzspv_normalise (x, len - 1, len, mpzspm); mpzspv_set (y, 0, x, len - 1, len, mpzspm); /* y = high (b * invF) */ mpzspv_reverse (y, 0, len, mpzspm); /* y = rev (high (b * invF)) */ for (m = len / 2; m >= POLYEVALT_NTT_THRESHOLD; m /= 2) { if (TreeFilenameStem) { Tree = &T; sprintf (TreeFilename, "%s.%d", TreeFilenameStem, level); TreeFile = fopen (TreeFilename, "rb"); if (TreeFile == NULL) { outputf (OUTPUT_ERROR, "Error opening file %s for product tree of F\n", TreeFilename); mpzspv_clear (x, mpzspm); mpzspv_clear (y, mpzspm); return ECM_ERROR; } list_inp_raw (*Tree, TreeFile, len); fclose (TreeFile); unlink (TreeFilename); } for (i = 0; i < len; i += 2 * m) { list_revert (*Tree + i, m); mpzspv_set_sp (x, 0, 1, 1, mpzspm); mpzspv_from_mpzv (x, 1, *Tree + i, m, mpzspm); /* x contains reversed monic poly */ mpzspv_mul_ntt (x, 0, x, 0, m + 1, y, i, 2 * m, 2 * m, 0, 0, mpzspm, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_FFT2 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); if (m > POLYEVALT_NTT_THRESHOLD) mpzspv_normalise (x, m, m, mpzspm); list_revert (*Tree + i + m, m); mpzspv_set_sp (x, 
2 * m, 1, 1, mpzspm); mpzspv_from_mpzv (x, 2 * m + 1, *Tree + i + m, m, mpzspm); mpzspv_mul_ntt(x, 2 * m, x, 2 * m, m + 1, y, i, UNUSED, 2 * m, 0, 0, mpzspm, NTT_MUL_STEP_FFT1 + NTT_MUL_STEP_MUL + NTT_MUL_STEP_IFFT); if (m > POLYEVALT_NTT_THRESHOLD) mpzspv_normalise (x, 3 * m, m, mpzspm); mpzspv_set (y, i, x, 3 * m, m, mpzspm); mpzspv_set (y, i + m, x, m, m, mpzspm); } Tree++; level++; } mpzspv_clear (x, mpzspm); mpzspv_to_mpzv (y, 0, T, len, mpzspm); /* T = rev (high (b * invF)) */ mpzspv_clear (y, mpzspm); for (i = 0; i < len; i++) mpz_mod (T[i], T[i], mpzspm->modulus); for (; m >= 1; m /= 2) { if (TreeFilenameStem) { sprintf (TreeFilename, "%s.%d", TreeFilenameStem, level); TreeFile = fopen (TreeFilename, "rb"); if (TreeFile == NULL) { outputf (OUTPUT_ERROR, "Error opening file %s for product tree of F\n", TreeFilename); return ECM_ERROR; } } TUpTree (T, Tree_orig, len, T + len, level++, 0, mpzspm->modulus, TreeFile); if (TreeFilenameStem) { fclose (TreeFile); unlink (TreeFilename); } } if (TreeFilenameStem) free (TreeFilename); list_swap (b, T, len); return 0; } ecm-6.4.4/auxi.c0000644023561000001540000000760212106741273010335 00000000000000/* Auxiliary functions for GMP-ECM. Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2011, 2012 Paul Zimmermann, Alexander Kruppa, Laurent Fousse, Jim Fougeron, Cyril Bouvier. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. 
If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include "ecm-ecm.h" /****************************************************************************** * * * Auxiliary functions * * * ******************************************************************************/ /* returns the number of decimal digits of n */ unsigned int nb_digits (const mpz_t n) { mpz_t x; unsigned int size; size = mpz_sizeinbase (n, 10); /* the GMP documentation says mpz_sizeinbase returns the exact value, or one too big, thus: (a) either n < 10^(size-1), and n has size-1 digits (b) or n >= size-1, and n has size digits Note: mpz_sizeinbase returns 1 for n=0, thus we always have size >= 1. */ mpz_init (x); mpz_ui_pow_ui (x, 10, size - 1); if (mpz_cmpabs (n, x) < 0) size --; mpz_clear (x); return size; } /* Tries to read a number from a line from fd and stores it in r. Keeps reading lines until a number is found. Lines beginning with "#" are skipped. Returns 1 if a number was successfully read, 0 if no number can be read (i.e. at EOF) Function is now simpler. Much of the logic (other than skipping # lines is now contained within eval() function. */ int read_number (mpcandi_t *n, FILE *fd, int primetest) { int c; new_line: c = fgetc (fd); /* Skip comment lines beginning with '#' */ if (c == '#') { do c = fgetc (fd); while (c != EOF && !IS_NEWLINE(c)); if (IS_NEWLINE(c)) goto new_line; } if (c == EOF) return 0; ungetc (c, fd); if (!eval (n, fd, primetest)) goto new_line; #if 0 /* Code to test out eval_str function, which "appears" to work correctly. */ { /* warning!! Line is pretty small, but since this is just testing code, we can easily control the input for this test. 
This code should NEVER be compiled into released build, its only for testing of eval_str() */ char Line[500], *cp; fgets (Line, sizeof(Line), fd); if (!eval_str (n, Line, primetest, &cp)) goto new_line; fprintf (stderr, "\nLine is at %X cp is at %X\n", Line, cp); } #endif #if defined (DEBUG_EVALUATOR) if (n->cpExpr) fprintf (stderr, "%s\n", n->cpExpr); mpz_out_str (stderr, 10, n->n); fprintf (stderr, "\n"); #endif return 1; } int probab_prime_p (mpz_t N, int reps) { #ifdef WANT_SHELLCMD if (prpcmd != NULL) { FILE *fc; int r; fc = popen (prpcmd, "w"); if (fc != NULL) { gmp_fprintf (fc, "%Zd\n", N); r = pclose (fc); if (r == 0) /* Exit status of 0 means success = is a PRP */ return 1; else return 0; } else { fprintf (stderr, "Error executing the PRP command\n"); exit (EXIT_FAILURE); } } else #endif return mpz_probab_prime_p (N, reps); } ecm-6.4.4/ecm-params.h.pentium30000644023561000001540000000072112106741273013157 00000000000000#define MPZMOD_THRESHOLD 135 #define REDC_THRESHOLD 200 #define MPN_MUL_LO_THRESHOLD_TABLE {0, 0, 1, 1, 0, 0, 5, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 1, 14, 14, 16, 16, 1} #define NTT_GFP_TWIDDLE_DIF_BREAKOVER 11 #define NTT_GFP_TWIDDLE_DIT_BREAKOVER 11 #define MUL_NTT_THRESHOLD 262144 #define PREREVERTDIVISION_NTT_THRESHOLD 128 #define POLYINVERT_NTT_THRESHOLD 65536 #define POLYEVALT_NTT_THRESHOLD 16384 #define MPZSPV_NORMALISE_STRIDE 512 ecm-6.4.4/mpzspv.c0000644023561000001540000007253712106741273010737 00000000000000/* mpzspv.c - "mpz small prime polynomial" functions for arithmetic on mpzv's reduced modulo a mpzspm Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Dave Newman, Jason Papadopoulos, Alexander Kruppa, Paul Zimmermann. The SP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. 
The SP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the SP Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include /* for stderr */ #include #include /* for memset */ #include "sp.h" mpzspv_t mpzspv_init (spv_size_t len, mpzspm_t mpzspm) { unsigned int i; mpzspv_t x = (mpzspv_t) malloc (mpzspm->sp_num * sizeof (spv_t)); if (x == NULL) return NULL; for (i = 0; i < mpzspm->sp_num; i++) { x[i] = (spv_t) sp_aligned_malloc (len * sizeof (sp_t)); if (x[i] == NULL) { while (i--) sp_aligned_free (x[i]); free (x); return NULL; } } return x; } void mpzspv_clear (mpzspv_t x, mpzspm_t mpzspm) { unsigned int i; ASSERT (mpzspv_verify (x, 0, 0, mpzspm)); for (i = 0; i < mpzspm->sp_num; i++) sp_aligned_free (x[i]); free (x); } /* check that: * - each of the spv's is at least offset + len long * - the data specified by (offset, len) is correctly normalised in the * range [0, sp) * * return 1 for success, 0 for failure */ int mpzspv_verify (mpzspv_t x, spv_size_t offset, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; spv_size_t j; for (i = 0; i < mpzspm->sp_num; i++) { for (j = offset; j < offset + len; j++) if (x[i][j] >= mpzspm->spm[i]->sp) return 0; } return 1; } void mpzspv_set (mpzspv_t r, spv_size_t r_offset, mpzspv_t x, spv_size_t x_offset, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; ASSERT (mpzspv_verify (r, r_offset + len, 0, mpzspm)); ASSERT (mpzspv_verify (x, x_offset, len, mpzspm)); for (i = 0; i < mpzspm->sp_num; i++) spv_set (r[i] + r_offset, x[i] + x_offset, len); } void mpzspv_revcopy (mpzspv_t r, spv_size_t r_offset, mpzspv_t x, spv_size_t x_offset, spv_size_t len, mpzspm_t mpzspm) { 
unsigned int i; ASSERT (mpzspv_verify (r, r_offset + len, 0, mpzspm)); ASSERT (mpzspv_verify (x, x_offset, len, mpzspm)); for (i = 0; i < mpzspm->sp_num; i++) spv_rev (r[i] + r_offset, x[i] + x_offset, len); } void mpzspv_set_sp (mpzspv_t r, spv_size_t offset, sp_t c, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; ASSERT (mpzspv_verify (r, offset + len, 0, mpzspm)); ASSERT (c < SP_MIN); /* not strictly necessary but avoids mod functions */ for (i = 0; i < mpzspm->sp_num; i++) spv_set_sp (r[i] + offset, c, len); } void mpzspv_neg (mpzspv_t r, spv_size_t r_offset, mpzspv_t x, spv_size_t x_offset, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; ASSERT (mpzspv_verify (r, r_offset + len, 0, mpzspm)); ASSERT (mpzspv_verify (x, x_offset, len, mpzspm)); for (i = 0; i < mpzspm->sp_num; i++) spv_neg (r[i] + r_offset, x[i] + x_offset, len, mpzspm->spm[i]->sp); } void mpzspv_add (mpzspv_t r, spv_size_t r_offset, mpzspv_t x, spv_size_t x_offset, mpzspv_t y, spv_size_t y_offset, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; ASSERT (mpzspv_verify (r, r_offset + len, 0, mpzspm)); ASSERT (mpzspv_verify (x, x_offset, len, mpzspm)); for (i = 0; i < mpzspm->sp_num; i++) spv_add (r[i] + r_offset, x[i] + x_offset, y[i] + y_offset, len, mpzspm->spm[i]->sp); } void mpzspv_reverse (mpzspv_t x, spv_size_t offset, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; spv_size_t j; sp_t t; spv_t spv; ASSERT (mpzspv_verify (x, offset, len, mpzspm)); for (i = 0; i < mpzspm->sp_num; i++) { spv = x[i] + offset; for (j = 0; j < len - 1 - j; j++) { t = spv[j]; spv[j] = spv[len - 1 - j]; spv[len - 1 - j] = t; } } } /* Return {xp, xn} mod p. Assume 2p < B where B = 2^GMP_NUMB_LIMB. We first compute {xp, xn} / B^n mod p using Montgomery reduction, where the number N to factor has n limbs. Then we multiply by B^(n+1) mod p (precomputed) and divide by B mod p. 
Assume invm = -1/p mod B and Bpow = B^n mod p */ static mp_limb_t ecm_mod_1 (mp_ptr xp, mp_size_t xn, mp_limb_t p, mp_size_t n, mp_limb_t invm, mp_limb_t Bpow) { mp_limb_t q, cy, hi, lo, x0, x1; if (xn == 0) return 0; /* the code below assumes xn <= n+1, thus we call mpn_mod_1 otherwise, but this should never (or rarely) happen */ if (xn > n + 1) return mpn_mod_1 (xp, xn, p); x0 = xp[0]; cy = (mp_limb_t) 0; while (n-- > 0) { /* Invariant: cy is the input carry on xp[1], x0 is xp[0] */ x1 = (xn > 1) ? xp[1] : 0; q = x0 * invm; /* q = -x0/p mod B */ umul_ppmm (hi, lo, q, p); /* hi*B + lo = -x0 mod B */ /* Add hi*B + lo to x1*B + x0. Since p <= B-2 we have hi*B + lo <= (B-1)(B-2) = B^2-3B+2, thus hi <= B-3 */ hi += cy + (lo != 0); /* cannot overflow */ x0 = x1 + hi; cy = x0 < hi; xn --; xp ++; } if (cy != 0) x0 -= p; /* now x0 = {xp, xn} / B^n mod p */ umul_ppmm (x1, x0, x0, Bpow); /* since Bpow < p, x1 <= p-1 */ q = x0 * invm; umul_ppmm (hi, lo, q, p); /* hi <= p-1 thus hi+x1+1 < 2p-1 < B */ hi = hi + x1 + (lo != 0); while (hi >= p) hi -= p; return hi; } /* convert mpzvi to CRT representation, naive version */ static void mpzspv_from_mpzv_slow (mpzspv_t x, const spv_size_t offset, mpz_t mpzvi, mpzspm_t mpzspm) { const unsigned int sp_num = mpzspm->sp_num; unsigned int j; mp_size_t n = mpz_size (mpzspm->modulus); /* GMP's comments on mpn_preinv_mod_1: * * "This function used to be documented, but is now considered obsolete. It * continues to exist for binary compatibility, even when not required * internally." * * It doesn't accept 0 as the dividend so we have to treat this case * separately */ /* Note: we can't use the mul_c field for mpn_preinv_mod_1, since on 64-bit it is floor(2^125/sp) where sp has 62 bits, and mpn_preinv_mod_1 needs floor(2^128/(4*sp))-2^64 = floor(2^126/sp)-2^64. On 32-bit it is floor(2^62/sp) where sp has 31 bits, and mpn_preinv_mod_1 needs floor(2^64/(2*sp))-2^32 = floor(2^63/sp)-2^32. */ /* Note: we could improve this as follows. 
Assume the number N to factor has n limbs. Instead of computing v mod p by reducing v by the high limbs, we first compute v/B^(n-1) mod p by reducing v by the low limbs, then deduce v mod p using a precomputed value of B^(n-1) mod p. The reduction v/B is done by using a precomputed k = 1/B mod p, thus v1*B+v0 = (v1+k*v0)*B and so on. */ for (j = 0; j < sp_num; j++) x[j][offset] = ecm_mod_1 (PTR(mpzvi), SIZ(mpzvi), (mp_limb_t) mpzspm->spm[j]->sp, n, mpzspm->spm[j]->invm, mpzspm->spm[j]->Bpow); /* The typecast to mp_limb_t assumes that mp_limb_t is at least as wide as sp_t */ } /* convert mpzvi to CRT representation, fast version, assumes mpzspm->T has been precomputed (see mpzspm.c) */ static void mpzspv_from_mpzv_fast (mpzspv_t x, const spv_size_t offset, mpz_t mpzvi, mpzspm_t mpzspm) { const unsigned int sp_num = mpzspm->sp_num; unsigned int i, j, k, i0 = I0_THRESHOLD, I0; mpzv_t *T = mpzspm->T; unsigned int d = mpzspm->d, ni; ASSERT (d > i0); /* T[0] serves as vector of temporary mpz_t's, since it contains the small primes, which are also in mpzspm->spm[j]->sp */ /* initially we split mpzvi in two */ ni = 1 << (d - 1); mpz_mod (T[0][0], mpzvi, T[d-1][0]); mpz_mod (T[0][ni], mpzvi, T[d-1][1]); for (i = d-1; i-- > i0;) { /* goes down from depth i+1 to i */ ni = 1 << i; for (j = k = 0; j + ni < sp_num; j += 2*ni, k += 2) { mpz_mod (T[0][j+ni], T[0][j], T[i][k+1]); mpz_mod (T[0][j], T[0][j], T[i][k]); } /* for the last entry T[0][j] if j < sp_num, there is nothing to do */ } /* last steps */ I0 = 1 << i0; for (j = 0; j < sp_num; j += I0) for (k = j; k < j + I0 && k < sp_num; k++) x[k][offset] = mpn_mod_1 (PTR(T[0][j]), SIZ(T[0][j]), (mp_limb_t) mpzspm->spm[k]->sp); /* The typecast to mp_limb_t assumes that mp_limb_t is at least as wide as sp_t */ } /* convert an array of len mpz_t numbers to CRT representation modulo sp_num moduli */ void mpzspv_from_mpzv (mpzspv_t x, const spv_size_t offset, const mpzv_t mpzv, const spv_size_t len, mpzspm_t mpzspm) { const unsigned 
int sp_num = mpzspm->sp_num; long i; ASSERT (mpzspv_verify (x, offset + len, 0, mpzspm)); ASSERT (sizeof (mp_limb_t) >= sizeof (sp_t)); #if defined(_OPENMP) #pragma omp parallel private(i) if (len > 16384) { /* Multi-threading with dynamic scheduling slows things down */ #pragma omp for schedule(static) #endif for (i = 0; i < (long) len; i++) { unsigned int j; if (mpz_sgn (mpzv[i]) == 0) { for (j = 0; j < sp_num; j++) x[j][i + offset] = 0; } else { ASSERT(mpz_sgn (mpzv[i]) > 0); /* We can't handle negative values */ if (mpzspm->T == NULL) mpzspv_from_mpzv_slow (x, i + offset, mpzv[i], mpzspm); else mpzspv_from_mpzv_fast (x, i + offset, mpzv[i], mpzspm); } } #if defined(_OPENMP) } #endif } /* See: Daniel J. Bernstein and Jonathan P. Sorenson, * Modular Exponentiation via the explicit Chinese Remainder Theorem * * memory: MPZSPV_NORMALISE_STRIDE floats */ void mpzspv_to_mpzv (mpzspv_t x, spv_size_t offset, mpzv_t mpzv, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; spv_size_t k, l; float *f = (float *) malloc (MPZSPV_NORMALISE_STRIDE * sizeof (float)); float prime_recip; sp_t t; spm_t *spm = mpzspm->spm; mpz_t mt; if (f == NULL) { fprintf (stderr, "Cannot allocate memory in mpzspv_to_mpzv\n"); exit (1); } ASSERT (mpzspv_verify (x, offset, len, mpzspm)); mpz_init (mt); for (l = 0; l < len; l += MPZSPV_NORMALISE_STRIDE) { spv_size_t stride = MIN (MPZSPV_NORMALISE_STRIDE, len - l); for (k = 0; k < stride; k++) { f[k] = 0.5; mpz_set_ui (mpzv[k + l], 0); } for (i = 0; i < mpzspm->sp_num; i++) { prime_recip = 1.0f / (float) spm[i]->sp; for (k = 0; k < stride; k++) { t = sp_mul (x[i][l + k + offset], mpzspm->crt3[i], spm[i]->sp, spm[i]->mul_c); if (sizeof (sp_t) > sizeof (unsigned long)) { mpz_set_sp (mt, t); mpz_addmul (mpzv[l + k], mpzspm->crt1[i], mt); } else { mpz_addmul_ui (mpzv[l + k], mpzspm->crt1[i], t); } f[k] += (float) t * prime_recip; } } for (k = 0; k < stride; k++) mpz_add (mpzv[l + k], mpzv[l + k], mpzspm->crt2[(unsigned int) f[k]]); } mpz_clear (mt); 
free (f); } void mpzspv_pwmul (mpzspv_t r, spv_size_t r_offset, mpzspv_t x, spv_size_t x_offset, mpzspv_t y, spv_size_t y_offset, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; ASSERT (mpzspv_verify (r, r_offset + len, 0, mpzspm)); ASSERT (mpzspv_verify (x, x_offset, len, mpzspm)); ASSERT (mpzspv_verify (y, y_offset, len, mpzspm)); for (i = 0; i < mpzspm->sp_num; i++) spv_pwmul (r[i] + r_offset, x[i] + x_offset, y[i] + y_offset, len, mpzspm->spm[i]->sp, mpzspm->spm[i]->mul_c); } /* B&S: ecrt mod m mod p_j. * * memory: MPZSPV_NORMALISE_STRIDE mpzspv coeffs * 6 * MPZSPV_NORMALISE_STRIDE sp's * MPZSPV_NORMALISE_STRIDE floats */ void mpzspv_normalise (mpzspv_t x, spv_size_t offset, spv_size_t len, mpzspm_t mpzspm) { unsigned int i, j, sp_num = mpzspm->sp_num; spv_size_t k, l; sp_t v; spv_t s, d, w; spm_t *spm = mpzspm->spm; float prime_recip; float *f; mpzspv_t t; ASSERT (mpzspv_verify (x, offset, len, mpzspm)); f = (float *) malloc (MPZSPV_NORMALISE_STRIDE * sizeof (float)); s = (spv_t) malloc (3 * MPZSPV_NORMALISE_STRIDE * sizeof (sp_t)); d = (spv_t) malloc (3 * MPZSPV_NORMALISE_STRIDE * sizeof (sp_t)); if (f == NULL || s == NULL || d == NULL) { fprintf (stderr, "Cannot allocate memory in mpzspv_normalise\n"); exit (1); } t = mpzspv_init (MPZSPV_NORMALISE_STRIDE, mpzspm); memset (s, 0, 3 * MPZSPV_NORMALISE_STRIDE * sizeof (sp_t)); for (l = 0; l < len; l += MPZSPV_NORMALISE_STRIDE) { spv_size_t stride = MIN (MPZSPV_NORMALISE_STRIDE, len - l); /* FIXME: use B&S Theorem 2.2 */ for (k = 0; k < stride; k++) f[k] = 0.5; for (i = 0; i < sp_num; i++) { prime_recip = 1.0f / (float) spm[i]->sp; for (k = 0; k < stride; k++) { x[i][l + k + offset] = sp_mul (x[i][l + k + offset], mpzspm->crt3[i], spm[i]->sp, spm[i]->mul_c); f[k] += (float) x[i][l + k + offset] * prime_recip; } } for (i = 0; i < sp_num; i++) { for (k = 0; k < stride; k++) { umul_ppmm (d[3 * k + 1], d[3 * k], mpzspm->crt5[i], (sp_t) f[k]); d[3 * k + 2] = 0; } for (j = 0; j < sp_num; j++) { w = x[j] + offset; v 
= mpzspm->crt4[i][j]; for (k = 0; k < stride; k++) umul_ppmm (s[3 * k + 1], s[3 * k], w[k + l], v); /* this mpn_add_n accounts for about a third of the function's * runtime */ mpn_add_n (d, d, s, 3 * stride); } for (k = 0; k < stride; k++) t[i][k] = mpn_mod_1 (d + 3 * k, 3, spm[i]->sp); } mpzspv_set (x, l + offset, t, 0, stride, mpzspm); } mpzspv_clear (t, mpzspm); free (s); free (d); free (f); } void mpzspv_to_ntt (mpzspv_t x, spv_size_t offset, spv_size_t len, spv_size_t ntt_size, int monic, mpzspm_t mpzspm) { unsigned int i; spv_size_t j, log2_ntt_size; spm_t spm; spv_t spv; ASSERT (mpzspv_verify (x, offset, len, mpzspm)); ASSERT (mpzspv_verify (x, offset + ntt_size, 0, mpzspm)); log2_ntt_size = ceil_log_2 (ntt_size); for (i = 0; i < mpzspm->sp_num; i++) { spm = mpzspm->spm[i]; spv = x[i] + offset; if (ntt_size < len) { for (j = ntt_size; j < len; j += ntt_size) spv_add (spv, spv, spv + j, ntt_size, spm->sp); } if (ntt_size > len) spv_set_zero (spv + len, ntt_size - len); if (monic) spv[len % ntt_size] = sp_add (spv[len % ntt_size], 1, spm->sp); spv_ntt_gfp_dif (spv, log2_ntt_size, spm); } } void mpzspv_from_ntt (mpzspv_t x, spv_size_t offset, spv_size_t ntt_size, spv_size_t monic_pos, mpzspm_t mpzspm) { unsigned int i; spv_size_t log2_ntt_size; spm_t spm; spv_t spv; ASSERT (mpzspv_verify (x, offset, ntt_size, mpzspm)); log2_ntt_size = ceil_log_2 (ntt_size); for (i = 0; i < mpzspm->sp_num; i++) { spm = mpzspm->spm[i]; spv = x[i] + offset; spv_ntt_gfp_dit (spv, log2_ntt_size, spm); /* spm->sp - (spm->sp - 1) / ntt_size is the inverse of ntt_size */ spv_mul_sp (spv, spv, spm->sp - (spm->sp - 1) / ntt_size, ntt_size, spm->sp, spm->mul_c); if (monic_pos) spv[monic_pos % ntt_size] = sp_sub (spv[monic_pos % ntt_size], 1, spm->sp); } } void mpzspv_random (mpzspv_t x, spv_size_t offset, spv_size_t len, mpzspm_t mpzspm) { unsigned int i; ASSERT (mpzspv_verify (x, offset, len, mpzspm)); for (i = 0; i < mpzspm->sp_num; i++) spv_random (x[i] + offset, len, 
mpzspm->spm[i]->sp); } /* Do multiplication via NTT. Depending on the value of "steps", does in-place forward transform of x, in-place forward transform of y, pair-wise multiplication of x by y to r, in-place inverse transform of r. Contrary to calling these three operations separately, this function does all three steps on a small-prime vector at a time, resulting in slightly better cache efficiency (also in preparation to storing NTT vectors on disk and reading them in for the multiplication). */ void mpzspv_mul_ntt (mpzspv_t r, const spv_size_t offsetr, mpzspv_t x, const spv_size_t offsetx, const spv_size_t lenx, mpzspv_t y, const spv_size_t offsety, const spv_size_t leny, const spv_size_t ntt_size, const int monic, const spv_size_t monic_pos, mpzspm_t mpzspm, const int steps) { spv_size_t log2_ntt_size; int i; ASSERT (mpzspv_verify (x, offsetx, lenx, mpzspm)); ASSERT (mpzspv_verify (y, offsety, leny, mpzspm)); ASSERT (mpzspv_verify (x, offsetx + ntt_size, 0, mpzspm)); ASSERT (mpzspv_verify (y, offsety + ntt_size, 0, mpzspm)); ASSERT (mpzspv_verify (r, offsetr + ntt_size, 0, mpzspm)); log2_ntt_size = ceil_log_2 (ntt_size); /* Need parallelization at higher level (e.g., handling a branch of the product tree in one thread) to make this worthwhile for ECM */ #define MPZSPV_MUL_NTT_OPENMP 0 #if defined(_OPENMP) && MPZSPV_MUL_NTT_OPENMP #pragma omp parallel if (ntt_size > 16384) { #pragma omp for #endif for (i = 0; i < (int) mpzspm->sp_num; i++) { spv_size_t j; spm_t spm = mpzspm->spm[i]; spv_t spvr = r[i] + offsetr; spv_t spvx = x[i] + offsetx; spv_t spvy = y[i] + offsety; if ((steps & NTT_MUL_STEP_FFT1) != 0) { if (ntt_size < lenx) { for (j = ntt_size; j < lenx; j += ntt_size) spv_add (spvx, spvx, spvx + j, ntt_size, spm->sp); } if (ntt_size > lenx) spv_set_zero (spvx + lenx, ntt_size - lenx); if (monic) spvx[lenx % ntt_size] = sp_add (spvx[lenx % ntt_size], 1, spm->sp); spv_ntt_gfp_dif (spvx, log2_ntt_size, spm); } if ((steps & NTT_MUL_STEP_FFT2) != 0) { if 
(ntt_size < leny) { for (j = ntt_size; j < leny; j += ntt_size) spv_add (spvy, spvy, spvy + j, ntt_size, spm->sp); } if (ntt_size > leny) spv_set_zero (spvy + leny, ntt_size - leny); if (monic) spvy[leny % ntt_size] = sp_add (spvy[leny % ntt_size], 1, spm->sp); spv_ntt_gfp_dif (spvy, log2_ntt_size, spm); } if ((steps & NTT_MUL_STEP_MUL) != 0) { spv_pwmul (spvr, spvx, spvy, ntt_size, spm->sp, spm->mul_c); } if ((steps & NTT_MUL_STEP_IFFT) != 0) { ASSERT (sizeof (mp_limb_t) >= sizeof (sp_t)); spv_ntt_gfp_dit (spvr, log2_ntt_size, spm); /* spm->sp - (spm->sp - 1) / ntt_size is the inverse of ntt_size */ spv_mul_sp (spvr, spvr, spm->sp - (spm->sp - 1) / ntt_size, ntt_size, spm->sp, spm->mul_c); if (monic_pos) spvr[monic_pos % ntt_size] = sp_sub (spvr[monic_pos % ntt_size], 1, spm->sp); } } #if defined(_OPENMP) && MPZSPV_MUL_NTT_OPENMP } #endif } /* Computes a DCT-I of the length dctlen. Input is the spvlen coefficients in spv. tmp is temp space and must have space for 2*dctlen-2 sp_t's */ void mpzspv_to_dct1 (mpzspv_t dct, const mpzspv_t spv, const spv_size_t spvlen, const spv_size_t dctlen, mpzspv_t tmp, const mpzspm_t mpzspm) { const spv_size_t l = 2 * (dctlen - 1); /* Length for the DFT */ const spv_size_t log2_l = ceil_log_2 (l); int j; #ifdef _OPENMP #pragma omp parallel private(j) { #pragma omp for #endif for (j = 0; j < (int) mpzspm->sp_num; j++) { const spm_t spm = mpzspm->spm[j]; spv_size_t i; /* Make a symmetric copy of spv in tmp. I.e. with spv = [3, 2, 1], spvlen = 3, dctlen = 5 (hence l = 8), we want tmp = [3, 2, 1, 0, 0, 0, 1, 2] */ spv_set (tmp[j], spv[j], spvlen); spv_rev (tmp[j] + l - spvlen + 1, spv[j] + 1, spvlen - 1); /* Now we have [3, 2, 1, ?, ?, ?, 1, 2]. Fill the ?'s with zeros. 
*/ spv_set_sp (tmp[j] + spvlen, (sp_t) 0, l - 2 * spvlen + 1); #if 0 printf ("mpzspv_to_dct1: tmp[%d] = [", j); for (i = 0; i < l; i++) printf ("%lu, ", tmp[j][i]); printf ("]\n"); #endif spv_ntt_gfp_dif (tmp[j], log2_l, spm); #if 0 printf ("mpzspv_to_dct1: tmp[%d] = [", j); for (i = 0; i < l; i++) printf ("%lu, ", tmp[j][i]); printf ("]\n"); #endif /* The forward transform is scrambled. We want elements [0 ... l/2] of the unscrabled data, that is all the coefficients with the most significant bit in the index (in log2(l) word size) unset, plus the element at index l/2. By scrambling, these map to the elements with even index, plus the element at index 1. The elements with scrambled index 2*i are stored in h[i], the element with scrambled index 1 is stored in h[params->l] */ #ifdef WANT_ASSERT /* Test that the coefficients are symmetric (if they were unscrambled) and that our algorithm for finding identical coefficients in the scrambled data works */ { spv_size_t m = 5; for (i = 2; i < l; i += 2L) { /* This works, but why? */ if (i + i / 2L > m) m = 2L * m + 1L; ASSERT (tmp[j][i] == tmp[j][m - i]); #if 0 printf ("mpzspv_to_dct1: DFT[%lu] == DFT[%lu]\n", i, m - i); #endif } } #endif /* Copy coefficients to dct buffer */ for (i = 0; i < l / 2; i++) dct[j][i] = tmp[j][i * 2]; dct[j][l / 2] = tmp[j][1]; } #ifdef _OPENMP } #endif } /* Multiply the polynomial in "dft" by the RLP in "dct", where "dft" contains the polynomial coefficients (not FFT'd yet) and "dct" contains the DCT-I coefficients of the RLP. The latter are assumed to be in the layout produced by mpzspv_to_dct1(). Output are the coefficients of the product polynomial, stored in dft. 
The "steps" parameter controls which steps are computed: NTT_MUL_STEP_FFT1: do forward transform NTT_MUL_STEP_MUL: do point-wise product NTT_MUL_STEP_IFFT: do inverse transform */ void mpzspv_mul_by_dct (mpzspv_t dft, const mpzspv_t dct, const spv_size_t len, const mpzspm_t mpzspm, const int steps) { int j; spv_size_t log2_len = ceil_log_2 (len); #ifdef _OPENMP #pragma omp parallel private(j) { #pragma omp for #endif for (j = 0; j < (int) (mpzspm->sp_num); j++) { const spm_t spm = mpzspm->spm[j]; const spv_t spv = dft[j]; unsigned long i, m; /* Forward DFT of dft[j] */ if ((steps & NTT_MUL_STEP_FFT1) != 0) spv_ntt_gfp_dif (spv, log2_len, spm); /* Point-wise product */ if ((steps & NTT_MUL_STEP_MUL) != 0) { m = 5UL; spv[0] = sp_mul (spv[0], dct[j][0], spm->sp, spm->mul_c); spv[1] = sp_mul (spv[1], dct[j][len / 2UL], spm->sp, spm->mul_c); for (i = 2UL; i < len; i += 2UL) { /* This works, but why? */ if (i + i / 2UL > m) m = 2UL * m + 1; spv[i] = sp_mul (spv[i], dct[j][i / 2UL], spm->sp, spm->mul_c); spv[m - i] = sp_mul (spv[m - i], dct[j][i / 2UL], spm->sp, spm->mul_c); } } /* Inverse transform of dft[j] */ if ((steps & NTT_MUL_STEP_IFFT) != 0) { spv_ntt_gfp_dit (spv, log2_len, spm); /* Divide by transform length. FIXME: scale the DCT of h instead */ spv_mul_sp (spv, spv, spm->sp - (spm->sp - 1) / len, len, spm->sp, spm->mul_c); } } #ifdef _OPENMP } #endif } void mpzspv_sqr_reciprocal (mpzspv_t dft, const spv_size_t n, const mpzspm_t mpzspm) { const spv_size_t log2_n = ceil_log_2 (n); const spv_size_t len = ((spv_size_t) 2) << log2_n; const spv_size_t log2_len = 1 + log2_n; int j; ASSERT(mpzspm->max_ntt_size % 3UL == 0UL); ASSERT(len % 3UL != 0UL); ASSERT(mpzspm->max_ntt_size % len == 0UL); #ifdef _OPENMP #pragma omp parallel { #pragma omp for #endif for (j = 0; j < (int) (mpzspm->sp_num); j++) { const spm_t spm = mpzspm->spm[j]; const spv_t spv = dft[j]; sp_t w1, w2, invlen; const sp_t sp = spm->sp, mul_c = spm->mul_c; spv_size_t i; /* Zero out NTT elements [n .. 
len-n] */ spv_set_sp (spv + n, (sp_t) 0, len - 2*n + 1); #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) { printf ("ntt_sqr_reciprocal: NTT vector mod %lu\n", sp); ntt_print_vec ("ntt_sqr_reciprocal: before weighting:", spv, len); } #endif /* Compute the root for the weight signal, a 3rd primitive root of unity */ w1 = sp_pow (spm->prim_root, mpzspm->max_ntt_size / 3UL, sp, mul_c); /* Compute iw= 1/w */ w2 = sp_pow (spm->inv_prim_root, mpzspm->max_ntt_size / 3UL, sp, mul_c); #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) printf ("w1 = %lu ,w2 = %lu\n", w1, w2); #endif ASSERT(sp_mul(w1, w2, sp, mul_c) == (sp_t) 1); ASSERT(w1 != (sp_t) 1); ASSERT(sp_pow (w1, 3UL, sp, mul_c) == (sp_t) 1); ASSERT(w2 != (sp_t) 1); ASSERT(sp_pow (w2, 3UL, sp, mul_c) == (sp_t) 1); /* Fill NTT elements spv[len-n+1 .. len-1] with coefficients and apply weight signal to spv[i] and spv[l-i] for 0 <= i < n Use the fact that w^i + w^{-i} = -1 if i != 0 (mod 3). */ for (i = 0; i + 2 < n; i += 3) { sp_t t, u; if (i > 0) spv[len - i] = spv[i]; t = spv[i + 1]; u = sp_mul (t, w1, sp, mul_c); spv[i + 1] = u; spv[len - i - 1] = sp_neg (sp_add (t, u, sp), sp); t = spv[i + 2]; u = sp_mul (t, w2, sp, mul_c); spv[i + 2] = u; spv[len - i - 2] = sp_neg (sp_add (t, u, sp), sp); } if (i < n && i > 0) { spv[len - i] = spv[i]; } if (i + 1 < n) { sp_t t, u; t = spv[i + 1]; u = sp_mul (t, w1, sp, mul_c); spv[i + 1] = u; spv[len - i - 1] = sp_neg (sp_add (t, u, sp), sp); } #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) ntt_print_vec ("ntt_sqr_reciprocal: after weighting:", spv, len); #endif /* Forward DFT of dft[j] */ spv_ntt_gfp_dif (spv, log2_len, spm); #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) ntt_print_vec ("ntt_sqr_reciprocal: after forward transform:", spv, len); #endif /* Square the transformed vector point-wise */ spv_pwmul (spv, spv, spv, len, sp, mul_c); #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) ntt_print_vec ("ntt_sqr_reciprocal: after point-wise squaring:", spv, len); #endif /* Inverse transform of dft[j] */ 
spv_ntt_gfp_dit (spv, log2_len, spm); #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) ntt_print_vec ("ntt_sqr_reciprocal: after inverse transform:", spv, len); #endif /* Un-weight and divide by transform length */ invlen = sp - (sp - (sp_t) 1) / len; /* invlen = 1/len (mod sp) */ w1 = sp_mul (invlen, w1, sp, mul_c); w2 = sp_mul (invlen, w2, sp, mul_c); for (i = 0; i < 2 * n - 3; i += 3) { spv[i] = sp_mul (spv[i], invlen, sp, mul_c); spv[i + 1] = sp_mul (spv[i + 1], w2, sp, mul_c); spv[i + 2] = sp_mul (spv[i + 2], w1, sp, mul_c); } if (i < 2 * n - 1) spv[i] = sp_mul (spv[i], invlen, sp, mul_c); if (i < 2 * n - 2) spv[i + 1] = sp_mul (spv[i + 1], w2, sp, mul_c); #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) ntt_print_vec ("ntt_sqr_reciprocal: after un-weighting:", spv, len); #endif /* Separate the coefficients of R in the wrapped-around product. */ /* Set w1 = cuberoot(1)^l where cuberoot(1) is the same primitive 3rd root of unity we used for the weight signal */ w1 = sp_pow (spm->prim_root, mpzspm->max_ntt_size / 3UL, sp, mul_c); w1 = sp_pow (w1, len % 3UL, sp, mul_c); /* Set w2 = 1/(w1 - 1/w1). Incidentally, w2 = 1/sqrt(-3) */ w2 = sp_inv (w1, sp, mul_c); w2 = sp_sub (w1, w2, sp); w2 = sp_inv (w2, sp, mul_c); #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) printf ("For separating: w1 = %lu, w2 = %lu\n", w1, w2); #endif for (i = len - (2*n - 2); i <= len / 2; i++) { sp_t t, u; /* spv[i] = s_i + w^{-l} s_{l-i}. 
spv[l-i] = s_{l-i} + w^{-l} s_i */ t = sp_mul (spv[i], w1, sp, mul_c); /* t = w^l s_i + s_{l-i} */ t = sp_sub (t, spv[len - i], sp); /* t = w^l s_i + w^{-l} s_i */ t = sp_mul (t, w2, sp, mul_c); /* t = s_1 */ u = sp_sub (spv[i], t, sp); /* u = w^{-l} s_{l-i} */ u = sp_mul (u, w1, sp, mul_c); /* u = s_{l-i} */ spv[i] = t; spv[len - i] = u; ASSERT(i < len / 2 || t == u); } #ifdef TRACE_ntt_sqr_reciprocal if (j == 0) ntt_print_vec ("ntt_sqr_reciprocal: after un-wrapping:", spv, len); #endif } #ifdef _OPENMP } #endif } ecm-6.4.4/acinclude.m40000644023561000001540000002514312106741274011415 00000000000000dnl Various routines adapted from gmp-4.1.4 define(X86_PATTERN, [[i?86*-*-* | k[5-8]*-*-* | pentium*-*-* | athlon-*-* | viac3*-*-*]]) dnl GMP_INIT([M4-DEF-FILE]) dnl ----------------------- dnl Initializations for GMP config.m4 generation. dnl dnl FIXME: The generated config.m4 doesn't get recreated by config.status. dnl Maybe the relevant "echo"s should go through AC_CONFIG_COMMANDS. AC_DEFUN([GMP_INIT], [ifelse([$1], , gmp_configm4=config.m4, gmp_configm4="[$1]") gmp_tmpconfigm4=cnfm4.tmp gmp_tmpconfigm4i=cnfm4i.tmp gmp_tmpconfigm4p=cnfm4p.tmp rm -f $gmp_tmpconfigm4 $gmp_tmpconfigm4i $gmp_tmpconfigm4p ]) dnl GMP_FINISH dnl ---------- dnl Create config.m4 from its accumulated parts. dnl dnl __CONFIG_M4_INCLUDED__ is used so that a second or subsequent include dnl of config.m4 is harmless. dnl dnl A separate ifdef on the angle bracket quoted part ensures the quoting dnl style there is respected. The basic defines from gmp_tmpconfigm4 are dnl fully quoted but are still put under an ifdef in case any have been dnl redefined by one of the m4 include files. dnl dnl Doing a big ifdef within asm-defs.m4 and/or other macro files wouldn't dnl work, since it'd interpret parentheses and quotes in dnl comments, and dnl having a whole file as a macro argument would overflow the string space dnl on BSD m4. 
AC_DEFUN([GMP_FINISH], [AC_REQUIRE([GMP_INIT]) echo "creating $gmp_configm4" echo ["d""nl $gmp_configm4. Generated automatically by configure."] > $gmp_configm4 if test -f $gmp_tmpconfigm4; then echo ["changequote(<,>)"] >> $gmp_configm4 echo ["ifdef(<__CONFIG_M4_INCLUDED__>,,<"] >> $gmp_configm4 cat $gmp_tmpconfigm4 >> $gmp_configm4 echo [">)"] >> $gmp_configm4 echo ["changequote(\`,')"] >> $gmp_configm4 rm $gmp_tmpconfigm4 fi echo ["ifdef(\`__CONFIG_M4_INCLUDED__',,\`"] >> $gmp_configm4 if test -f $gmp_tmpconfigm4i; then cat $gmp_tmpconfigm4i >> $gmp_configm4 rm $gmp_tmpconfigm4i fi if test -f $gmp_tmpconfigm4p; then cat $gmp_tmpconfigm4p >> $gmp_configm4 rm $gmp_tmpconfigm4p fi echo ["')"] >> $gmp_configm4 echo ["define(\`__CONFIG_M4_INCLUDED__')"] >> $gmp_configm4 ]) dnl GMP_PROG_M4 dnl ----------- dnl Find a working m4, either in $PATH or likely locations, and setup $M4 dnl and an AC_SUBST accordingly. If $M4 is already set then it's a user dnl choice and is accepted with no checks. GMP_PROG_M4 is like dnl AC_PATH_PROG or AC_CHECK_PROG, but tests each m4 found to see if it's dnl good enough. dnl dnl See mpn/asm-defs.m4 for details on the known bad m4s. AC_DEFUN([GMP_PROG_M4], [AC_ARG_VAR(M4,[m4 macro processor]) AC_CACHE_CHECK([for suitable m4], gmp_cv_prog_m4, [if test -n "$M4"; then gmp_cv_prog_m4="$M4" else cat >conftest.m4 <<\EOF dnl Must protect this against being expanded during autoconf m4! dnl Dont put "dnl"s in this as autoconf will flag an error for unexpanded dnl macros. 
[define(dollarhash,``$][#'')ifelse(dollarhash(x),1,`define(t1,Y)', ``bad: $][# not supported (SunOS /usr/bin/m4) '')ifelse(eval(89),89,`define(t2,Y)', `bad: eval() doesnt support 8 or 9 in a constant (OpenBSD 2.6 m4) ')ifelse(t1`'t2,YY,`good ')] EOF dnl ' <- balance the quotes for emacs sh-mode echo "trying m4" >&AC_FD_CC gmp_tmp_val=`(m4 conftest.m4) 2>&AC_FD_CC` echo "$gmp_tmp_val" >&AC_FD_CC if test "$gmp_tmp_val" = good; then gmp_cv_prog_m4="m4" else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" dnl $ac_dummy forces splitting on constant user-supplied paths. dnl POSIX.2 word splitting is done only on the output of word expansions, dnl not every word. This closes a longstanding sh security hole. ac_dummy="$PATH:/usr/5bin" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. echo "trying $ac_dir/m4" >&AC_FD_CC gmp_tmp_val=`($ac_dir/m4 conftest.m4) 2>&AC_FD_CC` echo "$gmp_tmp_val" >&AC_FD_CC if test "$gmp_tmp_val" = good; then gmp_cv_prog_m4="$ac_dir/m4" break fi done IFS="$ac_save_ifs" if test -z "$gmp_cv_prog_m4"; then AC_MSG_ERROR([No usable m4 in \$PATH or /usr/5bin (see config.log for reasons).]) fi fi rm -f conftest.m4 fi]) M4="$gmp_cv_prog_m4" AC_SUBST(M4) ]) dnl GMP_DEFINE(MACRO, DEFINITION [, LOCATION]) dnl ------------------------------------------ dnl Define M4 macro MACRO as DEFINITION in temporary file. dnl dnl If LOCATION is `POST', the definition will appear after any include() dnl directives inserted by GMP_INCLUDE. Mind the quoting! No shell dnl variables will get expanded. Don't forget to invoke GMP_FINISH to dnl create file config.m4. config.m4 uses `<' and '>' as quote characters dnl for all defines. AC_DEFUN([GMP_DEFINE], [AC_REQUIRE([GMP_INIT]) echo ['define(<$1>, <$2>)'] >>ifelse([$3], [POST], $gmp_tmpconfigm4p, $gmp_tmpconfigm4) ]) dnl GMP_TRY_ASSEMBLE(asm-code,[action-success][,action-fail]) dnl ---------------------------------------------------------- dnl Attempt to assemble the given code. 
dnl Do "action-success" if this succeeds, "action-fail" if not. dnl dnl conftest.o and conftest.out are available for inspection in dnl "action-success". If either action does a "break" out of a loop then dnl an explicit "rm -f conftest*" will be necessary. dnl dnl This is not unlike AC_TRY_COMPILE, but there's no default includes or dnl anything in "asm-code", everything wanted must be given explicitly. AC_DEFUN([GMP_TRY_ASSEMBLE], [cat >conftest.s <&AC_FD_CC ifelse([$2],,:,[$2]) else cat conftest.out >&AC_FD_CC echo "configure: failed program was:" >&AC_FD_CC cat conftest.s >&AC_FD_CC ifelse([$3],,:,[$3]) fi rm -f conftest* ]) dnl GMP_ASM_TYPE dnl ------------ dnl Can we say ".type", and how? dnl dnl For i386 GNU/Linux ELF systems, and very likely other ELF systems, dnl .type and .size are important on functions in shared libraries. If dnl .type is omitted and the mainline program references that function then dnl the code will be copied down to the mainline at load time like a piece dnl of data. If .size is wrong or missing (it defaults to 4 bytes or some dnl such) then incorrect bytes will be copied and a segv is the most likely dnl result. In any case such copying is not what's wanted, a .type dnl directive will ensure a PLT entry is used. dnl dnl In GMP the assembler functions are normally only used from within the dnl library (since most programs are not interested in the low level dnl routines), and in those circumstances a missing .type isn't fatal, dnl letting the problem go unnoticed. tests/mpn/t-asmtype.c aims to check dnl for it. 
AC_DEFUN([GMP_ASM_TYPE], [AC_CACHE_CHECK([for assembler .type directive], gmp_cv_asm_type, [gmp_cv_asm_type= for gmp_tmp_prefix in @ \# %; do GMP_TRY_ASSEMBLE([ .type sym,${gmp_tmp_prefix}function], [if grep "\.type pseudo-op used outside of \.def/\.endef ignored" conftest.out >/dev/null; then : ; else gmp_cv_asm_type=".type \$][1,${gmp_tmp_prefix}\$][2" break fi]) done rm -f conftest* ]) echo ["define(, <$gmp_cv_asm_type>)"] >> $gmp_tmpconfigm4 ]) dnl GMP_ASM_GLOBL dnl ------------- dnl Can we say `.global'? AC_DEFUN([GMP_ASM_GLOBL], [AC_CACHE_CHECK([how to export a symbol], gmp_cv_asm_globl, [case $host in *-*-hpux*) gmp_cv_asm_globl=".export" ;; *) gmp_cv_asm_globl=".globl" ;; esac ]) echo ["define(, <$gmp_cv_asm_globl>)"] >> $gmp_tmpconfigm4 ]) dnl GMP_ASM_TEXT dnl ------------ AC_DEFUN([GMP_ASM_TEXT], [AC_CACHE_CHECK([how to switch to text section], gmp_cv_asm_text, [case $host in *-*-aix*) gmp_cv_asm_text=[".csect .text[PR]"] ;; *-*-hpux*) gmp_cv_asm_text=".code" ;; *) gmp_cv_asm_text=".text" ;; esac ]) echo ["define(, <$gmp_cv_asm_text>)"] >> $gmp_tmpconfigm4 ]) dnl GMP_ASM_LABEL_SUFFIX dnl -------------------- dnl Should a label have a colon or not? AC_DEFUN([GMP_ASM_LABEL_SUFFIX], [AC_CACHE_CHECK([what assembly label suffix to use], gmp_cv_asm_label_suffix, [case $host in # Empty is only for the HP-UX hppa assembler; hppa gas requires a colon. *-*-hpux*) gmp_cv_asm_label_suffix= ;; *) gmp_cv_asm_label_suffix=: ;; esac ]) echo ["define(, <\$][1$gmp_cv_asm_label_suffix>)"] >> $gmp_tmpconfigm4 ]) dnl ECM_INCLUDE(FILE) dnl --------------------- dnl Add an include_mpn() to config.m4. FILE should be a path dnl relative to the main source directory, for example dnl dnl ECM_INCLUDE(`powerpc64/defs.m4') dnl AC_DEFUN([ECM_INCLUDE], [AC_REQUIRE([GMP_INIT]) echo ["include($1)"] >> $gmp_tmpconfigm4 ]) dnl GMP_ASM_UNDERSCORE dnl ------------------ dnl Determine whether global symbols need to be prefixed with an underscore. 
dnl A test program is linked to an assembler module with or without an dnl underscore to see which works. dnl dnl This method should be more reliable than grepping a .o file or using dnl nm, since it corresponds to what a real program is going to do. Note dnl in particular that grepping doesn't work with SunOS 4 native grep since dnl that grep seems to have trouble with '\0's in files. AC_DEFUN([GMP_ASM_UNDERSCORE], [AC_REQUIRE([GMP_ASM_TEXT]) AC_REQUIRE([GMP_ASM_GLOBL]) AC_REQUIRE([GMP_ASM_LABEL_SUFFIX]) AC_CACHE_CHECK([if globals are prefixed by underscore], gmp_cv_asm_underscore, [cat >conftes1.c <conftes2.s <>conftes2.s < #include #include /* GMP header file */ #include "ecm.h" /* ecm header file */ int main (int argc, char *argv[]) { mpz_t n, f; int res; double B1; if (argc != 3) { fprintf (stderr, "Usage: ecmfactor \n"); exit (1); } mpz_init (n); /* read number on command line */ if (mpz_set_str (n, argv[1], 10)) { fprintf (stderr, "Invalid number: %s\n", argv[1]); exit (1); } B1 = atof (argv[2]); mpz_init (f); /* for potential factor */ printf ("Performing one curve with B1=%1.0f\n", B1); res = ecm_factor (f, n, B1, NULL); if (res > 0) { printf ("found factor in step %u: ", res); mpz_out_str (stdout, 10, f); printf ("\n"); #if 0 printf ("lucky curve was b*y^2 = x^3 + a*x^2 + x\n"); printf ("with a = (v-u)^3*(3*u+v)/(4*u^3*v)-2,"); printf (" u = sigma^2-5, v = 4*sigma\n"); #endif } else if (res == ECM_NO_FACTOR_FOUND) printf ("found no factor\n"); else printf ("error\n"); mpz_clear (f); mpz_clear (n); return 0; } ecm-6.4.4/ChangeLog0000644023561000001540000135304412113421551010772 00000000000000------------------------------------------------------------------------ r2438 | kruppa | 2013-02-27 16:16:07 +0100 (Wed, 27 Feb 2013) | 3 lines Remove -t option from man page Makefile should look for ecm.xml in $(source)/ ------------------------------------------------------------------------ r2436 | kruppa | 2013-02-26 19:46:56 +0100 (Tue, 26 Feb 2013) | 2 lines 
Replaced several alloca() by malloc() to avoid segfault with very large P+-1 stage 2 ------------------------------------------------------------------------ r2435 | kruppa | 2013-02-26 19:44:05 +0100 (Tue, 26 Feb 2013) | 2 lines Removed memory leak due to surplus mpres_init() ------------------------------------------------------------------------ r2434 | zimmerma | 2013-02-22 15:07:14 +0100 (Fri, 22 Feb 2013) | 2 lines [main.c] removed -t option (should have been removed in r1860) ------------------------------------------------------------------------ r2433 | kruppa | 2013-02-22 13:26:09 +0100 (Fri, 22 Feb 2013) | 3 lines Use malloc() instead of alloca() for tmp in mpn_fft_fft_bailey_decompose() to avoid segfault with very large stage 2 ------------------------------------------------------------------------ r2422 | zimmerma | 2013-02-19 21:04:41 +0100 (Tue, 19 Feb 2013) | 2 lines [champions.h] updated for ECM ------------------------------------------------------------------------ r2421 | zimmerma | 2013-02-19 21:02:29 +0100 (Tue, 19 Feb 2013) | 2 lines [ChangeLog] updated ------------------------------------------------------------------------ r2419 | zimmerma | 2013-02-19 20:58:19 +0100 (Tue, 19 Feb 2013) | 2 lines [INSTALL-ecm] updated ------------------------------------------------------------------------ r2417 | zimmerma | 2013-02-19 20:42:03 +0100 (Tue, 19 Feb 2013) | 2 lines [NEWS] updated ------------------------------------------------------------------------ r2416 | kruppa | 2013-02-19 19:42:04 +0100 (Tue, 19 Feb 2013) | 5 lines Merged r1971 from trunk: applied patch from Leif Leonhardy to make the assembly code work with --enable-shared (see http://trac.sagemath.org/sage_trac/ticket/11705) ------------------------------------------------------------------------ r2414 | kruppa | 2013-02-19 17:36:35 +0100 (Tue, 19 Feb 2013) | 2 lines Updated changelog ------------------------------------------------------------------------ r2413 | kruppa | 2013-02-19
16:15:15 +0100 (Tue, 19 Feb 2013) | 2 lines Test was backwards :( ------------------------------------------------------------------------ r2412 | kruppa | 2013-02-19 16:10:27 +0100 (Tue, 19 Feb 2013) | 2 lines Define correct __gmpn_redc_{12} prototype for GMP <5.1. Define REDC{12} macros only if the functions exist. ------------------------------------------------------------------------ r2411 | kruppa | 2013-02-19 13:29:43 +0100 (Tue, 19 Feb 2013) | 2 lines Use defined() for HAVE_ALLOCA_H test ------------------------------------------------------------------------ r2407 | kruppa | 2013-02-15 14:22:50 +0100 (Fri, 15 Feb 2013) | 2 lines MinGW has alloca() prototype in malloc.h ------------------------------------------------------------------------ r2406 | kruppa | 2013-02-14 13:08:03 +0100 (Thu, 14 Feb 2013) | 3 lines Merging r2344: Remove stray '$' ------------------------------------------------------------------------ r2405 | kruppa | 2013-02-13 19:18:14 +0100 (Wed, 13 Feb 2013) | 2 lines Update version numbers for 6.4.4 ------------------------------------------------------------------------ r2404 | kruppa | 2013-02-13 18:31:09 +0100 (Wed, 13 Feb 2013) | 2 lines Fixed typo ------------------------------------------------------------------------ r2402 | kruppa | 2013-02-13 17:55:16 +0100 (Wed, 13 Feb 2013) | 2 lines Merge r2401 into 6.4.4 branch ------------------------------------------------------------------------ r2400 | kruppa | 2013-02-13 17:49:56 +0100 (Wed, 13 Feb 2013) | 2 lines Merge of commit r2320 ------------------------------------------------------------------------ r2399 | kruppa | 2013-02-13 17:42:57 +0100 (Wed, 13 Feb 2013) | 1 line Branch for patchlevel release 6.4.4 ------------------------------------------------------------------------ r2096 | kruppa | 2012-06-12 17:35:10 +0200 (Tue, 12 Jun 2012) | 2 lines Changed paths: M /branches/6.4.3/build.vc10/Makefile.am Removed bench_mulredc from EXTRA_DIST to avoid including .svn dir in 
distribution ------------------------------------------------------------------------ r2095 | kruppa | 2012-06-12 17:26:17 +0200 (Tue, 12 Jun 2012) | 2 lines Changed paths: M /branches/6.4.3/TODO M /branches/6.4.3/ecm.1 M /branches/6.4.3/ecm.xml Remove incorrect hyphen in "Peter Lawrence Montgomery" ------------------------------------------------------------------------ r2094 | brian_gladman | 2012-06-12 17:23:55 +0200 (Tue, 12 Jun 2012) | 1 line Changed paths: M /branches/6.4.3/build.vc10/tests.py Add a few more tests that I missed earlier ------------------------------------------------------------------------ r2093 | brian_gladman | 2012-06-12 16:45:25 +0200 (Tue, 12 Jun 2012) | 1 line Changed paths: M /branches/6.4.3/build.vc10/config.h M /branches/6.4.3/build.vc10/readme.txt M /branches/6.4.3/build.vc10/tests.py Minor update to Windows files and add new tests ------------------------------------------------------------------------ r2090 | kruppa | 2012-06-12 14:48:43 +0200 (Tue, 12 Jun 2012) | 2 lines Changed paths: M /trunk/test.pm1 Added test for bug fixed in revision 2068 ------------------------------------------------------------------------ r2088 | kruppa | 2012-06-12 14:16:03 +0200 (Tue, 12 Jun 2012) | 2 lines Changed paths: M /trunk/NEWS Updated for 6.4.3 release ------------------------------------------------------------------------ r2082 | kruppa | 2012-06-07 14:20:20 +0200 (Thu, 07 Jun 2012) | 2 lines Changed paths: M /trunk/cudawrapper.c M /trunk/ecm-ecm.h M /trunk/ecm-impl.h M /trunk/factor.c M /trunk/main.c M /trunk/random.c Make random value more 64-bit-like, following suggestion by Jayson King ------------------------------------------------------------------------ r2072 | zimmerma | 2012-06-04 10:33:24 +0200 (Mon, 04 Jun 2012) | 2 lines Changed paths: M /trunk/NEWS [NEWS] added item ------------------------------------------------------------------------ r2068 | kruppa | 2012-06-01 23:15:44 +0200 (Fri, 01 Jun 2012) | 2 lines Changed paths: M
/trunk/mpzspm.c Replace mpz_init_set_ui() by mpz_set_sp() so it works under Windows ------------------------------------------------------------------------ r1878 | zimmerma | 2012-03-19 10:11:57 +0100 (Mon, 19 Mar 2012) | 2 lines Changed paths: M /trunk/ecm-params.h.athlon64 M /trunk/ecm-params.h.core2 M /trunk/ecm-params.h.corei5 updated default tuning parameters ------------------------------------------------------------------------ r1877 | bouvierc | 2012-03-18 22:31:30 +0100 (Sun, 18 Mar 2012) | 2 lines Changed paths: D /trunk/gpu/gpu_ecm_cc13 Remove old gpu code. ------------------------------------------------------------------------ r1876 | brian_gladman | 2012-03-17 22:42:08 +0100 (Sat, 17 Mar 2012) | 1 line Changed paths: M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/ecm/ecm.vcxproj.filters remove trial.c from VC++ build ------------------------------------------------------------------------ r1875 | zimmerma | 2012-03-17 10:36:43 +0100 (Sat, 17 Mar 2012) | 3 lines Changed paths: M /trunk/NEWS M /trunk/mpzspm.c M /trunk/mpzspv.c M /trunk/sp.h M /trunk/spm.c implement new LSB reduction of residues mod small primes in mpzspv.c, yields significant speedup ------------------------------------------------------------------------ r1874 | zimmerma | 2012-03-17 08:08:46 +0100 (Sat, 17 Mar 2012) | 3 lines Changed paths: M /trunk/batch.c M /trunk/ellparam_batch.c [batch.c] fixed copyright years and typo [ellparam_batch.c] fixed copyright years ------------------------------------------------------------------------ r1873 | bouvierc | 2012-03-16 18:38:41 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: M /trunk/batch.c M /trunk/ellparam_batch.c Wrong names in the Copyright. 
------------------------------------------------------------------------ r1872 | zimmerma | 2012-03-16 18:35:39 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: D /trunk/gpu/getprime.c D /trunk/gpu/getprime.h D /trunk/gpu/makefile D /trunk/gpu/modular_arithmetic.c D /trunk/gpu/modular_arithmetic.h D /trunk/gpu/prototype.c D /trunk/gpu/prototype.h D /trunk/gpu/stage1-c.c D /trunk/gpu/stage1.c removed obsolete files ------------------------------------------------------------------------ r1871 | bouvierc | 2012-03-16 18:08:34 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: M /trunk/batch.c M /trunk/ellparam_batch.c M /trunk/main.c Add licence in batch.c and ellparam_batch.c ------------------------------------------------------------------------ r1867 | zimmerma | 2012-03-16 17:03:40 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: M /trunk/Fgw.c M /trunk/Makefile.am M /trunk/NEWS M /trunk/auxarith.c M /trunk/auxi.c M /trunk/auxlib.c M /trunk/b1_ainc.c M /trunk/bestd.c M /trunk/build.vc10/config.h M /trunk/candi.c M /trunk/configure.in D /trunk/countsmooth.c M /trunk/ecm-ecm.h M /trunk/ecm-gmp.h M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/ecm.h M /trunk/ecm2.c M /trunk/ecm_ntt.c M /trunk/ecmfactor.c M /trunk/eval.c M /trunk/factor.c M /trunk/getprime.c M /trunk/gpu/getprime.c M /trunk/gpu/modular_arithmetic.c M /trunk/gpu/prototype.c M /trunk/gpu/stage1-c.c M /trunk/gpu/stage1.c M /trunk/ks-multiply.c M /trunk/listz.c M /trunk/lucas.c M /trunk/main.c M /trunk/median.c M /trunk/memory.c M /trunk/mpmod.c M /trunk/mpmod.h M /trunk/mpzspm.c M /trunk/mpzspv.c M /trunk/mul_fft.c M /trunk/mul_lo.c A /trunk/nodist/rho.gp (from /trunk/rho.gp:1864) M /trunk/ntt_gfp.c M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/polyeval.c M /trunk/powerpc64/mulredc.m4 M /trunk/powerpc64/mulredc_1_2.m4 M /trunk/powerpc64/redc.asm M /trunk/pp1.c M /trunk/random.c M /trunk/resume.c M /trunk/rho.c D /trunk/rho.gp D /trunk/runecm2.c M /trunk/schoen_strass.c M /trunk/sets_long.c M /trunk/sp.c M 
/trunk/sp.h M /trunk/spm.c M /trunk/spv.c M /trunk/stage2.c M /trunk/test.ecm M /trunk/test.pm1 M /trunk/test.pp1 M /trunk/testlong.pp1 M /trunk/toomcook.c M /trunk/tune.c updated the copyright headers to GPL 3 and LGPL 3 ------------------------------------------------------------------------ r1866 | zimmerma | 2012-03-16 16:05:59 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: A /trunk/nodist/countsmooth.c (from /trunk/countsmooth.c:1864) A /trunk/nodist/runecm2.c (from /trunk/runecm2.c:1864) moved countsmooth.c and runecm2.c to nodist ------------------------------------------------------------------------ r1865 | zimmerma | 2012-03-16 16:04:58 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: A /trunk/nodist [nodist] new directory for non-distributed files ------------------------------------------------------------------------ r1864 | bouvierc | 2012-03-16 14:50:33 +0100 (Fri, 16 Mar 2012) | 3 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm/utils.h Forgot to commit the new Makefile! Now include also directly ecm-ecm.h in gpu/gpu_ecm/utils.h ------------------------------------------------------------------------ r1863 | bouvierc | 2012-03-16 14:37:12 +0100 (Fri, 16 Mar 2012) | 7 lines Changed paths: M /trunk/batch.c M /trunk/ecm-impl.h M /trunk/gpu/gpu_ecm/main.c M /trunk/gpu/gpu_ecm/utils.h M /trunk/random.c Rewriting Makefile of GPUECM to separate libecm file from ecm-ecm file. [Makefile] Separate file from libecm. Goal: linking directly libecm. 
[batch.c] Now only include ecm-impl.h [random.c] No outputf in GPUECM [main.c] change #define to avoid conflicts ------------------------------------------------------------------------ r1862 | zimmerma | 2012-03-16 10:21:32 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: D /trunk/ecmfactor2.c [ecmfactor2.c] removed unmaintained program ------------------------------------------------------------------------ r1861 | zimmerma | 2012-03-16 10:16:14 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: D /trunk/bestdaux.c [bestdaux.c] removed unused file ------------------------------------------------------------------------ r1860 | zimmerma | 2012-03-16 10:05:05 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: D /trunk/trial.c [trial.c] removed unmaintained and untested file ------------------------------------------------------------------------ r1859 | zimmerma | 2012-03-16 09:39:46 +0100 (Fri, 16 Mar 2012) | 4 lines Changed paths: M /trunk/Makefile.am M /trunk/mpmod.c M /trunk/test.ecm [test.ecm] added one test [mpmod.c] added an ASSERT [Makefile.am] missing tab ------------------------------------------------------------------------ r1858 | zimmerma | 2012-03-16 08:51:11 +0100 (Fri, 16 Mar 2012) | 2 lines Changed paths: M /trunk/tune.c [tune.c] removed trailing blank ------------------------------------------------------------------------ r1857 | zimmerma | 2012-03-16 08:13:22 +0100 (Fri, 16 Mar 2012) | 4 lines Changed paths: M /trunk/Makefile.am M /trunk/test.pm1 M /trunk/test.pp1 [test.pp1] added one test and removed blank line at the end [test.pm1] removed blank line at the end [Makefile.am] added blank line between tests ------------------------------------------------------------------------ r1856 | bouvierc | 2012-03-15 19:05:05 +0100 (Thu, 15 Mar 2012) | 2 lines Changed paths: M /trunk/batch.c Forgot to remove unused variables. 
------------------------------------------------------------------------ r1855 | zimmerma | 2012-03-15 19:00:20 +0100 (Thu, 15 Mar 2012) | 2 lines Changed paths: M /trunk/mpmod.c [mpmod.c] fixed wrong patch in r1851 ------------------------------------------------------------------------ r1854 | bouvierc | 2012-03-15 16:01:21 +0100 (Thu, 15 Mar 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu Changes in Cuda_Dbl_mod ------------------------------------------------------------------------ r1853 | bouvierc | 2012-03-15 13:29:24 +0100 (Thu, 15 Mar 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu Rewrite Normalize function. ------------------------------------------------------------------------ r1852 | bouvierc | 2012-03-15 13:27:47 +0100 (Thu, 15 Mar 2012) | 2 lines Changed paths: M /trunk/batch.c Rewrite dup_add_batch2 with 6 residues (like dup_add_batch1) ------------------------------------------------------------------------ r1851 | zimmerma | 2012-03-15 12:09:28 +0100 (Thu, 15 Mar 2012) | 5 lines Changed paths: M /trunk/batch.c M /trunk/mpmod.c [mpmod.c] fixed bug in ecm_redc_n (found on gcc45 and gcc61), probably a new release 6.4.2 is needed [batch.c] reduced number of auxiliary variables from 5 to 2 in dup_add_batch1 (remains to do the same in dup_add_batch2) ------------------------------------------------------------------------ r1850 | zimmerma | 2012-03-15 08:08:05 +0100 (Thu, 15 Mar 2012) | 4 lines Changed paths: M /trunk/NEWS M /trunk/build.vc10/config.h M /trunk/configure.in M /trunk/main.c M /trunk/test.ecm [configure.in,build.vc10/config.h] bump version to 7.0-dev [main.c] make batch=1 mode the default one for ECM [test.ecm] added -batch=0 where needed ------------------------------------------------------------------------ r1847 | zimmerma | 2012-03-14 23:19:47 +0100 (Wed, 14 Mar 2012) | 2 lines Changed paths: M /trunk/mpzspv.c [mpzspv.c] added note about possible improvement in mpzspv_from_mpzv_slow() 
------------------------------------------------------------------------ r1846 | zimmerma | 2012-03-14 20:38:17 +0100 (Wed, 14 Mar 2012) | 2 lines Changed paths: M /trunk/README.dev [README.dev] added tag for 6.4.1 release ------------------------------------------------------------------------ r1844 | zimmerma | 2012-03-14 18:29:22 +0100 (Wed, 14 Mar 2012) | 2 lines Changed paths: M /trunk/ChangeLog M /trunk/build.vc10/config.h M /trunk/configure.in This will be the 6.4.1 release (if all final tests pass) ------------------------------------------------------------------------ r1842 | zimmerma | 2012-03-14 18:25:52 +0100 (Wed, 14 Mar 2012) | 5 lines Changed paths: M /trunk/configure.in M /trunk/ecm-impl.h M /trunk/ecm-params.h.hppa2.0 M /trunk/ecm-params.h.ia64 M /trunk/ecm-params.h.mips64el M /trunk/ecm-params.h.powerpc970 M /trunk/ecm-params.h.sparc64 updated various tuning parameters [configure.in] tuning parameters for ia64 and hppa2.0 were not used! Also fixed check for MPIR [ecm-impl.h] fixed // comment ------------------------------------------------------------------------ r1840 | zimmerma | 2012-03-14 16:46:05 +0100 (Wed, 14 Mar 2012) | 2 lines Changed paths: M /trunk/ecm.c [ecm.c] removed computation of number of MULs and SQRs, to save a few cycles ------------------------------------------------------------------------ r1839 | bouvierc | 2012-03-14 15:47:08 +0100 (Wed, 14 Mar 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu Change in modular subtraction. ------------------------------------------------------------------------ r1838 | zimmerma | 2012-03-14 15:35:59 +0100 (Wed, 14 Mar 2012) | 2 lines Changed paths: M /trunk/tune.c [tune.c] check for failed memory allocation ------------------------------------------------------------------------ r1837 | bouvierc | 2012-03-14 15:28:56 +0100 (Wed, 14 Mar 2012) | 4 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu Add specific code for mul for CC 2.0 cards.
Treat differently access to constant variables. -> results in significant speed-up. ------------------------------------------------------------------------ r1836 | bouvierc | 2012-03-14 15:04:39 +0100 (Wed, 14 Mar 2012) | 3 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu Rewriting addition, subtraction and multiplication by 2. No comparison is needed anymore so Cuda_Cmp is removed. ------------------------------------------------------------------------ r1835 | bouvierc | 2012-03-14 14:53:30 +0100 (Wed, 14 Mar 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu Rewrite #define for asm statement. ------------------------------------------------------------------------ r1834 | zimmerma | 2012-03-14 09:27:53 +0100 (Wed, 14 Mar 2012) | 4 lines Changed paths: M /trunk/Makefile.am M /trunk/bench_mulredc.c [bench_mulredc.c] only print to stdout what goes to ecm-params.h so that we can do ./bench_mulredc >> ecm-params.h [Makefile.am] build bench_mulredc by default ------------------------------------------------------------------------ r1833 | zimmerma | 2012-03-13 21:04:49 +0100 (Tue, 13 Mar 2012) | 2 lines Changed paths: M /trunk/ecm-params.h.core2 [ecm-params.h.core2] updated ------------------------------------------------------------------------ r1832 | zimmerma | 2012-03-13 18:19:44 +0100 (Tue, 13 Mar 2012) | 3 lines Changed paths: M /trunk/ChangeLog M /trunk/build.vc10/config.h M /trunk/configure.in [configure.in,build.vc10/config.h] changed version to 6.4.1-rc3 [ChangeLog] updated ------------------------------------------------------------------------ r1831 | zimmerma | 2012-03-13 13:47:38 +0100 (Tue, 13 Mar 2012) | 2 lines Changed paths: M /trunk/mpzspv.c [mpzspv.c] removed call to ecm_bdiv_r_1 since it is not working ------------------------------------------------------------------------ r1830 | zimmerma | 2012-03-13 13:15:09 +0100 (Tue, 13 Mar 2012) | 5 lines Changed paths: M /trunk/ecm-params.h.corei5 M /trunk/mpzspv.c
[ecm-params.h.corei5] updated [mpzspv.c] removed new code with mpn_preinv_mod_1() in mpzspv_from_mpzv_slow since it was wrong (added comment explaining why) added new ecm_bdiv_r_1 code (disabled for now) ------------------------------------------------------------------------ r1829 | brian_gladman | 2012-03-13 10:22:16 +0100 (Tue, 13 Mar 2012) | 1 line Changed paths: M /trunk/build.vc10/bench_mulredc/bench_mulredc.vcxproj M /trunk/build.vc10/bench_mulredc/bench_mulredc.vcxproj.filters M /trunk/sp.h ensure ATTRIBUTE_UNUSED is defined as empty for MSVC in sp.h ------------------------------------------------------------------------ r1828 | bouvierc | 2012-03-13 09:47:07 +0100 (Tue, 13 Mar 2012) | 3 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm/cudakernel.cu [cudakernel.cu] Fix a bug [Makefile] Add 32-bit lib for CUDA ------------------------------------------------------------------------ r1827 | brian_gladman | 2012-03-13 09:43:40 +0100 (Tue, 13 Mar 2012) | 1 line Changed paths: M /trunk/build.vc10/bench_mulredc D /trunk/build.vc10/bench_mulredc/bench_mulredc.vcxproj.user remove file uploaded in error ------------------------------------------------------------------------ r1826 | zimmerma | 2012-03-13 09:24:34 +0100 (Tue, 13 Mar 2012) | 2 lines Changed paths: M /trunk/ecm-params.h.athlon64 [ecm-params.h.athlon64] redo tuning ------------------------------------------------------------------------ r1825 | zimmerma | 2012-03-13 08:46:17 +0100 (Tue, 13 Mar 2012) | 2 lines Changed paths: M /trunk/README M /trunk/bench_mulredc.c changes suggested by David Cleaver for Windows+MingW64+Msys ------------------------------------------------------------------------ r1824 | zimmerma | 2012-03-13 08:30:42 +0100 (Tue, 13 Mar 2012) | 2 lines Changed paths: M /trunk/sp.h [sp.h] removed more compiler warnings (on gcc110) ------------------------------------------------------------------------ r1823 | zimmerma | 2012-03-13 08:17:49 +0100 (Tue, 13 Mar 2012) 
| 2 lines Changed paths: M /trunk/eval.c [eval.c] fixed compiler warnings (found on gcc70) ------------------------------------------------------------------------ r1822 | zimmerma | 2012-03-12 23:00:07 +0100 (Mon, 12 Mar 2012) | 4 lines Changed paths: M /trunk/configure.in M /trunk/mpzspv.c M /trunk/spm.c [mpzspv.c] use __gmpn_preinv_mod_1 in mpzspv_from_mpzv_slow() [configure.in] recognize __gmpn_preinv_mod_1 [spm.c] fixed typo ------------------------------------------------------------------------ r1821 | zimmerma | 2012-03-12 19:59:40 +0100 (Mon, 12 Mar 2012) | 2 lines Changed paths: M /trunk/test.ecm [test.ecm] modified batch1 test which was not working on 32-bit ------------------------------------------------------------------------ r1820 | zimmerma | 2012-03-12 18:25:09 +0100 (Mon, 12 Mar 2012) | 2 lines Changed paths: M /trunk/test.ecm [test.ecm] added two non-regression tests for bug fixed by r1819 ------------------------------------------------------------------------ r1819 | zimmerma | 2012-03-12 17:43:22 +0100 (Mon, 12 Mar 2012) | 2 lines Changed paths: M /trunk/mpmod.c [mpmod.c] fixed bug in mpresn_addsub (found on gcc45) ------------------------------------------------------------------------ r1818 | zimmerma | 2012-03-12 16:18:22 +0100 (Mon, 12 Mar 2012) | 5 lines Changed paths: M /trunk/bench_mulredc.c [bench_mulredc.c] use same cputime() function as in auxlib.c (should work on Windows too) and setup number of iterations to get about 100ms for each test (avoid long time on slow computers) ------------------------------------------------------------------------ r1817 | zimmerma | 2012-03-12 14:48:29 +0100 (Mon, 12 Mar 2012) | 7 lines Changed paths: M /trunk/COPYING M /trunk/COPYING.LIB M /trunk/Makefile.am M /trunk/NEWS M /trunk/bench_mulredc.c M /trunk/longlong.h M /trunk/mpmod.c M /trunk/pm1fs2.c [COPYING,COPYING.LIB] switched to GPL v3 and LGPL v3 [bench_mulredc.c,Makefile.am] fix for --disable-asm-redc [NEWS] added new items [longlong.h] 
copied umul_ppmm code for MIPS from GMP 5.0.4 [mpmod.c] now use mulredc_1 when available [pm1fs2.c] fixed issue when sizeof(unsigned long) < sizeof(sp_t) ------------------------------------------------------------------------ r1816 | brian_gladman | 2012-03-12 12:28:43 +0100 (Mon, 12 Mar 2012) | 1 line Changed paths: D /trunk/build.vc10/ecm.cuda.sln A /trunk/build.vc10/gpu_ecm.sln (from /trunk/build.vc10/ecm.cuda.sln:1814) rename the Visual Studio solution for the gpu build to gpu_ecm ------------------------------------------------------------------------ r1815 | brian_gladman | 2012-03-12 12:21:16 +0100 (Mon, 12 Mar 2012) | 1 line Changed paths: A /trunk/build.vc10/getopt.c A /trunk/build.vc10/getopt.h add files needed for the gpu build with MS Visual Studio and Nvidia Nsight ------------------------------------------------------------------------ r1814 | brian_gladman | 2012-03-12 12:05:58 +0100 (Mon, 12 Mar 2012) | 1 line Changed paths: M /trunk/build.vc10/config.h update to match MPIR 2.5.1 (which now has mpn_redc_2) ------------------------------------------------------------------------ r1813 | zimmerma | 2012-03-12 09:25:24 +0100 (Mon, 12 Mar 2012) | 2 lines Changed paths: M /trunk/bench_mulredc.c [bench_mulredc.c] fixed compiler warning ------------------------------------------------------------------------ r1809 | zimmerma | 2012-03-11 18:55:19 +0100 (Sun, 11 Mar 2012) | 3 lines Changed paths: M /trunk/batch.c M /trunk/build.vc10/config.h M /trunk/configure.in [batch.c] fixed bug since d_1 might not fit in an "unsigned long" on Windows [configure.in,build.vc10/config.h] bump version number to 6.4.1-rc2 ------------------------------------------------------------------------ r1808 | zimmerma | 2012-03-10 11:47:44 +0100 (Sat, 10 Mar 2012) | 2 lines Changed paths: M /trunk/ecm-params.h.athlon64 M /trunk/ecm-params.h.core2 M /trunk/ecm-params.h.corei5 added tuning parameters for MPIR ------------------------------------------------------------------------ 
r1807 | zimmerma | 2012-03-10 11:26:20 +0100 (Sat, 10 Mar 2012) | 2 lines Changed paths: M /trunk/configure.in [configure.in] recognize if GMP is MPIR ------------------------------------------------------------------------ r1806 | zimmerma | 2012-03-10 10:49:20 +0100 (Sat, 10 Mar 2012) | 3 lines Changed paths: M /trunk/batch.c M /trunk/ecm-impl.h M /trunk/mpmod.c [mpmod.c] new function mpresn_unpad to normalize mpz_t values [batch.c] don't forget to normalize x1 and z1 at the end!!! ------------------------------------------------------------------------ r1805 | zimmerma | 2012-03-10 10:33:46 +0100 (Sat, 10 Mar 2012) | 2 lines Changed paths: M /trunk/test.ecm [test.ecm] fixed for new batch1 reduction ------------------------------------------------------------------------ r1804 | zimmerma | 2012-03-09 22:50:17 +0100 (Fri, 09 Mar 2012) | 2 lines Changed paths: M /trunk/README M /trunk/bench_mulredc.c M /trunk/configure.in M /trunk/ecm-gmp.h M /trunk/ecm-params.h.athlon64 M /trunk/ecm-params.h.core2 M /trunk/ecm-params.h.corei5 M /trunk/mpmod.c M /trunk/mpmod.h M /trunk/test_mulredc.c M /trunk/x86_64/Makefile.am removed ecm_redc3 code from x86_64 (variable-size REDC assembly code) ------------------------------------------------------------------------ r1802 | zimmerma | 2012-03-09 18:21:09 +0100 (Fri, 09 Mar 2012) | 2 lines Changed paths: M /trunk/mpmod.c [mpmod.c] fixed error when __gmpn_add_nc is not defined ------------------------------------------------------------------------ r1801 | zimmerma | 2012-03-09 18:13:48 +0100 (Fri, 09 Mar 2012) | 5 lines Changed paths: M /trunk/batch.c M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/main.c M /trunk/mpmod.c M /trunk/test.ecm Change the definition of d in batch1 mode from mp_limb_t to mp_limb_t/B so that we can use LSB (Montgomery) division instead of MSB division, this gives a nice speedup. However this changes the input parameter A, and some tests are still failing. Do not use this version in production! 
------------------------------------------------------------------------ r1800 | bouvierc | 2012-03-09 15:21:35 +0100 (Fri, 09 Mar 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile Dependencies in Makefile for GPU-ECM ------------------------------------------------------------------------ r1799 | zimmerma | 2012-03-09 10:49:47 +0100 (Fri, 09 Mar 2012) | 6 lines Changed paths: M /trunk/README.dev M /trunk/batch.c M /trunk/configure.in M /trunk/ecm-impl.h M /trunk/mpmod.c fixed various problems with ecm-6.4.1-rc1 reported by David Cleaver (http://lists.gforge.inria.fr/pipermail/ecm-discuss/2012-March/004144.html): * replaced unsigned long by mp_limb_t in batch=1 (under Windows, unsigned long has 32 bits only) * configure.in: added -lm for mathematical functions ------------------------------------------------------------------------ r1798 | zimmerma | 2012-03-08 13:11:00 +0100 (Thu, 08 Mar 2012) | 2 lines Changed paths: M /trunk/Makefile.am [Makefile.am] GMPLIB is not needed for libecm ------------------------------------------------------------------------ r1797 | zimmerma | 2012-03-08 10:11:52 +0100 (Thu, 08 Mar 2012) | 2 lines Changed paths: M /trunk/sets_long.c [sets_long.c] added comments ------------------------------------------------------------------------ r1796 | zimmerma | 2012-03-08 08:40:29 +0100 (Thu, 08 Mar 2012) | 2 lines Changed paths: M /trunk/Makefile.am [Makefile.am] forgot ecm-params.h.corei5 ------------------------------------------------------------------------ r1795 | zimmerma | 2012-03-07 16:41:21 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/NEWS [NEWS] fixed typo ------------------------------------------------------------------------ r1794 | zimmerma | 2012-03-07 16:36:37 +0100 (Wed, 07 Mar 2012) | 7 lines Changed paths: M /trunk/ChangeLog M /trunk/INSTALL-ecm M /trunk/NEWS M /trunk/README.dev M /trunk/bench_mulredc.c M /trunk/ecmbench [bench_mulredc.c] fix for Svoboda [NEWS] updated for 6.4.1 [ChangeLog] idem 
[INSTALL-ecm] ibidem [ecmbench] rm -> rm -f [README.dev] updated ------------------------------------------------------------------------ r1793 | bouvierc | 2012-03-07 15:12:28 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile Some changes in the Makefile for GPU-ECM ------------------------------------------------------------------------ r1792 | zimmerma | 2012-03-07 15:11:10 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/ecm-params.h.powerpc970 [ecm-params.h.powerpc970] removed exec flag ------------------------------------------------------------------------ r1791 | zimmerma | 2012-03-07 15:02:14 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpmod.c [ecm-impl.h,mpmod.c] removed mult_modulus (was always equal to orig_modulus) ------------------------------------------------------------------------ r1790 | zimmerma | 2012-03-07 14:22:50 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/pm1fs2.c [pm1fs2.c] changed assert to avoid compiler warning with clang 2.9 ------------------------------------------------------------------------ r1789 | zimmerma | 2012-03-07 13:54:25 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/configure.in [configure.in] added missing build.vc10/bench_mulredc/Makefile ------------------------------------------------------------------------ r1788 | zimmerma | 2012-03-07 13:46:16 +0100 (Wed, 07 Mar 2012) | 3 lines Changed paths: M /trunk/README.dev M /trunk/build.vc10/config.h M /trunk/configure.in M /trunk/ecm-gmp.h change version to 6.4.1-rc1 switched assertions to off for the release candidate ------------------------------------------------------------------------ r1787 | zimmerma | 2012-03-07 13:13:20 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/listz.c M /trunk/stage2.c [listz.c] cleanup, and removed some dead code in #if 0 ... 
#endif ------------------------------------------------------------------------ r1786 | zimmerma | 2012-03-07 10:40:51 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm_ntt.c M /trunk/listz.c fix NEGATED_ROOTS=1 once for all ------------------------------------------------------------------------ r1785 | zimmerma | 2012-03-07 10:12:57 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/main.c [main.c] -treefile is valid for ECM only ------------------------------------------------------------------------ r1784 | zimmerma | 2012-03-07 10:01:19 +0100 (Wed, 07 Mar 2012) | 2 lines Changed paths: M /trunk/test.ecm [test.ecm] added test with -treefile ------------------------------------------------------------------------ r1783 | zimmerma | 2012-03-05 08:56:02 +0100 (Mon, 05 Mar 2012) | 3 lines Changed paths: M /trunk/ecm.c [ecm.c] with -v -v, print A=... and x0=... for coherence with command line options ------------------------------------------------------------------------ r1782 | zimmerma | 2012-03-02 17:27:57 +0100 (Fri, 02 Mar 2012) | 3 lines Changed paths: M /trunk/README.dev M /trunk/TODO [README.dev] added check of -treefile before a release [TODO] at some point we should raise sigma to 64 bits ------------------------------------------------------------------------ r1781 | bouvierc | 2012-03-01 17:43:16 +0100 (Thu, 01 Mar 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu M /trunk/gpu/gpu_ecm/main.c Better measurement of actual running time. ------------------------------------------------------------------------ r1779 | bouvierc | 2012-03-01 13:44:35 +0100 (Thu, 01 Mar 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/check.sh M /trunk/gpu/gpu_ecm/cudakernel.cu M /trunk/gpu/gpu_ecm/main.c Don't do modular reduction anymore in GPU arithmetic. 
------------------------------------------------------------------------ r1778 | bouvierc | 2012-02-29 15:50:23 +0100 (Wed, 29 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu M /trunk/gpu/gpu_ecm/main.c Prepare the code to use Montgomery-Svoboda algorithm for REDC ------------------------------------------------------------------------ r1777 | bouvierc | 2012-02-29 15:49:02 +0100 (Wed, 29 Feb 2012) | 2 lines Changed paths: M /trunk/batch.c Add an include when linked for GPU-ECM ------------------------------------------------------------------------ r1775 | bouvierc | 2012-02-28 15:50:25 +0100 (Tue, 28 Feb 2012) | 2 lines Changed paths: M /trunk/batch.c M /trunk/ecm-ecm.h M /trunk/ecm-impl.h M /trunk/main.c Batch mode: allow to save and load s from a file. ------------------------------------------------------------------------ r1774 | bouvierc | 2012-02-27 11:45:40 +0100 (Mon, 27 Feb 2012) | 2 lines Changed paths: M /trunk/test.ecm Replace ecm and ./ecm by $ECM in two lines of the test file. ------------------------------------------------------------------------ r1773 | brian_gladman | 2012-02-24 16:47:31 +0100 (Fri, 24 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/ecm/ecm.vcxproj.filters ------------------------------------------------------------------------ r1772 | brian_gladman | 2012-02-24 16:43:18 +0100 (Fri, 24 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/config.h M /trunk/build.vc10/gpu_ecm/gpu_ecm.vcxproj M /trunk/build.vc10/libecm/libecm.vcxproj M /trunk/build.vc10/libecm/libecm.vcxproj.filters Update the gpu_ecm build for Windows ------------------------------------------------------------------------ r1771 | bouvierc | 2012-02-24 15:15:31 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu M /trunk/gpu/gpu_ecm/cudakernel.h Make the CPU code handling the GPU clearer. 
------------------------------------------------------------------------ r1770 | bouvierc | 2012-02-24 13:58:59 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/main.c M /trunk/gpu/gpu_ecm/utils.c M /trunk/gpu/gpu_ecm/utils.h Minor change in main.c. Add some comments. ------------------------------------------------------------------------ r1769 | bouvierc | 2012-02-24 13:43:26 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/batch.c Fix a problem reported by Brian Gladman. ------------------------------------------------------------------------ r1768 | bouvierc | 2012-02-24 11:24:31 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/main.c M /trunk/gpu/gpu_ecm/utils.c M /trunk/gpu/gpu_ecm/utils.h All factors found and cofactors are printed the same way. ------------------------------------------------------------------------ r1767 | bouvierc | 2012-02-24 11:02:51 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/main.c Use the struct mpcandi_t. ------------------------------------------------------------------------ r1766 | bouvierc | 2012-02-24 10:45:26 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm/utils.c M /trunk/resume.c Write the right program name and right version in resume file. ------------------------------------------------------------------------ r1765 | bouvierc | 2012-02-24 10:32:36 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudakernel.cu M /trunk/gpu/gpu_ecm/main.c M /trunk/gpu/gpu_ecm/utils.c M /trunk/gpu/gpu_ecm/utils.h More clear variables' name. More comment in main.c ------------------------------------------------------------------------ r1764 | bouvierc | 2012-02-24 09:47:36 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm/main.c M /trunk/gpu/gpu_ecm/utils.c M /trunk/gpu/gpu_ecm/utils.h Use write_resumefile_line from GMP-ECM. 
------------------------------------------------------------------------ r1763 | zimmerma | 2012-02-24 08:19:43 +0100 (Fri, 24 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/check.sh [check.sh] translated error message in english ------------------------------------------------------------------------ r1762 | bouvierc | 2012-02-23 18:19:07 +0100 (Thu, 23 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm/README M /trunk/gpu/gpu_ecm/main.c M /trunk/gpu/gpu_ecm/utils.c M /trunk/gpu/gpu_ecm/utils.h Using more functions from GMP-ECM. ------------------------------------------------------------------------ r1761 | bouvierc | 2012-02-23 18:18:26 +0100 (Thu, 23 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/check.sh Keep temp files when an error occurs in check.sh. ------------------------------------------------------------------------ r1760 | bouvierc | 2012-02-23 17:55:03 +0100 (Thu, 23 Feb 2012) | 2 lines Changed paths: M /trunk/ecm-ecm.h Delete duplicate prototypes. ------------------------------------------------------------------------ r1759 | bouvierc | 2012-02-23 17:11:10 +0100 (Thu, 23 Feb 2012) | 2 lines Changed paths: M /trunk/auxi.c M /trunk/ecm-ecm.h M /trunk/main.c Move some functions out of main.c in order to use them with GPU-ECM. ------------------------------------------------------------------------ r1758 | bouvierc | 2012-02-23 16:23:58 +0100 (Thu, 23 Feb 2012) | 2 lines Changed paths: M /trunk/batch.c M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm/def.h M /trunk/gpu/gpu_ecm/main.c M /trunk/gpu/gpu_ecm/utils.c M /trunk/gpu/gpu_ecm/utils.h Start to use GMP-ECM functions in GPU-ECM when they exist. 
------------------------------------------------------------------------ r1757 | bouvierc | 2012-02-23 14:54:11 +0100 (Thu, 23 Feb 2012) | 4 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile A /trunk/gpu/gpu_ecm/README.dev D /trunk/gpu/gpu_ecm/cudaarith.cu D /trunk/gpu/gpu_ecm/cudaarith.h A /trunk/gpu/gpu_ecm/cudakernel.cu (from /trunk/gpu/gpu_ecm/cudautils.cu:1756) A /trunk/gpu/gpu_ecm/cudakernel.h (from /trunk/gpu/gpu_ecm/cudautils.h:1756) D /trunk/gpu/gpu_ecm/cudautils.cu D /trunk/gpu/gpu_ecm/cudautils.h A /trunk/gpu/gpu_ecm/def.h (from /trunk/gpu/gpu_ecm/main.h:1756) A /trunk/gpu/gpu_ecm/main.c (from /trunk/gpu/gpu_ecm/main.cu:1756) D /trunk/gpu/gpu_ecm/main.cu D /trunk/gpu/gpu_ecm/main.h A /trunk/gpu/gpu_ecm/utils.c (from /trunk/gpu/gpu_ecm/utils.cu:1756) D /trunk/gpu/gpu_ecm/utils.cu M /trunk/gpu/gpu_ecm/utils.h Reorganization of the code. C code is put in C files compiled with gcc and Cuda code is put in .cu files and compiled with nvcc. ------------------------------------------------------------------------ r1756 | bouvierc | 2012-02-22 21:18:28 +0100 (Wed, 22 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/utils.cu Fix a bug reported by Brian Gladman. ------------------------------------------------------------------------ r1755 | bouvierc | 2012-02-22 16:12:37 +0100 (Wed, 22 Feb 2012) | 5 lines Changed paths: M /trunk/ecm.c M /trunk/main.c M /trunk/test.ecm [ecm.c] Fix a problem for batch mode 2 when the value obtained from the parametrization is printed [main.c] Print which batch mode is used. [test.ecm] Add a test for batch mode 2. 
------------------------------------------------------------------------ r1754 | bouvierc | 2012-02-22 16:09:58 +0100 (Wed, 22 Feb 2012) | 2 lines Changed paths: D /trunk/gpu/gpu_ecm/obj D /trunk/gpu/gpu_ecm/test.sh Delete useless files ------------------------------------------------------------------------ r1753 | bouvierc | 2012-02-22 16:07:10 +0100 (Wed, 22 Feb 2012) | 4 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile A /trunk/gpu/gpu_ecm/README M /trunk/gpu/gpu_ecm/check.sh M /trunk/gpu/gpu_ecm/main.h [Makefile] A more-easy-to-use Makefile [README] README explaining how to compile and execute GPU-ECM and how to use check.sh ------------------------------------------------------------------------ r1752 | bouvierc | 2012-02-22 12:04:26 +0100 (Wed, 22 Feb 2012) | 2 lines Changed paths: M /trunk/batch.c fix a bug in batch mode 2 ------------------------------------------------------------------------ r1751 | bouvierc | 2012-02-22 11:42:57 +0100 (Wed, 22 Feb 2012) | 2 lines Changed paths: M /trunk/batch.c M /trunk/ecm.c Fix batch mode 1 which didn't pass all the test because of previous commits. ------------------------------------------------------------------------ r1750 | bouvierc | 2012-02-22 10:21:27 +0100 (Wed, 22 Feb 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/main.cu M /trunk/gpu/gpu_ecm/utils.cu Fix some bugs for 32-bits machines. ------------------------------------------------------------------------ r1749 | bouvierc | 2012-02-21 18:25:00 +0100 (Tue, 21 Feb 2012) | 4 lines Changed paths: M /trunk/batch.c D /trunk/batchmode.h M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/main.c Now the choice of the which batch mode is used is not done by a #define. 
It can be passed as an argument: -batch[=1|2], with -batch being equivalent to -batch=1 ------------------------------------------------------------------------ r1748 | bouvierc | 2012-02-21 17:46:23 +0100 (Tue, 21 Feb 2012) | 2 lines Changed paths: A /trunk/batchmode.h A /trunk/ellparam_batch.c Forgot to add 2 new files in the last commit. ------------------------------------------------------------------------ r1747 | bouvierc | 2012-02-21 17:45:28 +0100 (Tue, 21 Feb 2012) | 4 lines Changed paths: M /trunk/Makefile.am M /trunk/batch.c M /trunk/ecm.c M /trunk/main.c Add elliptic parametrization for batch mode 2 Move the choice of A for batch mode in ecm.c (as the choice of sigma for Suyama) ------------------------------------------------------------------------ r1746 | brian_gladman | 2012-02-16 18:40:14 +0100 (Thu, 16 Feb 2012) | 1 line Changed paths: M /trunk/gpu/gpu_ecm/main.cu set IDLE priority for GPU application on Windows ------------------------------------------------------------------------ r1745 | brian_gladman | 2012-02-16 17:31:39 +0100 (Thu, 16 Feb 2012) | 1 line Changed paths: A /trunk/build.vc10/readme_gpu.txt add short readme for the Windows GPU build ------------------------------------------------------------------------ r1744 | brian_gladman | 2012-02-16 17:18:53 +0100 (Thu, 16 Feb 2012) | 1 line Changed paths: A /trunk/build.vc10/ecm.cuda.sln Add Visual Studio build for the GPU code ------------------------------------------------------------------------ r1743 | brian_gladman | 2012-02-16 17:15:45 +0100 (Thu, 16 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/getrusage.h A /trunk/build.vc10/gpu_ecm A /trunk/build.vc10/gpu_ecm/gpu_ecm.vcxproj M /trunk/gpu/gpu_ecm/main.cu M /trunk/gpu/gpu_ecm/main.h M /trunk/gpu/gpu_ecm/utils.cu M /trunk/gpu/gpu_ecm/utils.h M /trunk/gpu/modular_arithmetic.c ------------------------------------------------------------------------ r1742 | brian_gladman | 2012-02-13 22:33:07 +0100 (Mon, 13 Feb 2012) | 1 line
Changed paths: M /trunk/build.vc10/config.h correct conflicted config.h ------------------------------------------------------------------------ r1741 | brian_gladman | 2012-02-13 22:06:19 +0100 (Mon, 13 Feb 2012) | 1 line Changed paths: M /trunk/bench_mulredc.c M /trunk/build.vc10/Makefile.am M /trunk/build.vc10/assembler/Makefile.am A /trunk/build.vc10/assembler/mulredc.asm M /trunk/build.vc10/assembler/mulredc.h A /trunk/build.vc10/assembler/redc.asm A /trunk/build.vc10/bench_mulredc A /trunk/build.vc10/bench_mulredc/Makefile.am A /trunk/build.vc10/bench_mulredc/bench_mulredc.vcxproj A /trunk/build.vc10/bench_mulredc/bench_mulredc.vcxproj.filters A /trunk/build.vc10/bench_mulredc/bench_mulredc.vcxproj.user M /trunk/build.vc10/config.h M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/ecm.sln A /trunk/build.vc10/getrusage.c A /trunk/build.vc10/getrusage.h M /trunk/build.vc10/libecm/libecm.vcxproj M /trunk/build.vc10/libecm/libecm.vcxproj.filters M /trunk/build.vc10/tune/tune.vcxproj M /trunk/build.vc10/tune/tune.vcxproj.filters Revamp the Windows Visual Studio 2010 build ------------------------------------------------------------------------ r1738 | zimmerma | 2012-02-12 23:46:20 +0100 (Sun, 12 Feb 2012) | 3 lines Changed paths: M /trunk/bench_mulredc.c [bench_mulredc.c] added some Svoboda code (not tested so far) and avoid some warnings if mpn_redc_2 and mpn_redc_n are not defined ------------------------------------------------------------------------ r1737 | dcleaver | 2012-02-12 23:24:37 +0100 (Sun, 12 Feb 2012) | 1 line Changed paths: M /trunk/Fgw.c M /trunk/test.ecm Clean up comments and make ecm test deterministic ------------------------------------------------------------------------ r1736 | zimmerma | 2012-02-12 23:05:25 +0100 (Sun, 12 Feb 2012) | 2 lines Changed paths: M /trunk/mpmod.c [mpmod.c] fixed misplaced #ifdef, added comments, MPN_COPY -> mpn_copyi ------------------------------------------------------------------------ r1735 | dcleaver 
| 2012-02-12 21:50:37 +0100 (Sun, 12 Feb 2012) | 1 line Changed paths: M /trunk/test.ecm Added/fixed test to exercise r1734 patch ------------------------------------------------------------------------ r1734 | dcleaver | 2012-02-12 21:44:33 +0100 (Sun, 12 Feb 2012) | 1 line Changed paths: M /trunk/Fgw.c M /trunk/test.ecm Patched Fgw.c to allocate adequate memory for special inputs ------------------------------------------------------------------------ r1733 | zimmerma | 2012-02-12 20:15:50 +0100 (Sun, 12 Feb 2012) | 2 lines Changed paths: M /trunk/bench_mulredc.c M /trunk/configure.in M /trunk/mpmod.h also consider mpn_redc_n in bench_mulredc (not yet used in mpmod.c) ------------------------------------------------------------------------ r1732 | brian_gladman | 2012-02-12 19:54:10 +0100 (Sun, 12 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/assembler/mulredc.h M /trunk/build.vc10/libecm/libecm.vcxproj M /trunk/build.vc10/tune/tune.vcxproj Make use of assembler code the default for Windows ------------------------------------------------------------------------ r1731 | zimmerma | 2012-02-12 17:34:08 +0100 (Sun, 12 Feb 2012) | 2 lines Changed paths: M /trunk/mpmod.c [mpmod.c] fixed problem reported by David Cleaver on ecm-discuss ------------------------------------------------------------------------ r1730 | zimmerma | 2012-02-12 17:23:22 +0100 (Sun, 12 Feb 2012) | 5 lines Changed paths: M /trunk/bench_mulredc.c M /trunk/configure.in M /trunk/ecm-params.h.core2 M /trunk/mpmod.c [configure.in] bump minimal GMP version to 5.0.0 [bench_mulredc.c,mpmod.c] since mpn_sqr is defined in GMP >= 5, no need to test it [ecm-params.h.core2] updated ------------------------------------------------------------------------ r1729 | zimmerma | 2012-02-12 16:12:25 +0100 (Sun, 12 Feb 2012) | 2 lines Changed paths: M /trunk/build.vc10/assembler/Makefile.am [assembler/Makefile.am] added missing mulredc.h ------------------------------------------------------------------------ 
r1726 | zimmerma | 2012-02-12 12:03:54 +0100 (Sun, 12 Feb 2012) | 2 lines Changed paths: M /trunk/Makefile.am M /trunk/bench_mulredc.c M /trunk/mpmod.c A /trunk/mpmod.h define macros for different choices of redc modular arithmetic ------------------------------------------------------------------------ r1725 | zimmerma | 2012-02-12 08:35:20 +0100 (Sun, 12 Feb 2012) | 2 lines Changed paths: M /trunk/build.vc10/Makefile.am [build.vc10/Makefile.am] removed non-existent file ecm-params.h.x64.core2 ------------------------------------------------------------------------ r1724 | brian_gladman | 2012-02-12 00:07:15 +0100 (Sun, 12 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/Makefile.am add distributed in build.vc10 to files to makefile.am ------------------------------------------------------------------------ r1723 | zimmerma | 2012-02-11 14:37:39 +0100 (Sat, 11 Feb 2012) | 15 lines Changed paths: M /trunk/README M /trunk/bench_mulredc.c M /trunk/ecm-impl.h M /trunk/ecm-params.h.alpha-ev56 M /trunk/ecm-params.h.armv5tel M /trunk/ecm-params.h.athlon64 M /trunk/ecm-params.h.core2 M /trunk/ecm-params.h.corei5 M /trunk/ecm-params.h.hppa2.0 M /trunk/ecm-params.h.ia64 M /trunk/ecm-params.h.mips64el M /trunk/ecm-params.h.pentium-m M /trunk/ecm-params.h.pentium4 M /trunk/ecm-params.h.powerpc970 M /trunk/ecm-params.h.sparc64 M /trunk/main.c M /trunk/mpmod.c M /trunk/test.pm1 M /trunk/tune.c Complete rewrite of the tuning mechanism for mulredc and sqrredc: instead of having a simple threshold, we have a full table for each one up to 20 limbs, where each entry for n limbs is an integer saying which function(s) should be used for the modular multiplication or squaring. Those tables are computed by bench_mulredc. On processors where assembly redc is available, I suggest we always have --enable-asm-redc, since the best routine will be chosen by bench_mulredc. 
On processors where assembly redc is not available, I suggest we still can use/compile bench_mulredc to choose the best routines. Ultimately bench_mulredc should be incorporated into "make tune". ------------------------------------------------------------------------ r1722 | brian_gladman | 2012-02-11 12:09:52 +0100 (Sat, 11 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/ecm/Makefile.am M /trunk/build.vc10/libecm/Makefile.am M /trunk/build.vc10/tune/Makefile.am Add IDE filters to the GMP-ECM distribution ------------------------------------------------------------------------ r1721 | zimmerma | 2012-02-11 09:45:15 +0100 (Sat, 11 Feb 2012) | 2 lines Changed paths: M /trunk/build.vc10/Makefile.am A /trunk/build.vc10/tune/Makefile.am M /trunk/configure.in another try to fix missing vc10 files ------------------------------------------------------------------------ r1720 | brian_gladman | 2012-02-10 21:40:28 +0100 (Fri, 10 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/tests.py another Windows build correction ------------------------------------------------------------------------ r1719 | zimmerma | 2012-02-10 21:38:19 +0100 (Fri, 10 Feb 2012) | 3 lines Changed paths: M /trunk/bench_mulredc.c M /trunk/x86_64/Makefile.am M /trunk/x86_64/mulredc.h [x86_64] enable back redc3 code (can be useful for modular squaring) [bench_mulredc.c] print results of tuning at the end ------------------------------------------------------------------------ r1718 | zimmerma | 2012-02-10 20:52:57 +0100 (Fri, 10 Feb 2012) | 2 lines Changed paths: M /trunk/Makefile.am A /trunk/build.vc10/Makefile.am A /trunk/build.vc10/assembler/Makefile.am A /trunk/build.vc10/ecm/Makefile.am A /trunk/build.vc10/libecm/Makefile.am M /trunk/configure.in put in "make dist" missing build.vc10 files (to be checked) ------------------------------------------------------------------------ r1717 | zimmerma | 2012-02-10 20:12:15 +0100 (Fri, 10 Feb 2012) | 3 lines
Changed paths: M /trunk/ecm.c [ecm.c] for the batch mode and A=4d-2 with d "random", the torsion smoothness multiplier is 1/(3*3^(1/128)) = 0.330... and not 1/3 ! ------------------------------------------------------------------------ r1716 | brian_gladman | 2012-02-10 13:32:53 +0100 (Fri, 10 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/libecm/libecm.vcxproj minor windows build change ------------------------------------------------------------------------ r1715 | brian_gladman | 2012-02-10 10:49:38 +0100 (Fri, 10 Feb 2012) | 1 line Changed paths: M /trunk/build.vc10/ecm-params.h M /trunk/build.vc10/ecm-params.h.win32.amd M /trunk/build.vc10/ecm-params.h.win32.intel M /trunk/build.vc10/ecm-params.h.x64.amd M /trunk/build.vc10/ecm-params.h.x64.intel M /trunk/build.vc10/tune/tune.vcxproj M /trunk/build.vc10/tune/tune.vcxproj.filters correct windows tuning errors ------------------------------------------------------------------------ r1714 | zimmerma | 2012-02-09 12:42:54 +0100 (Thu, 09 Feb 2012) | 2 lines Changed paths: M /trunk/bench_mulredc.c [bench_mulredc.c] updated to measure more low-level functions ------------------------------------------------------------------------ r1713 | zimmerma | 2012-02-08 12:29:36 +0100 (Wed, 08 Feb 2012) | 3 lines Changed paths: M /trunk/ecm.c [ecm.c] corrected the value of BATCH_EXTRA_SMOOTHNESS for GMP_NUMB_BITS >= 64, where we use A=4d-2 with d a square ------------------------------------------------------------------------ r1712 | zimmerma | 2012-02-07 22:24:56 +0100 (Tue, 07 Feb 2012) | 4 lines Changed paths: M /trunk/mpmod.c [mpmod.c] new function sqrredc in C (not used yet because slower than the assembly mulredc) use redc_basecase_n in mpresn_sqr and mpresn_mul: small speedup ------------------------------------------------------------------------ r1711 | zimmerma | 2012-02-06 17:15:47 +0100 (Mon, 06 Feb 2012) | 3 lines Changed paths: M /trunk/mpmod.c [mpmod.c] use TUNE_SQRREDC_THRESH and TUNE_MULREDC_THRESH in 
the mpresn_* functions ------------------------------------------------------------------------ r1710 | zimmerma | 2012-02-06 16:10:32 +0100 (Mon, 06 Feb 2012) | 3 lines Changed paths: M /trunk/README.dev M /trunk/ecmbench [ecmbench] use $1 to allow testing different versions [README.dev] added item for efficiency non-regression ------------------------------------------------------------------------ r1708 | zimmerma | 2012-02-06 15:58:01 +0100 (Mon, 06 Feb 2012) | 2 lines Changed paths: M /trunk/ecm.c M /trunk/ecm2.c M /trunk/lucas.c M /trunk/mpmod.c M /trunk/pm1fs2.c M /trunk/pp1.c M /trunk/tune.c replaced all occurrences of mpres_mul (a, x, x, m) by mpres_sqr (a, x, m) ------------------------------------------------------------------------ r1706 | zimmerma | 2012-02-06 14:34:35 +0100 (Mon, 06 Feb 2012) | 7 lines Changed paths: M /trunk/NEWS M /trunk/README.dev M /trunk/configure.in M /trunk/ecm-impl.h A /trunk/ecm-params.h.corei5 A /trunk/ecmbench M /trunk/mpmod.c M /trunk/tune.c [tune.c] cleaned up computation of TUNE_MULREDC_THRESH and TUNE_SQRREDC_THRESH [ecm-params.h.corei5] new parameters for Core i5 [configure.in] now recognize Core i5 [ecmbench] new bench utility [ecm-impl.h] new macro MULREDC_ASSEMBLY_MAX [mpmod.c] use MULREDC_ASSEMBLY_MAX ------------------------------------------------------------------------ r1703 | zimmerma | 2012-02-04 09:47:09 +0100 (Sat, 04 Feb 2012) | 2 lines Changed paths: M /trunk/build.vc10/config.h M /trunk/configure.in switch version to 6.5-dev, and assertions on by default ------------------------------------------------------------------------ r1701 | zimmerma | 2012-02-03 20:59:21 +0100 (Fri, 03 Feb 2012) | 2 lines Changed paths: M /trunk/batch.c M /trunk/mpmod.c make new batch-mode code also work with --disable-asm-redc ------------------------------------------------------------------------ r1700 | zimmerma | 2012-02-03 17:14:22 +0100 (Fri, 03 Feb 2012) | 6 lines Changed paths: M /trunk/batch.c M /trunk/ecm-impl.h M
/trunk/ecm.c M /trunk/mpmod.c M /trunk/pm1fs2.c [pm1fs2.c] renamed mpmod_copy into mpmod_init_set [batch.c] use new function mpresn_pad [ecm-impl.h] added mult_modulus (currently initialized to N) [mpmod.c] simplified the mpresn_* functions (assuming repr=ECM_MOD_MODMULN) [ecm.c] in batch mode, force repr=ECM_MOD_MODMULN ------------------------------------------------------------------------ r1697 | zimmerma | 2012-02-03 10:07:59 +0100 (Fri, 03 Feb 2012) | 4 lines Changed paths: M /trunk/batch.c M /trunk/ecm-impl.h M /trunk/mpmod.c [ecm-impl.h] added missing prototypes [batch.c] removed debug printf() statements [mpmod.c] fixed compiler warnings ------------------------------------------------------------------------ r1696 | zimmerma | 2012-02-02 20:08:19 +0100 (Thu, 02 Feb 2012) | 2 lines Changed paths: M /trunk/NEWS [NEWS] added --enable-mulredc-svoboda ------------------------------------------------------------------------ r1695 | bouvierc | 2012-02-02 15:57:09 +0100 (Thu, 02 Feb 2012) | 4 lines Changed paths: M /trunk/batch.c M /trunk/mpmod.c Replace mpz operations by mpn operations in dup_add in batch.c For now only mul, sqr, add and sub have been modified. Reduction has not changed. ------------------------------------------------------------------------ r1694 | kruppa | 2012-02-02 15:52:19 +0100 (Thu, 02 Feb 2012) | 2 lines Changed paths: M /trunk/configure.in M /trunk/x86_64/mulredc.m4 Support for Svoboda mulredc in x86_64/.
Matching support in mpmod.c TBD ------------------------------------------------------------------------ r1693 | zimmerma | 2012-01-30 17:58:24 +0100 (Mon, 30 Jan 2012) | 2 lines Changed paths: M /trunk/getprime.c [getprime.c] fixed typo in commented printf() ------------------------------------------------------------------------ r1690 | kruppa | 2012-01-16 16:00:54 +0100 (Mon, 16 Jan 2012) | 2 lines Changed paths: M /trunk/mpzspv.c Slight cleanup ------------------------------------------------------------------------ r1684 | zimmerma | 2012-01-09 14:12:38 +0100 (Mon, 09 Jan 2012) | 2 lines Changed paths: M /trunk/mpmod.c [mpmod.c] perform lazy reduction in REDC when N < B^n/4 ------------------------------------------------------------------------ r1678 | zimmerma | 2012-01-07 16:45:28 +0100 (Sat, 07 Jan 2012) | 2 lines Changed paths: M /trunk/INSTALL-ecm M /trunk/Makefile.am M /trunk/configure.in removed leftover references to build.vc9 ------------------------------------------------------------------------ r1677 | zimmerma | 2012-01-06 18:32:22 +0100 (Fri, 06 Jan 2012) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm_cc13/Makefile cleaned up Makefiles ------------------------------------------------------------------------ r1676 | brian_gladman | 2012-01-04 18:14:22 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk D /trunk/build.vc9 Remove some errors from the Visual Studio readme.txt files; remove unmaintained Visual Studio 2008 build files ------------------------------------------------------------------------ r1675 | zimmerma | 2012-01-04 09:53:53 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/main.c [main.c] fixed warning on 32-bit processor ------------------------------------------------------------------------ r1674 | zimmerma | 2012-01-04 00:44:48 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/README.dev [README.dev] added tag for 6.4
------------------------------------------------------------------------ r1672 | zimmerma | 2012-01-04 00:42:50 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/ChangeLog [ChangeLog] updated, this is the GMP-ECM 6.4 release ------------------------------------------------------------------------ r1671 | brian_gladman | 2012-01-04 00:39:25 +0100 (Wed, 04 Jan 2012) | 1 line Changed paths: M /trunk/build.vc10/readme.txt M /trunk/build.vc9/readme.txt Remove some errors from the Visual Studio readme.txt files ------------------------------------------------------------------------ r1670 | zimmerma | 2012-01-04 00:38:05 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: A /trunk/INSTALL-ecm (from /trunk/INSTALL:1662) [INSTALL-ecm] specific INSTALL file for GMP-ECM ------------------------------------------------------------------------ r1669 | zimmerma | 2012-01-04 00:34:36 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/mpmod.c M /trunk/resume.c removed compiler warnings found with gcc 4.6.1 ------------------------------------------------------------------------ r1668 | zimmerma | 2012-01-04 00:26:26 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/configure.in [configure.in] added code to check for Pentium 4 ------------------------------------------------------------------------ r1667 | zimmerma | 2012-01-04 00:24:49 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: D /trunk/INSTALL [INSTALL] removed from svn, since it is now autogenerated by the autotools ------------------------------------------------------------------------ r1666 | zimmerma | 2012-01-04 00:24:01 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/ChangeLog [ChangeLog] updated for 6.4 ------------------------------------------------------------------------ r1665 | zimmerma | 2012-01-04 00:21:36 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/TODO [TODO] removed several done items 
------------------------------------------------------------------------ r1664 | zimmerma | 2012-01-04 00:21:13 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/README [README] added sample example ------------------------------------------------------------------------ r1663 | zimmerma | 2012-01-04 00:20:54 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/Makefile.am [Makefile.am] added INSTALL-ecm (INSTALL is now the generic GNU package file) ------------------------------------------------------------------------ r1662 | zimmerma | 2012-01-04 00:13:33 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/NEWS [NEWS] added one item ------------------------------------------------------------------------ r1661 | zimmerma | 2012-01-04 00:13:06 +0100 (Wed, 04 Jan 2012) | 3 lines Changed paths: M /trunk/ecm-gmp.h M /trunk/mpmod.c M /trunk/schoen_strass.c use of mpn_mullo_n when available check if _mpz_realloc sets value to 0 ------------------------------------------------------------------------ r1660 | zimmerma | 2012-01-04 00:12:03 +0100 (Wed, 04 Jan 2012) | 3 lines Changed paths: M /trunk/build.vc10/config.h M /trunk/build.vc9/config.h M /trunk/configure.in changed version to 6.4 added check for mpn_mullo_n ------------------------------------------------------------------------ r1659 | zimmerma | 2012-01-04 00:03:08 +0100 (Wed, 04 Jan 2012) | 2 lines Changed paths: M /trunk/build.vc10/readme.txt M /trunk/build.vc9/readme.txt fixed typos ------------------------------------------------------------------------ r1658 | zimmerma | 2012-01-03 23:49:23 +0100 (Tue, 03 Jan 2012) | 2 lines Changed paths: M /trunk/ecm-params.h.pentium4 [ecm-params.h.pentium4] updated for ecm-6.4 ------------------------------------------------------------------------ r1657 | zimmerma | 2012-01-03 23:31:19 +0100 (Tue, 03 Jan 2012) | 3 lines Changed paths: M /trunk/ecm-params.h.athlon64 M /trunk/ecm-params.h.core2 [ecm-params.h.athlon64] updated on a true AMD 
machine [ecm-params.h.core2] added model name ------------------------------------------------------------------------ r1656 | zimmerma | 2012-01-03 23:25:27 +0100 (Tue, 03 Jan 2012) | 2 lines Changed paths: M /trunk/ecm-params.h.core2 [ecm-params.h.core2] updated for ecm-6.4 ------------------------------------------------------------------------ r1655 | zimmerma | 2012-01-03 23:20:11 +0100 (Tue, 03 Jan 2012) | 2 lines Changed paths: M /trunk/ecm-params.h.athlon64 [ecm-params.h.athlon64] updated for ecm 6.4 ------------------------------------------------------------------------ r1653 | kruppa | 2012-01-03 04:28:57 +0100 (Tue, 03 Jan 2012) | 2 lines Changed paths: M /trunk/mpmod.c nn must be initialised before testing assertions involving nn ------------------------------------------------------------------------ r1647 | zimmerma | 2011-12-30 10:27:49 +0100 (Fri, 30 Dec 2011) | 2 lines Changed paths: M /trunk/INSTALL [INSTALL] modified instructions for GWNUM ------------------------------------------------------------------------ r1644 | zimmerma | 2011-12-27 16:28:42 +0100 (Tue, 27 Dec 2011) | 2 lines Changed paths: M /trunk/AUTHORS M /trunk/README.dev added Cyril Bouvier as author, small fixes in README.dev ------------------------------------------------------------------------ r1643 | zimmerma | 2011-12-27 16:22:31 +0100 (Tue, 27 Dec 2011) | 2 lines Changed paths: M /trunk/ChangeLog M /trunk/INSTALL M /trunk/NEWS M /trunk/README.dev M /trunk/build.vc10/config.h M /trunk/build.vc9/config.h M /trunk/configure.in prepare for the 6.4 release: now 6.4-rc1 ------------------------------------------------------------------------ r1642 | zimmerma | 2011-12-27 15:39:10 +0100 (Tue, 27 Dec 2011) | 3 lines Changed paths: M /trunk/main.c [main.c] on 64-bit processors, take d a square in batch mode, which gives a larger average torsion, and thus a larger success probability ------------------------------------------------------------------------ r1641 | zimmerma | 
2011-12-27 12:06:43 +0100 (Tue, 27 Dec 2011) | 13 lines Changed paths: M /trunk/main.c M /trunk/pm1.c work on "bug" reported by Jason Papadopoulos: the choice between NTT and no-NTT code for P-1 was wrong. In fact, since revision 1558, we prefer the variant with the larger transform length, which is sometimes a bad choice. For example with the following resume file the new choice is twice as slow: METHOD=P-1; B1=10000000; N=29799904256775982671863388319999573561548825027149399972531599612392671227006866151136667908641695103422986028076864929902803267437351318167549013218980573566942647077444419419003164546362008247462049; X=0x58bcade9a21209a49e884562ffac2b1dc3041ba75aacb160628223b64bc056cd2212ea489c9dfebe3336df2359ad41cb5ddfa54f7e1ed908cf5b47feed64d7b7daf309751bd9d5aa5848079de14d5590d13be1; CHECKSUM=2706566245; PROGRAM=GMP-ECM 6.3; X0=0xd13920f9; WHO=jasonp@COMPUTER; TIME=Mon Dec 12 07:50:07 2011; Moreover even with -ntt we could not force the NTT code to be used. This patch only fixes that problem: we can now force the NTT code with -ntt for PM1. Apart from the efficiency problem above (still to be solved), there is another problem: in case both NTT and no-NTT are tried, and finally NTT is preferred, the computed B2 value is wrong (it is that of no-NTT). 
------------------------------------------------------------------------ r1640 | brian_gladman | 2011-12-20 13:22:32 +0100 (Tue, 20 Dec 2011) | 1 line Changed paths: M /trunk/build.vc10/readme.txt update Visual Studio readme.txt to reflect output directory change ------------------------------------------------------------------------ r1639 | brian_gladman | 2011-12-20 09:03:09 +0100 (Tue, 20 Dec 2011) | 1 line Changed paths: M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/libecm/libecm.vcxproj M /trunk/build.vc10/mp_lib.props M /trunk/build.vc10/tune/tune.vcxproj move Visual Studio build output directories up one level in the directory tree ------------------------------------------------------------------------ r1638 | kruppa | 2011-12-14 16:07:17 +0100 (Wed, 14 Dec 2011) | 7 lines Changed paths: M /trunk/ecm_ntt.c M /trunk/mpzspv.c M /trunk/pm1fs2.c M /trunk/sp.h Moved some NTT related functions from pm1fs2.c to mpzspv.c Added NTT mul function that does forward transforms, point-wise multiply, and inverse transforms one small-prime vector at a time, which slightly improves memory access locality (also perhaps in preparation for later disk-stored vectors) ------------------------------------------------------------------------ r1637 | brian_gladman | 2011-11-21 22:43:09 +0100 (Mon, 21 Nov 2011) | 1 line Changed paths: M /trunk/build.vc10/tune/tune.vcxproj Add _WIN64 compiler define to the Windows x64 tune build ------------------------------------------------------------------------ r1636 | bouvierc | 2011-11-09 21:59:11 +0100 (Wed, 09 Nov 2011) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/cudaarith.cu M /trunk/gpu/gpu_ecm/main.cu M /trunk/gpu/gpu_ecm/main.h M /trunk/gpu/gpu_ecm/utils.cu Translate comments from french to english in gpu_ecm's code ------------------------------------------------------------------------ r1635 | zimmerma | 2011-11-08 21:15:24 +0100 (Tue, 08 Nov 2011) | 2 lines Changed paths: M /trunk/batch.c M /trunk/mpmod.c removed 
compiler warnings ------------------------------------------------------------------------ r1634 | zimmerma | 2011-11-08 21:11:47 +0100 (Tue, 08 Nov 2011) | 4 lines Changed paths: M /trunk/athlon/mulredc.h M /trunk/batch.c M /trunk/configure.in M /trunk/ecm-impl.h M /trunk/main.c M /trunk/mpmod.c M /trunk/pentium4/mulredc.h M /trunk/powerpc64/mulredc.h M /trunk/x86_64/mulredc.h changed NATIVE_REDC into USE_ASM_REDC to be coherent with --enable-asm-redc added new mpres_sqr function, to make it easier if/when we implement a faster modular squaring function ------------------------------------------------------------------------ r1633 | bouvierc | 2011-11-04 17:37:31 +0100 (Fri, 04 Nov 2011) | 3 lines Changed paths: A /trunk/gpu/gpu_ecm/check.sh To check the correctness of gpu_ecm use : ./check B1 ------------------------------------------------------------------------ r1632 | bouvierc | 2011-11-04 17:35:53 +0100 (Fri, 04 Nov 2011) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm/cudaarith.cu M /trunk/gpu/gpu_ecm/cudautils.cu M /trunk/gpu/gpu_ecm/cudautils.h D /trunk/gpu/gpu_ecm/gpu_ecm M /trunk/gpu/gpu_ecm/main.cu M /trunk/gpu/gpu_ecm/main.h M /trunk/gpu/gpu_ecm/utils.cu M /trunk/gpu/gpu_ecm/utils.h New version of gpu_ecm (compatible with Fermi cards) ------------------------------------------------------------------------ r1631 | dcleaver | 2011-10-24 04:30:28 +0200 (Mon, 24 Oct 2011) | 1 line Changed paths: M /trunk/batch.c use original compute_s with increased MAX_HEIGHT ------------------------------------------------------------------------ r1630 | zimmerma | 2011-10-23 13:16:09 +0200 (Sun, 23 Oct 2011) | 2 lines Changed paths: M /trunk/batch.c M /trunk/main.c [batch.c, main.c] removed useless code + gnu coding style ------------------------------------------------------------------------ r1629 | dcleaver | 2011-10-23 06:01:57 +0200 (Sun, 23 Oct 2011) | 1 line Changed paths: M /trunk/batch.c M /trunk/ecm-ecm.h M /trunk/factor.c M 
/trunk/main.c clear s in proper place, show correct time to calculate s, make output defines the same between ecm-ecm.h and ecm-impl.h ------------------------------------------------------------------------ r1628 | dcleaver | 2011-10-23 05:13:25 +0200 (Sun, 23 Oct 2011) | 1 line Changed paths: M /trunk/ecm-ecm.h corrected function prototype ------------------------------------------------------------------------ r1627 | dcleaver | 2011-10-23 05:08:25 +0200 (Sun, 23 Oct 2011) | 1 line Changed paths: M /trunk/batch.c M /trunk/ecm-ecm.h M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/ecm.h M /trunk/factor.c M /trunk/main.c update compute_s to allow larger B1, and compute s once per B1 ------------------------------------------------------------------------ r1626 | zimmerma | 2011-10-18 14:06:23 +0200 (Tue, 18 Oct 2011) | 2 lines Changed paths: M /trunk/ecm.c [ecm.c] added #define for batch mode smoothness constant ------------------------------------------------------------------------ r1625 | brian_gladman | 2011-10-15 20:03:36 +0200 (Sat, 15 Oct 2011) | 1 line Changed paths: M /trunk ------------------------------------------------------------------------ r1624 | zimmerma | 2011-10-13 11:36:46 +0200 (Thu, 13 Oct 2011) | 4 lines Changed paths: M /trunk/ecm.c M /trunk/rho.c [rho.c] added comments about EXTRA_SMOOTHNESS factor (Alex please complete) [ecm.c] take into account experimental factor of 3 less in torsion for batch mode (for expected number of curves and time) ------------------------------------------------------------------------ r1623 | brian_gladman | 2011-10-10 18:18:13 +0200 (Mon, 10 Oct 2011) | 1 line Changed paths: M /trunk/build.vc10/libecm/libecm.vcxproj M /trunk/build.vc10/libecm/libecm.vcxproj.filters add batch.c to VC++ build ------------------------------------------------------------------------ r1622 | dcleaver | 2011-10-09 19:58:34 +0200 (Sun, 09 Oct 2011) | 1 line Changed paths: M /trunk/batch.c Test: removed some trailing white space 
------------------------------------------------------------------------ r1621 | zimmerma | 2011-10-02 22:27:01 +0200 (Sun, 02 Oct 2011) | 2 lines Changed paths: M /trunk/batch.c [batch.c] fix on 32-bit machines when s has >= 2^31 bits ------------------------------------------------------------------------ r1620 | zimmerma | 2011-10-02 09:10:05 +0200 (Sun, 02 Oct 2011) | 2 lines Changed paths: M /trunk/Fgw.c [Fgw.c] added math.h header ------------------------------------------------------------------------ r1619 | zimmerma | 2011-10-01 17:12:52 +0200 (Sat, 01 Oct 2011) | 3 lines Changed paths: M /trunk/Fgw.c M /trunk/ecm-ecm.h M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/ecm.h M /trunk/factor.c M /trunk/main.c included code from David Cleaver to recognize inputs of the form k*b^n+c (only with GWNUM) ------------------------------------------------------------------------ r1618 | bouvierc | 2011-09-29 14:53:25 +0200 (Thu, 29 Sep 2011) | 2 lines Changed paths: M /trunk/ecm-ecm.h Modify MAX_B1 in order to avoid error during char to double conversion ------------------------------------------------------------------------ r1617 | zimmerma | 2011-09-29 14:47:29 +0200 (Thu, 29 Sep 2011) | 3 lines Changed paths: M /trunk/batch.c [batch.c] MAX_B1 -> MAX_B1_BATCH to avoid conflict with MAX_B1 defined in ecm-ecm.h ------------------------------------------------------------------------ r1616 | zimmerma | 2011-09-29 14:39:55 +0200 (Thu, 29 Sep 2011) | 2 lines Changed paths: M /trunk/batch.c [batch.c] added comments and rewritten compute_s() ------------------------------------------------------------------------ r1615 | bouvierc | 2011-09-29 10:28:42 +0200 (Thu, 29 Sep 2011) | 3 lines Changed paths: M /trunk/batch.c New compute_s function with only one array to accumulate ------------------------------------------------------------------------ r1614 | zimmerma | 2011-09-29 08:16:12 +0200 (Thu, 29 Sep 2011) | 3 lines Changed paths: M /trunk/Fgw.c M /trunk/batch.c [Fgw.c]
fixed error in ASSERT_ALWAYS [batch.c] added missing mpz_clear for s ------------------------------------------------------------------------ r1613 | zimmerma | 2011-09-28 16:46:16 +0200 (Wed, 28 Sep 2011) | 2 lines Changed paths: M /trunk/Fgw.c [Fgw.c] added constraints on gw_c, and fixed stupid error ------------------------------------------------------------------------ r1612 | zimmerma | 2011-09-28 16:43:52 +0200 (Wed, 28 Sep 2011) | 2 lines Changed paths: M /trunk/auxlib.c M /trunk/mpmod.c M /trunk/mul_fft.c removed dead code (reported by gcc 4.6.1) ------------------------------------------------------------------------ r1611 | zimmerma | 2011-09-28 14:24:29 +0200 (Wed, 28 Sep 2011) | 2 lines Changed paths: M /trunk/Fgw.c [Fgw.c] added assertions on gw_k ------------------------------------------------------------------------ r1610 | zimmerma | 2011-09-26 08:53:04 +0200 (Mon, 26 Sep 2011) | 2 lines Changed paths: M /trunk/factor.c [factor.c] set batch=0 in ecm_init ------------------------------------------------------------------------ r1609 | zimmerma | 2011-09-25 21:48:04 +0200 (Sun, 25 Sep 2011) | 2 lines Changed paths: M /trunk/Fgw.c M /trunk/ecm.c more changes from David Cleaver for GWNUM and batch mode ------------------------------------------------------------------------ r1608 | zimmerma | 2011-09-23 16:43:33 +0200 (Fri, 23 Sep 2011) | 2 lines Changed paths: M /trunk/test.ecm [test.ecm] added one test for -batch that did fail on 32-bit computers ------------------------------------------------------------------------ r1607 | zimmerma | 2011-09-23 16:28:24 +0200 (Fri, 23 Sep 2011) | 2 lines Changed paths: M /trunk/batch.c M /trunk/ecm-impl.h M /trunk/mpmod.c [batch.c] tiny speedup using new mpres_mul_2exp function ------------------------------------------------------------------------ r1606 | bouvierc | 2011-09-23 14:57:00 +0200 (Fri, 23 Sep 2011) | 2 lines Changed paths: M /trunk/batch.c Correct the bug reported by David Cleaver on 32-bit system 
------------------------------------------------------------------------ r1605 | zimmerma | 2011-09-23 08:45:28 +0200 (Fri, 23 Sep 2011) | 2 lines Changed paths: M /trunk/test.ecm [test.ecm] added one test for the batch mode ------------------------------------------------------------------------ r1604 | zimmerma | 2011-09-22 12:06:04 +0200 (Thu, 22 Sep 2011) | 2 lines Changed paths: M /trunk/batch.c M /trunk/test.ecm [batch.c] check that A = 2 (mod 4) for the batch mode ------------------------------------------------------------------------ r1603 | zimmerma | 2011-09-21 07:50:50 +0200 (Wed, 21 Sep 2011) | 2 lines Changed paths: M /trunk/batch.c [batch.c] better fix ------------------------------------------------------------------------ r1602 | zimmerma | 2011-09-20 17:41:13 +0200 (Tue, 20 Sep 2011) | 2 lines Changed paths: M /trunk/batch.c [batch.c] small changes ------------------------------------------------------------------------ r1601 | zimmerma | 2011-09-20 17:29:40 +0200 (Tue, 20 Sep 2011) | 2 lines Changed paths: M /trunk/batch.c M /trunk/ecm-ecm.h M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/resume.c fixed a few compiler warnings with batch mode ------------------------------------------------------------------------ r1600 | zimmerma | 2011-09-20 08:49:15 +0200 (Tue, 20 Sep 2011) | 2 lines Changed paths: M /trunk/main.c [main.c] fixed stupid error in batch mode (thanks David Cleaver) ------------------------------------------------------------------------ r1599 | brian_gladman | 2011-09-19 23:26:40 +0200 (Mon, 19 Sep 2011) | 1 line Changed paths: M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/ecm/ecm.vcxproj.filters add batch mode file to Visual Studio 2010 builds ------------------------------------------------------------------------ r1598 | zimmerma | 2011-09-19 20:45:04 +0200 (Mon, 19 Sep 2011) | 2 lines Changed paths: M /trunk/batch.c M /trunk/ecm.c M /trunk/main.c cleanup of the batch mode 
------------------------------------------------------------------------ r1597 | bouvierc | 2011-09-16 15:54:32 +0200 (Fri, 16 Sep 2011) | 3 lines Changed paths: M /trunk/Makefile.am M /trunk/TODO A /trunk/batch.c M /trunk/configure.in M /trunk/ecm-ecm.h M /trunk/ecm-gmp.h M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/ecm.h M /trunk/factor.c M /trunk/main.c M /trunk/mpmod.c M /trunk/test.ecm Implementation of the batch mode (option -batch) Use Montgomery's parametrization to save 1 multiplication ------------------------------------------------------------------------ r1596 | zimmerma | 2011-09-15 18:41:42 +0200 (Thu, 15 Sep 2011) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/pp1.c got rid of the obsolete mpres_normalize and mpres_semi_normalize functions ------------------------------------------------------------------------ r1595 | zimmerma | 2011-09-03 09:33:12 +0200 (Sat, 03 Sep 2011) | 2 lines Changed paths: M /trunk/Fgw.c [Fgw.c] more changes from David Cleaver, for future new version of GWNUM ------------------------------------------------------------------------ r1594 | zimmerma | 2011-09-02 20:19:45 +0200 (Fri, 02 Sep 2011) | 2 lines Changed paths: M /trunk/ecm-params.h.athlon64 [ecm-params.h.athlon64] updated (from David Cleaver) ------------------------------------------------------------------------ r1593 | zimmerma | 2011-09-02 16:47:44 +0200 (Fri, 02 Sep 2011) | 2 lines Changed paths: M /trunk/main.c [main.c] deal with MPIR in print_config() too ------------------------------------------------------------------------ r1592 | zimmerma | 2011-09-02 16:37:02 +0200 (Fri, 02 Sep 2011) | 2 lines Changed paths: M /trunk/README.dev [README.dev] added item for a new release ------------------------------------------------------------------------ r1591 | zimmerma | 2011-09-01 19:03:25 +0200 (Thu, 01 Sep 2011) | 2 lines Changed paths: M /trunk/Fgw.c M /trunk/INSTALL fixed typo in name of David Cleaver (sorry) 
------------------------------------------------------------------------ r1590 | zimmerma | 2011-08-22 13:58:47 +0200 (Mon, 22 Aug 2011) | 3 lines Changed paths: M /trunk/Fgw.c M /trunk/INSTALL M /trunk/Makefile.am added changes contributed by David Cleaver to use gwnum 26.6 on Windows x64 with MingW64 in Msys ------------------------------------------------------------------------ r1589 | kruppa | 2011-07-25 16:11:38 +0200 (Mon, 25 Jul 2011) | 2 lines Changed paths: M /trunk/configure.in Patch to make configure.in work with autoconf 2.68, provided by Ralf Recker ------------------------------------------------------------------------ r1588 | zimmerma | 2011-06-30 13:37:01 +0200 (Thu, 30 Jun 2011) | 2 lines Changed paths: M /trunk/champions.h [champions.h] bumped P-1 value ------------------------------------------------------------------------ r1587 | bouvierc | 2011-06-24 14:02:32 +0200 (Fri, 24 Jun 2011) | 3 lines Changed paths: M /trunk/gpu/gpu_ecm/Makefile M /trunk/gpu/gpu_ecm/cudaarith.cu M /trunk/gpu/gpu_ecm/cudautils.cu M /trunk/gpu/gpu_ecm/cudautils.h M /trunk/gpu/gpu_ecm/main.cu M /trunk/gpu/gpu_ecm/main.h M /trunk/gpu/gpu_ecm/test.sh M /trunk/gpu/gpu_ecm/utils.cu M /trunk/gpu/gpu_ecm/utils.h Latest improvements. ------------------------------------------------------------------------ r1586 | zimmerma | 2011-06-20 16:34:33 +0200 (Mon, 20 Jun 2011) | 3 lines Changed paths: M /trunk/main.c [main.c] fixed error message allow 1 as factor found (when input number is 1) ------------------------------------------------------------------------ r1585 | zimmerma | 2011-06-20 16:19:19 +0200 (Mon, 20 Jun 2011) | 2 lines Changed paths: M /trunk/main.c [main.c] GNU coding style ------------------------------------------------------------------------ r1584 | bouvierc | 2011-06-20 16:12:14 +0200 (Mon, 20 Jun 2011) | 3 lines Changed paths: M /trunk/main.c Add a test to forbid nonpositive integers. 
------------------------------------------------------------------------ r1583 | bouvierc | 2011-06-20 15:11:36 +0200 (Mon, 20 Jun 2011) | 3 lines Changed paths: M /trunk/auxi.c Fix a bug for number of digits with negative number. ------------------------------------------------------------------------ r1582 | zimmerma | 2011-06-15 07:43:23 +0200 (Wed, 15 Jun 2011) | 2 lines Changed paths: M /trunk/champions.h [champions.h] minimal size for ECM is now 67 digits ------------------------------------------------------------------------ r1581 | bouvierc | 2011-06-01 15:04:35 +0200 (Wed, 01 Jun 2011) | 5 lines Changed paths: A /trunk/gpu/gpu_ecm A /trunk/gpu/gpu_ecm/Makefile A /trunk/gpu/gpu_ecm/cudaarith.cu A /trunk/gpu/gpu_ecm/cudaarith.h A /trunk/gpu/gpu_ecm/cudautils.cu A /trunk/gpu/gpu_ecm/cudautils.h A /trunk/gpu/gpu_ecm/gpu_ecm A /trunk/gpu/gpu_ecm/main.cu A /trunk/gpu/gpu_ecm/main.h A /trunk/gpu/gpu_ecm/obj A /trunk/gpu/gpu_ecm/test.sh A /trunk/gpu/gpu_ecm/utils.cu A /trunk/gpu/gpu_ecm/utils.h gpu_ecm for GPU of compute capability 1.3 and above Optimize for 1024bits modulus but can be compiled for 256 and 512bits modulus with make v=1 and make v=2 ------------------------------------------------------------------------ r1580 | brian_gladman | 2011-05-31 23:44:26 +0200 (Tue, 31 May 2011) | 1 line Changed paths: M /trunk/build.vc10/tests.py add overall timing to the Windows test program (tests.py) ------------------------------------------------------------------------ r1579 | brian_gladman | 2011-05-31 17:33:52 +0200 (Tue, 31 May 2011) | 1 line Changed paths: M /trunk/build.vc10/config.h M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/libecm/libecm.vcxproj M /trunk/build.vc10/tests.py update Windows builds to add missing defines and add OpenMP support ------------------------------------------------------------------------ r1578 | zimmerma | 2011-05-18 12:11:45 +0200 (Wed, 18 May 2011) | 2 lines Changed paths: M /trunk/champions.h [champions.h] updated 
for P+1 ------------------------------------------------------------------------ r1577 | zimmerma | 2011-05-05 13:05:45 +0200 (Thu, 05 May 2011) | 2 lines Changed paths: M /trunk/champions.h [champions.h] updated P+1 size ------------------------------------------------------------------------ r1576 | zimmerma | 2011-05-04 13:19:18 +0200 (Wed, 04 May 2011) | 2 lines Changed paths: M /trunk/test.ecm [test.ecm] added some tests to exercise patch from David Cleaver (r1575) ------------------------------------------------------------------------ r1575 | zimmerma | 2011-05-04 12:45:37 +0200 (Wed, 04 May 2011) | 4 lines Changed paths: M /trunk/TODO M /trunk/configure.in M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/lucas.c M /trunk/pp1.c added patch from David Cleaver to allow B1>=2^32 on machines where "unsigned long" has 32 bits only, by using "unsigned long long" ------------------------------------------------------------------------ r1574 | bouvierc | 2011-04-21 16:30:48 +0200 (Thu, 21 Apr 2011) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm_cc13/cudaarith.cu Correct a little mistake in the compare function. ------------------------------------------------------------------------ r1573 | bouvierc | 2011-04-20 17:40:57 +0200 (Wed, 20 Apr 2011) | 2 lines Changed paths: M /trunk/gpu/gpu_ecm_cc13/Makefile M /trunk/gpu/gpu_ecm_cc13/cudaarith.cu M /trunk/gpu/gpu_ecm_cc13/cudaarith.h M /trunk/gpu/gpu_ecm_cc13/cudautils.cu M /trunk/gpu/gpu_ecm_cc13/main.cu M /trunk/gpu/gpu_ecm_cc13/test.sh M /trunk/gpu/gpu_ecm_cc13/utils.h Some improvements (especially on multiplication). 30% gain in time. ------------------------------------------------------------------------ r1572 | bouvierc | 2011-04-14 15:33:22 +0200 (Thu, 14 Apr 2011) | 1 line Changed paths: M /trunk/gpu/gpu_ecm_cc13/cudautils.cu M /trunk/gpu/gpu_ecm_cc13/main.cu Corrections of the copy between CPU and GPU which only worked for half of the curves. 
------------------------------------------------------------------------ r1571 | bouvierc | 2011-04-13 10:46:45 +0200 (Wed, 13 Apr 2011) | 1 line Changed paths: A /trunk/gpu/gpu_ecm_cc13 A /trunk/gpu/gpu_ecm_cc13/Makefile A /trunk/gpu/gpu_ecm_cc13/cudaarith.cu A /trunk/gpu/gpu_ecm_cc13/cudaarith.h A /trunk/gpu/gpu_ecm_cc13/cudautils.cu A /trunk/gpu/gpu_ecm_cc13/main.cu A /trunk/gpu/gpu_ecm_cc13/obj A /trunk/gpu/gpu_ecm_cc13/test.sh A /trunk/gpu/gpu_ecm_cc13/utils.h Implementation of ecm for NVIDIA GPU of compute capability 1.3. ./test.sh provides an example. ------------------------------------------------------------------------ r1570 | zimmerma | 2011-04-08 13:56:38 +0200 (Fri, 08 Apr 2011) | 2 lines Changed paths: M /trunk/TODO [TODO] added one item ------------------------------------------------------------------------ r1569 | zimmerma | 2011-04-08 08:31:37 +0200 (Fri, 08 Apr 2011) | 2 lines Changed paths: M /trunk/README [README] updated OpenPFGW url ------------------------------------------------------------------------ r1568 | zimmerma | 2011-03-30 13:05:10 +0200 (Wed, 30 Mar 2011) | 2 lines Changed paths: M /trunk/champions.h [champions.h] updated for P-1 ------------------------------------------------------------------------ r1567 | brian_gladman | 2011-03-30 11:01:16 +0200 (Wed, 30 Mar 2011) | 1 line Changed paths: M /trunk/build.vc10/ecm/ecm.vcxproj Adjust stack size options for Windows ------------------------------------------------------------------------ r1566 | zimmerma | 2011-03-10 11:50:33 +0100 (Thu, 10 Mar 2011) | 2 lines Changed paths: M /trunk/champions.h [champions.h] updated P+1 minimum digits ------------------------------------------------------------------------ r1565 | zimmerma | 2011-01-18 17:44:09 +0100 (Tue, 18 Jan 2011) | 2 lines Changed paths: M /trunk/main.c [main.c] removed trailing blank (commit test) ------------------------------------------------------------------------ r1564 | brian_gladman | 2011-01-14 17:46:08 +0100 (Fri,
14 Jan 2011) | 1 line Changed paths: M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/ecm/ecm.vcxproj.filters M /trunk/build.vc10/libecm/libecm.vcxproj M /trunk/build.vc10/libecm/libecm.vcxproj.filters Add champions.h to Windows builds ------------------------------------------------------------------------ r1563 | zimmerma | 2011-01-12 14:55:15 +0100 (Wed, 12 Jan 2011) | 3 lines Changed paths: M /trunk/main.c [main.c] print assertions too (if enabled) in the startup line, and cleaned up the corresponding code ------------------------------------------------------------------------ r1562 | kruppa | 2011-01-06 16:48:16 +0100 (Thu, 06 Jan 2011) | 2 lines Changed paths: M /trunk/x86_64/mulredc.m4 Better asm code for AMD cpus ------------------------------------------------------------------------ r1561 | kruppa | 2011-01-06 13:49:16 +0100 (Thu, 06 Jan 2011) | 2 lines Changed paths: M /trunk/Makefile.am A /trunk/champions.h M /trunk/main.c Put champions list in own file to avoid frequent updates to main.c ------------------------------------------------------------------------ r1560 | brian_gladman | 2010-12-30 16:19:42 +0100 (Thu, 30 Dec 2010) | 1 line Changed paths: M /trunk/build.vc10/config.h minor change to Windows win32 build configuration ------------------------------------------------------------------------ r1559 | kruppa | 2010-12-17 17:18:11 +0100 (Fri, 17 Dec 2010) | 2 lines Changed paths: M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c Fix spurious error messages introduced in last commit ------------------------------------------------------------------------ r1558 | kruppa | 2010-12-17 17:11:47 +0100 (Fri, 17 Dec 2010) | 4 lines Changed paths: M /trunk/pm1.c M /trunk/pm1fs2.c Bugfix for choosing between NTT and non-NTT. 
Needs more testing before doing choice for P+1 the same way ------------------------------------------------------------------------ r1557 | zimmerma | 2010-12-17 16:14:22 +0100 (Fri, 17 Dec 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] removed one blank (test commit) ------------------------------------------------------------------------ r1556 | kruppa | 2010-12-17 14:18:02 +0100 (Fri, 17 Dec 2010) | 2 lines Changed paths: M /trunk/configure.in Link Woltman's GWNUM with -lpthread, abort if GWNUM+OpenMP is requested ------------------------------------------------------------------------ r1555 | zimmerma | 2010-12-09 15:32:17 +0100 (Thu, 09 Dec 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] removed trailing blank (test) ------------------------------------------------------------------------ r1554 | zimmerma | 2010-12-05 22:21:25 +0100 (Sun, 05 Dec 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] now recognizes MPIR ------------------------------------------------------------------------ r1553 | zimmerma | 2010-11-24 16:13:11 +0100 (Wed, 24 Nov 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] we now need at least a p66 to enter the ECM top ten! 
------------------------------------------------------------------------ r1552 | brian_gladman | 2010-11-22 20:17:10 +0100 (Mon, 22 Nov 2010) | 1 line Changed paths: M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/libecm/libecm.vcxproj M /trunk/build.vc10/mp_lib.props M /trunk/build.vc10/tune/tune.vcxproj minor Windows build changes ------------------------------------------------------------------------ r1551 | brian_gladman | 2010-11-22 13:33:06 +0100 (Mon, 22 Nov 2010) | 1 line Changed paths: M /trunk/build.vc10/config.h M /trunk/build.vc10/ecm/ecm.vcxproj M /trunk/build.vc10/ecm.sln M /trunk/build.vc10/libecm/libecm.vcxproj minor Windows build changes ------------------------------------------------------------------------ r1550 | zimmerma | 2010-11-19 15:03:23 +0100 (Fri, 19 Nov 2010) | 3 lines Changed paths: M /trunk/mul_fft.c [mul_fft.c] check for malloc return value in main memory allocation, and point to -maxmem option ------------------------------------------------------------------------ r1549 | zimmerma | 2010-11-18 21:14:06 +0100 (Thu, 18 Nov 2010) | 2 lines Changed paths: M /trunk/pm1fs2.c [pm1fs2.c] fixed typos in comments ------------------------------------------------------------------------ r1548 | zimmerma | 2010-11-18 20:57:40 +0100 (Thu, 18 Nov 2010) | 2 lines Changed paths: M /trunk/pm1.c [pm1.c] p-1 -> P-1 ------------------------------------------------------------------------ r1547 | zimmerma | 2010-11-12 15:12:10 +0100 (Fri, 12 Nov 2010) | 2 lines Changed paths: M /trunk/NEWS [NEWS] added more stuff ------------------------------------------------------------------------ r1546 | zimmerma | 2010-11-12 15:01:48 +0100 (Fri, 12 Nov 2010) | 3 lines Changed paths: M /trunk/NEWS M /trunk/configure.in [configure.in] fixed problem with SSE2 support (http://trac.sagemath.org/sage_trac/ticket/10252) ------------------------------------------------------------------------ r1545 | zimmerma | 2010-11-10 11:16:13 +0100 (Wed, 10 Nov 2010) | 2 
lines Changed paths: M /trunk/main.c [main.c] removed trailing blank ------------------------------------------------------------------------ r1544 | zimmerma | 2010-11-10 11:04:02 +0100 (Wed, 10 Nov 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] removed another trailing blank ------------------------------------------------------------------------ r1543 | zimmerma | 2010-11-10 10:52:14 +0100 (Wed, 10 Nov 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] removed another trailing blank (another commit log test) ------------------------------------------------------------------------ r1542 | zimmerma | 2010-11-10 10:36:46 +0100 (Wed, 10 Nov 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] removed trailing blank (commit log test) ------------------------------------------------------------------------ r1541 | zimmerma | 2010-11-08 17:50:20 +0100 (Mon, 08 Nov 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] removed trailing blank ------------------------------------------------------------------------ r1540 | zimmerma | 2010-11-08 07:53:35 +0100 (Mon, 08 Nov 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] removed trailing space ------------------------------------------------------------------------ r1539 | brian_gladman | 2010-10-30 19:31:28 +0200 (Sat, 30 Oct 2010) | 1 line Changed paths: A /trunk/build.vc10 A /trunk/build.vc10/assembler A /trunk/build.vc10/assembler/a_win32a_mulredc.asm A /trunk/build.vc10/assembler/a_win32a_redc.asm A /trunk/build.vc10/assembler/a_win32p_mulredc.asm A /trunk/build.vc10/assembler/a_win32p_redc.asm A /trunk/build.vc10/assembler/a_x64_mulredc.asm A /trunk/build.vc10/assembler/a_x64_redc.asm A /trunk/build.vc10/assembler/mulredc.h A /trunk/build.vc10/assembler/test_mulredc.c A /trunk/build.vc10/config.h A /trunk/build.vc10/ecm A /trunk/build.vc10/ecm/ecm.vcxproj A /trunk/build.vc10/ecm/ecm.vcxproj.filters A /trunk/build.vc10/ecm-params.h A /trunk/build.vc10/ecm-params.h.win32.amd A 
/trunk/build.vc10/ecm-params.h.win32.intel A /trunk/build.vc10/ecm-params.h.x64.amd A /trunk/build.vc10/ecm-params.h.x64.intel A /trunk/build.vc10/ecm.sln A /trunk/build.vc10/file_copy.bat A /trunk/build.vc10/libecm A /trunk/build.vc10/libecm/libecm.vcxproj A /trunk/build.vc10/libecm/libecm.vcxproj.filters A /trunk/build.vc10/mp_lib.props A /trunk/build.vc10/mul_fft-params.h.win32.amd A /trunk/build.vc10/mul_fft-params.h.win32.intel A /trunk/build.vc10/mul_fft-params.h.x64.amd A /trunk/build.vc10/mul_fft-params.h.x64.intel A /trunk/build.vc10/readme.txt A /trunk/build.vc10/tests.py A /trunk/build.vc10/tune A /trunk/build.vc10/tune/tune.vcxproj A /trunk/build.vc10/tune/tune.vcxproj.filters A /trunk/build.vc10/vsyasm.props A /trunk/build.vc10/vsyasm.targets A /trunk/build.vc10/vsyasm.xml Add a Visual Studio 2010 build ------------------------------------------------------------------------ r1538 | brian_gladman | 2010-10-30 19:07:41 +0200 (Sat, 30 Oct 2010) | 1 line Changed paths: M /trunk/longlong.h Further correction of _PROTO define ------------------------------------------------------------------------ r1537 | brian_gladman | 2010-10-30 16:57:34 +0200 (Sat, 30 Oct 2010) | 2 lines Changed paths: M /trunk/mul_fft.c Correct bug in definition of _PROTO in mul_fft.c ------------------------------------------------------------------------ r1536 | zimmerma | 2010-10-22 10:28:43 +0200 (Fri, 22 Oct 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] updated champions table ------------------------------------------------------------------------ r1535 | zimmerma | 2010-09-10 16:58:27 +0200 (Fri, 10 Sep 2010) | 2 lines Changed paths: A /trunk/gpu/modular_arithmetic.h [modular_arithmetic.h] header file for routines in modular_arithmetic.c ------------------------------------------------------------------------ r1534 | zimmerma | 2010-09-10 16:57:45 +0200 (Fri, 10 Sep 2010) | 2 lines Changed paths: A /trunk/gpu/stage1-c.c [stage1-c.c] new file, plain-C version of 
stage1.c ------------------------------------------------------------------------ r1533 | zimmerma | 2010-09-10 16:41:33 +0200 (Fri, 10 Sep 2010) | 3 lines Changed paths: M /trunk/gpu/makefile M /trunk/gpu/modular_arithmetic.c M /trunk/gpu/prototype.c M /trunk/gpu/prototype.h now stage1-c seems to give similar results as stage1 (but the efficiency can still be improved) ------------------------------------------------------------------------ r1532 | zimmerma | 2010-09-08 13:34:01 +0200 (Wed, 08 Sep 2010) | 2 lines Changed paths: M /trunk/gpu/makefile M /trunk/gpu/modular_arithmetic.c cleanup in file modular_arithmetic.c ------------------------------------------------------------------------ r1531 | zimmerma | 2010-09-08 13:19:50 +0200 (Wed, 08 Sep 2010) | 2 lines Changed paths: M /trunk/gpu/makefile M /trunk/gpu/prototype.c M /trunk/gpu/stage1.c small changes to avoid compiler warnings ------------------------------------------------------------------------ r1530 | zimmerma | 2010-09-08 11:50:53 +0200 (Wed, 08 Sep 2010) | 3 lines Changed paths: M /trunk/gpu/makefile M /trunk/gpu/prototype.c M /trunk/gpu/stage1.c fixed computation of d=(a+2)/4 mod N in prototype.c, added copyright notice, and started to clean up code ------------------------------------------------------------------------ r1529 | zimmerma | 2010-09-08 11:04:36 +0200 (Wed, 08 Sep 2010) | 2 lines Changed paths: M /trunk/INSTALL [INSTALL] updated info about sparc problem ------------------------------------------------------------------------ r1528 | zimmerma | 2010-09-07 08:37:27 +0200 (Tue, 07 Sep 2010) | 2 lines Changed paths: M /trunk/INSTALL [INSTALL] added known problem on sparc with GCC 4.4.5 ------------------------------------------------------------------------ r1527 | kruppa | 2010-07-31 15:35:01 +0200 (Sat, 31 Jul 2010) | 3 lines Changed paths: M /trunk/athlon/Makefile.am M /trunk/pentium4/Makefile.am M /trunk/powerpc64/Makefile.am M /trunk/x86_64/Makefile.am Avoid spurious dependency of 
libmulredc on GMP and libm. ------------------------------------------------------------------------ r1526 | kruppa | 2010-07-31 15:33:57 +0200 (Sat, 31 Jul 2010) | 5 lines Changed paths: M /trunk/x86_64/mulredc.m4 M /trunk/x86_64/mulredc1.asm M /trunk/x86_64/mulredc1.m4 PIC-ify calls to abort(). We always call abort@plt now, which on Linux seems to work even in a static library, but may not be portable. Needs testing. ------------------------------------------------------------------------ r1525 | zimmerma | 2010-07-28 11:09:59 +0200 (Wed, 28 Jul 2010) | 2 lines Changed paths: M /trunk/Makefile.am [Makefile.am] patch from Laurent Fousse (libecm.so was not linked against gmp) ------------------------------------------------------------------------ r1524 | kruppa | 2010-07-24 13:35:53 +0200 (Sat, 24 Jul 2010) | 3 lines Changed paths: M /trunk/Makefile.am Removed linker flags from LDADD to avoid spurious dependencies Include mulredc in libecm.la only if asm redc is actually used ------------------------------------------------------------------------ r1523 | zimmerma | 2010-07-09 11:14:01 +0200 (Fri, 09 Jul 2010) | 2 lines Changed paths: M /trunk/gpu/makefile M /trunk/gpu/modular_arithmetic.c fixed bug in mul() ------------------------------------------------------------------------ r1522 | zimmerma | 2010-07-09 10:29:10 +0200 (Fri, 09 Jul 2010) | 2 lines Changed paths: M /trunk/gpu/makefile [makefile] added target ------------------------------------------------------------------------ r1521 | feltin | 2010-07-08 17:07:38 +0200 (Thu, 08 Jul 2010) | 1 line Changed paths: A /trunk/gpu/getprime.c A /trunk/gpu/getprime.h M /trunk/gpu/modular_arithmetic.c ------------------------------------------------------------------------ r1520 | feltin | 2010-07-07 17:09:57 +0200 (Wed, 07 Jul 2010) | 2 lines Changed paths: A /trunk/gpu/modular_arithmetic.c Modular arithmetic version in C without using the GMP library 
------------------------------------------------------------------------ r1519 | zimmerma | 2010-07-02 16:34:15 +0200 (Fri, 02 Jul 2010) | 2 lines Changed paths: M /trunk/Makefile.am [Makefile.am] better fix for #10648 (contributed from Vincent Lefèvre) ------------------------------------------------------------------------ r1518 | zimmerma | 2010-07-01 17:38:51 +0200 (Thu, 01 Jul 2010) | 4 lines Changed paths: M /trunk/configure.in [configure.in] better fix from Vincent Lefevre: even on x86_64, we might use GMP with ABI=32, in which case we shouldn't use the (64-bit) assembly redc from x86_64 ------------------------------------------------------------------------ r1517 | zimmerma | 2010-07-01 17:31:34 +0200 (Thu, 01 Jul 2010) | 3 lines Changed paths: M /trunk/Makefile.am [Makefile.am] patch to solve bug #10648 from tracker, however I'm not sure this solution is the right one, if needed we can revert it. ------------------------------------------------------------------------ r1516 | zimmerma | 2010-06-30 15:50:19 +0200 (Wed, 30 Jun 2010) | 2 lines Changed paths: M /trunk/configure.in [configure.in] patch from Vincent Lefevre (see bug tracker #10646) ------------------------------------------------------------------------ r1515 | zimmerma | 2010-06-30 14:54:20 +0200 (Wed, 30 Jun 2010) | 2 lines Changed paths: M /trunk/Makefile.am [Makefile.am] added comment ------------------------------------------------------------------------ r1514 | zimmerma | 2010-06-30 14:28:04 +0200 (Wed, 30 Jun 2010) | 4 lines Changed paths: M /trunk/configure.in [configure.in] fixed bug reported by Vincent Lefevre: incorrect configure --help output for --enable-asm-redc (#10649 on bug tracker) ------------------------------------------------------------------------ r1513 | zimmerma | 2010-06-30 14:18:56 +0200 (Wed, 30 Jun 2010) | 2 lines Changed paths: M /trunk/configure.in [configure.in] fixed typo ------------------------------------------------------------------------ r1512 | feltin | 
2010-06-21 11:03:53 +0200 (Mon, 21 Jun 2010) | 2 lines Changed paths: M /trunk/gpu/makefile M /trunk/gpu/prototype.c M /trunk/gpu/prototype.h M /trunk/gpu/stage1.c Stage1 of algorithm ECM (version 2) ------------------------------------------------------------------------ r1511 | zimmerma | 2010-06-15 16:25:05 +0200 (Tue, 15 Jun 2010) | 2 lines Changed paths: M /trunk/INSTALL [INSTALL] new section with known problems ------------------------------------------------------------------------ r1510 | feltin | 2010-06-15 15:48:17 +0200 (Tue, 15 Jun 2010) | 2 lines Changed paths: A /trunk/gpu/makefile A /trunk/gpu/prototype.c A /trunk/gpu/prototype.h A /trunk/gpu/stage1.c first version of ECM (using GMP) ------------------------------------------------------------------------ r1509 | zimmerma | 2010-06-15 09:15:29 +0200 (Tue, 15 Jun 2010) | 2 lines Changed paths: A /trunk/gpu [gpu] new subdirectory to experiment with GPU code for stage 1 ------------------------------------------------------------------------ r1508 | zimmerma | 2010-05-22 12:39:45 +0200 (Sat, 22 May 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] updated smallest champion size for P-1 ------------------------------------------------------------------------ r1507 | zimmerma | 2010-05-17 00:38:19 +0200 (Mon, 17 May 2010) | 2 lines Changed paths: M /trunk/main.c [main.c] now we need at least 64 digits to enter the ECM champion list! 
------------------------------------------------------------------------ r1506 | kruppa | 2010-04-27 13:52:26 +0200 (Tue, 27 Apr 2010) | 2 lines Changed paths: M /trunk/build.vc9/Makefile.am M /trunk/build.vc9/assembler/Makefile.am A /trunk/build.vc9/tune/Makefile.am M /trunk/configure.in Added missing files for Visual C build ------------------------------------------------------------------------ r1505 | zimmerma | 2010-04-24 23:05:26 +0200 (Sat, 24 Apr 2010) | 2 lines Changed paths: M /trunk/TODO [TODO] updated done item ------------------------------------------------------------------------ r1504 | zimmerma | 2010-04-24 22:59:28 +0200 (Sat, 24 Apr 2010) | 3 lines Changed paths: M /trunk/configure.in M /trunk/mpzspm.c M /trunk/mpzspv.c M /trunk/sp.h M /trunk/spm.c implemented fast conversion from mpz_t to RNS (hard-coded threshold is 2^7 moduli, which seems close to optimal on a 64-bit machine) ------------------------------------------------------------------------ r1502 | kruppa | 2010-04-19 11:19:49 +0200 (Mon, 19 Apr 2010) | 3 lines Changed paths: M /trunk/ChangeLog Last commits for 6.3 added ------------------------------------------------------------------------ r1501 | kruppa | 2010-04-18 22:14:22 +0200 (Sun, 18 Apr 2010) | 5 lines Version 6.3. Assertions off by default. Added new thresholds to 64-bit parameter files in build.vc9/. 
------------------------------------------------------------------------ r1500 | kruppa | 2010-04-17 01:40:06 +0200 (Sat, 17 Apr 2010) | 4 lines Include ecm-params.h.core2 in distribution Updated record factor sizes ------------------------------------------------------------------------ r1499 | kruppa | 2010-04-17 01:26:15 +0200 (Sat, 17 Apr 2010) | 4 lines Specify source dir path for /powerpc64/powerpc-defs.m4 include to make out-of-source builds work ------------------------------------------------------------------------ r1498 | kruppa | 2010-04-16 23:41:03 +0200 (Fri, 16 Apr 2010) | 4 lines Detect Core 2 cpus if /proc/cpuinfo exists and use correct parameter file; warn about choosing right parameters if no /proc/cpuinfo exists on x86_64 ------------------------------------------------------------------------ r1497 | kruppa | 2010-04-16 18:11:31 +0200 (Fri, 16 Apr 2010) | 4 lines Threshold 100 for using multi-threading in mpzspv_from_mpzv() was too low for ECM, actually increased run-time. 
Threshold increased to 16384 ------------------------------------------------------------------------ r1496 | kruppa | 2010-04-15 12:11:20 +0200 (Thu, 15 Apr 2010) | 2 lines Add underscores to abort call on systems that need it ------------------------------------------------------------------------ r1495 | kruppa | 2010-04-14 23:16:48 +0200 (Wed, 14 Apr 2010) | 2 lines Test for mpn_sqr() and if it doesn't exist, fall back to mpn_mul() ------------------------------------------------------------------------ r1494 | kruppa | 2010-04-13 19:15:12 +0200 (Tue, 13 Apr 2010) | 4 lines Fixed missing return value that caused compiler warning Removed OMP critical that was left over from debugging ------------------------------------------------------------------------ r1493 | kruppa | 2010-04-11 21:46:36 +0200 (Sun, 11 Apr 2010) | 3 lines Small changes: check if --with-gmp dir exists, clarification of comment, remove duplicate case ------------------------------------------------------------------------ r1492 | kruppa | 2010-04-11 00:44:00 +0200 (Sun, 11 Apr 2010) | 4 lines Get addresses of input operands to mpres_mul() after realloc for result. 
Input operands may change address if input and output are the same mpz_t, and reading from the old, now free()'d, address makes valgrind unhappy ------------------------------------------------------------------------ r1491 | kruppa | 2010-04-10 23:45:24 +0200 (Sat, 10 Apr 2010) | 2 lines ASSERT input < modulus was too strict, NTT code itself does not fully reduce ------------------------------------------------------------------------ r1490 | kruppa | 2010-04-10 23:34:11 +0200 (Sat, 10 Apr 2010) | 2 lines Fixed missing mod reductions that could lead to NTT overflows ------------------------------------------------------------------------ r1489 | zimmerma | 2010-04-09 20:57:52 +0200 (Fri, 09 Apr 2010) | 2 lines [TODO] corrected item about mpn_redc_2 ------------------------------------------------------------------------ r1488 | zimmerma | 2010-04-09 19:45:57 +0200 (Fri, 09 Apr 2010) | 2 lines [TODO] forgot to commit 2nd item in last change ------------------------------------------------------------------------ r1487 | zimmerma | 2010-04-09 19:45:29 +0200 (Fri, 09 Apr 2010) | 2 lines [TODO] added two items suggested by T. Granlund ------------------------------------------------------------------------ r1486 | zimmerma | 2010-04-09 19:31:09 +0200 (Fri, 09 Apr 2010) | 2 lines bumped version number to 6.3-rc4 (just to try "make dist") ------------------------------------------------------------------------ r1485 | zimmerma | 2010-04-09 19:25:07 +0200 (Fri, 09 Apr 2010) | 4 lines [ecm-params.h.alpha-ev56] new parameter file for Alpha ev56 [Makefile.am] added ecm-params.h.alpha-ev56 in "make dist" [configure.in] now takes new default parameter files ------------------------------------------------------------------------ r1484 | kruppa | 2010-04-09 19:10:09 +0200 (Fri, 09 Apr 2010) | 4 lines Small changes to make building outside of the source directory work. Make libmulredc.a depend on config.m4 in all asm subdirectories. 
------------------------------------------------------------------------ r1483 | zimmerma | 2010-04-09 19:06:09 +0200 (Fri, 09 Apr 2010) | 2 lines [README.dev] added item to make a new release ------------------------------------------------------------------------ r1482 | zimmerma | 2010-04-09 19:03:13 +0200 (Fri, 09 Apr 2010) | 3 lines [ecm-params.h.pentium4] updated parameters for ecm-6.3 [Makefile.am] added hppa2.0 default parameters ------------------------------------------------------------------------ r1481 | kruppa | 2010-04-09 17:44:31 +0200 (Fri, 09 Apr 2010) | 4 lines Looks like gas wants "#" as comment separator and Apple Mac OS X assembler wants ";" so now we use M4 to discard comments ------------------------------------------------------------------------ r1480 | kruppa | 2010-04-09 16:13:09 +0200 (Fri, 09 Apr 2010) | 4 lines Make -printconfig print whether Windows ABI is used for assembler functions Q&D (and #define'd out) test of mulredc1_*() functions ------------------------------------------------------------------------ r1479 | kruppa | 2010-04-09 16:02:40 +0200 (Fri, 09 Apr 2010) | 3 lines Likewise fixed incorrect reading of inv_m and missing quotes ------------------------------------------------------------------------ r1478 | kruppa | 2010-04-09 15:27:37 +0200 (Fri, 09 Apr 2010) | 6 lines Fixed incorrect quoting which broke the asserts and the switch to MS ABI in mulredc.m4. Fixed incorrect reading of inv_m from the stack. No longer include x86_64/redc.asm in build or distribution. 
------------------------------------------------------------------------ r1477 | zimmerma | 2010-04-08 16:07:04 +0200 (Thu, 08 Apr 2010) | 2 lines [ecm-params.h.hppa2.0] parameter file for hppa2.0 ------------------------------------------------------------------------ r1476 | zimmerma | 2010-04-08 15:19:34 +0200 (Thu, 08 Apr 2010) | 2 lines [sp.h] fix to make umul_ppmm() work properly on hppa2.0 (gcc61.fsffrance.org) ------------------------------------------------------------------------ r1475 | zimmerma | 2010-04-08 15:07:29 +0200 (Thu, 08 Apr 2010) | 2 lines [ecm-params.h.pentium-m] updated ------------------------------------------------------------------------ r1474 | zimmerma | 2010-04-07 17:48:22 +0200 (Wed, 07 Apr 2010) | 3 lines [ecm-params.h.ia64] new parameter file [Makefile.am] added ecm-params.h.ia64 in make dist ------------------------------------------------------------------------ r1473 | kruppa | 2010-04-07 17:36:19 +0200 (Wed, 07 Apr 2010) | 6 lines Changed comments from C++ style "//" to assembler style "#" to avoid .S files which cause trouble on case-insensitive filesystems. Removed mulredc*.asm files from SVN as these are generated code. Added rules to Makefile.am to generate the mulredc*.asm files. 
------------------------------------------------------------------------ r1472 | zimmerma | 2010-04-07 15:13:59 +0200 (Wed, 07 Apr 2010) | 3 lines [ecm-params.h.sparc64] parameter file for sparc64 [Makefile.am] added ecm-params.h.sparc64 to make dist ------------------------------------------------------------------------ r1471 | zimmerma | 2010-04-07 14:00:27 +0200 (Wed, 07 Apr 2010) | 2 lines [Makefile.am] added new parameter files to make dist ------------------------------------------------------------------------ r1470 | zimmerma | 2010-04-07 13:52:53 +0200 (Wed, 07 Apr 2010) | 2 lines [ecm-params.h.athlon] added comment (this is for Opteron) ------------------------------------------------------------------------ r1469 | zimmerma | 2010-04-07 13:04:26 +0200 (Wed, 07 Apr 2010) | 3 lines [ecm-params.h.armv5tel] new parameter file for ARM [ecm-params.h.mips64el,ecm-params.h.powerpc970] added version of GMP used ------------------------------------------------------------------------ r1468 | zimmerma | 2010-04-07 11:12:54 +0200 (Wed, 07 Apr 2010) | 2 lines [ecm-params.h.mips64el] new parameter file for MIPS64 ------------------------------------------------------------------------ r1467 | zimmerma | 2010-04-07 09:56:05 +0200 (Wed, 07 Apr 2010) | 2 lines [ecm-params.h.powerpc970] updated tuning parameters for 6.3 ------------------------------------------------------------------------ r1466 | kruppa | 2010-04-06 17:25:50 +0200 (Tue, 06 Apr 2010) | 3 lines Test if compiler understands __attribute__((hot)) at configure time Marked some hot-spot functions of ECM accordingly ------------------------------------------------------------------------ r1465 | kruppa | 2010-04-02 14:41:42 +0200 (Fri, 02 Apr 2010) | 3 lines Removed duplicate LT_PREREQ, lowered requirement from 2.2.6b to 2.2.6. 
------------------------------------------------------------------------ r1464 | kruppa | 2010-04-01 17:16:34 +0200 (Thu, 01 Apr 2010) | 6 lines Separate thresholds for mulredc*() functions for squaring and general multiplication, since GMP mpn_sqr() is faster than mpn_mul_n() Use __gmpn_redc_1() function if configure finds it, although at this time it doesn't seem to be faster than the loop over mpn_addmul_1() ------------------------------------------------------------------------ r1463 | kruppa | 2010-03-31 11:35:48 +0200 (Wed, 31 Mar 2010) | 2 lines Print TUNE_MULREDC_THRESH with -printconfig ------------------------------------------------------------------------ r1462 | kruppa | 2010-03-31 10:27:19 +0200 (Wed, 31 Mar 2010) | 2 lines Fixed uninitialised variable in assertion ------------------------------------------------------------------------ r1461 | zimmerma | 2010-03-31 10:22:47 +0200 (Wed, 31 Mar 2010) | 2 lines [README.dev] added two reminders for making a new release ------------------------------------------------------------------------ r1460 | kruppa | 2010-03-31 01:53:51 +0200 (Wed, 31 Mar 2010) | 9 lines Switch assembly code to Windows ABI under MinGW Assembly redc3() function disabled on x86_84, GMP is faster New threshold for tuning: TUNE_MULREDC_THRESH, determines when to switch from asm mulredc*() functions to GMP Changed functions called by mpres_mul() to reduce call overhead Added "longcheck" target which runs the test scripts with different parameters and with valgrind, if configure found valgrind ------------------------------------------------------------------------ r1459 | kruppa | 2010-03-18 17:46:07 +0100 (Thu, 18 Mar 2010) | 3 lines Make asm code switch to Windows 64 ABI if WINDOWS64_ABI is defined in config.m4. Completely untested. 
mulredc1.m4 is to be done yet ------------------------------------------------------------------------ r1458 | kruppa | 2010-03-17 23:10:02 +0100 (Wed, 17 Mar 2010) | 2 lines Auto-generated files, should not be in SVN ------------------------------------------------------------------------ r1457 | kruppa | 2010-03-17 22:03:23 +0100 (Wed, 17 Mar 2010) | 2 lines Added -printconfig parameter ------------------------------------------------------------------------ r1456 | kruppa | 2010-03-17 19:01:40 +0100 (Wed, 17 Mar 2010) | 5 lines Try to import CC and CFLAGS from gmp.h (copied from MPFR 2.4.2) Detect SSE2 support by test compilation Use CCASFLAGS with CCAS Stricter quoting ------------------------------------------------------------------------ r1455 | kruppa | 2010-03-17 18:58:36 +0100 (Wed, 17 Mar 2010) | 3 lines Commented out .asm -> .S rule to avoid problems on case-insensitive filesystems ------------------------------------------------------------------------ r1454 | kruppa | 2010-03-16 17:47:49 +0100 (Tue, 16 Mar 2010) | 2 lines Truncating pointer conversion bug fixed, pointed out by David Cleaver ------------------------------------------------------------------------ r1453 | kruppa | 2010-03-16 17:34:39 +0100 (Tue, 16 Mar 2010) | 5 lines Change use of CFLAGS, CCASFLAGS and LDFLAGS when checking for underscores to match that in the resulting Makefile. Fixes error when -m64 was added to CCASFLAGS. ------------------------------------------------------------------------ r1452 | kruppa | 2010-03-10 22:15:44 +0100 (Wed, 10 Mar 2010) | 8 lines Use autoconf's test to determine how to enable OpenMP in the compiler. Enable OpenMP only for those targets that need it to avoid spurious library dependencies. Link GSL only to rho in test-drive mode to avoid spurious library dependency. Compile GSL-dependent code in rho.c only in test-drive mode. 
More consistent quoting in configure.in ------------------------------------------------------------------------ r1451 | kruppa | 2010-03-10 13:59:21 +0100 (Wed, 10 Mar 2010) | 3 lines Keep linking to GMP library while checking for GMP functions such as __gmpn_add_nc and __gmpn_mod_34lsub1 ------------------------------------------------------------------------ r1450 | zimmerma | 2010-03-09 11:35:21 +0100 (Tue, 09 Mar 2010) | 3 lines [ecm.c] clean up the code to print expected number of curves and time also changed to print from 35 to 80 digits instead of 20 to 65 ------------------------------------------------------------------------ r1449 | kruppa | 2010-03-07 21:53:40 +0100 (Sun, 07 Mar 2010) | 4 lines powerpc64 needs -m64 flag for gcc to produce 64 bit build. Without the flag, it produces a 32 bit build seemingly successfully, but the 64-bit mulredc asm code produces incorrect arithmetic. ------------------------------------------------------------------------ r1447 | kruppa | 2010-03-07 17:25:37 +0100 (Sun, 07 Mar 2010) | 8 lines Changed rules for building manpage ecm.1. Previously the man page would be built if ecm.1 was missing or was older than ecm.xml, even if xsltproc or docbook.xsl were not found by ./configure, causing "make" to exit with error. Furthermore, listing ecm.1 in two output variables would cause "make install" to try to install the same man page twice, leading to a warning. 
------------------------------------------------------------------------ r1446 | kruppa | 2010-03-07 16:18:06 +0100 (Sun, 07 Mar 2010) | 2 lines File locking item removed, it's done ------------------------------------------------------------------------ r1445 | kruppa | 2010-03-07 02:23:44 +0100 (Sun, 07 Mar 2010) | 2 lines Changed paths: M /trunk/README Point out explicitly that -save saves after stage 2 ------------------------------------------------------------------------ r1444 | kruppa | 2010-03-07 00:47:56 +0100 (Sun, 07 Mar 2010) | 2 lines Changed paths: M /trunk/resume.c Process LF, CR, and CR/LF as newline when reading save files ------------------------------------------------------------------------ r1443 | kruppa | 2010-03-05 15:53:07 +0100 (Fri, 05 Mar 2010) | 2 lines Changed paths: M /trunk/INSTALL Refer to latest GMP version 5.0.1 ------------------------------------------------------------------------ r1442 | kruppa | 2010-03-03 17:18:47 +0100 (Wed, 03 Mar 2010) | 8 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in On Darwin x86_64 systems, see if we need to pass -m64 to gcc to get 64 bit code. Don't add the GMP library to LIBS, but let Makefile.am add it to LDADD instead, to avoid GMP getting copied into the GMP-ECM libraries which is non-portable and seems to break linking on Darwin systems. 
------------------------------------------------------------------------ r1441 | kruppa | 2010-03-03 17:13:40 +0100 (Wed, 03 Mar 2010) | 5 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpmod.c M /trunk/rho.c M /trunk/schoen_strass.c Fixed operator precedence bug in schoen_strass.c Compile Buchstab_omega() in rho.c only with GSL to avoid warning Added mpres_equal() function ------------------------------------------------------------------------ r1440 | kruppa | 2010-03-03 15:48:45 +0100 (Wed, 03 Mar 2010) | 2 lines Changed paths: M /trunk/pm1fs2.c Hide omp critical pragma if OMP isn't used to avoid compiler warning ------------------------------------------------------------------------ r1439 | kruppa | 2010-02-15 18:34:20 +0100 (Mon, 15 Feb 2010) | 5 lines Changed paths: M /trunk/configure.in M /trunk/ecm-ecm.h M /trunk/main.c M /trunk/resume.c Removed some dead code from checkpoint writing. Changed writing save file lines always to append to the file, with file locking if fcntl() is available. ------------------------------------------------------------------------ r1438 | kruppa | 2010-02-10 15:17:53 +0100 (Wed, 10 Feb 2010) | 3 lines Changed paths: M /trunk/phiP.gp Slight cleanups ------------------------------------------------------------------------ r1437 | kruppa | 2010-02-10 14:41:32 +0100 (Wed, 10 Feb 2010) | 3 lines Changed paths: M /trunk/pm1fs2.c Previous commit included P values where code could not factor phi(P).
Fixed ------------------------------------------------------------------------ r1436 | kruppa | 2010-02-10 13:41:29 +0100 (Wed, 10 Feb 2010) | 3 lines Changed paths: M /trunk/pm1fs2.c More P values to allow larger B2 in a single run ------------------------------------------------------------------------ r1435 | kruppa | 2010-02-10 01:49:03 +0100 (Wed, 10 Feb 2010) | 3 lines Changed paths: M /trunk/pm1fs2.c More parallelization while building f(x) ------------------------------------------------------------------------ r1434 | kruppa | 2010-02-01 17:39:17 +0100 (Mon, 01 Feb 2010) | 2 lines Changed paths: M /trunk/pm1fs2.c Cleanup in list_scale_V(), slightly more parallelization ------------------------------------------------------------------------ r1433 | kruppa | 2010-02-01 14:48:52 +0100 (Mon, 01 Feb 2010) | 3 lines Changed paths: M /trunk/bestd.c M /trunk/ecm-impl.h M /trunk/ks-multiply.c M /trunk/mpmod.c M /trunk/pm1fs2.c M /trunk/schoen_strass.c M /trunk/sp.h M /trunk/stage2.c Replaced __GMP_BITS_PER_MP_LIMB and most mp_bits_per_limb by GMP_NUMB_BITS ------------------------------------------------------------------------ r1432 | zimmerma | 2010-01-30 22:21:43 +0100 (Sat, 30 Jan 2010) | 2 lines Changed paths: M /trunk/ChangeLog M /trunk/INSTALL M /trunk/Makefile.am M /trunk/NEWS M /trunk/build.vc9/config.h M /trunk/ecm.h preparation for the release of ecm-6.3 ------------------------------------------------------------------------ r1431 | kruppa | 2010-01-23 20:55:47 +0100 (Sat, 23 Jan 2010) | 2 lines Changed paths: M /trunk/pm1fs2.c Make "one-pass" P+1 stage 2 use parallel transforms ------------------------------------------------------------------------ r1430 | kruppa | 2010-01-22 23:36:34 +0100 (Fri, 22 Jan 2010) | 5 lines Changed paths: M /trunk/mpmod.c Changed __GMP_BITS_PER_MP_LIMB to GMP_NUMB_BITS Bugfix in expensive assert check for mulredc Cleanup in powering functions ------------------------------------------------------------------------ r1429 | 
kruppa | 2010-01-22 21:42:31 +0100 (Fri, 22 Jan 2010) | 3 lines Changed paths: M /trunk/configure.in Enable asm mulredc by default only on x86_64 and 64 bit PowerPC Check for GSL ------------------------------------------------------------------------ r1428 | kruppa | 2010-01-22 21:23:19 +0100 (Fri, 22 Jan 2010) | 3 lines Changed paths: M /trunk/bench_mulredc.c Slightly more readable output ------------------------------------------------------------------------ r1427 | kruppa | 2010-01-20 14:20:32 +0100 (Wed, 20 Jan 2010) | 3 lines Changed paths: M /trunk/rho.gp Make results agree better with those from rho.c Added functions for P+1 prob, and for small B2 ------------------------------------------------------------------------ r1426 | zimmerma | 2010-01-08 19:03:00 +0100 (Fri, 08 Jan 2010) | 2 lines Changed paths: M /trunk/sp.h [sp.h] define __GMP_BITS_PER_MP_LIMB from GMP_LIMB_BITS when undefined ------------------------------------------------------------------------ r1425 | kruppa | 2009-11-06 03:31:28 +0100 (Fri, 06 Nov 2009) | 5 lines Changed paths: M /trunk/rho.c Functions for counting and estimating smooth and rough numbers. For small B2, estimate stage 2 probability with sum instead of integral. 
------------------------------------------------------------------------ r1424 | kruppa | 2009-11-01 15:53:30 +0100 (Sun, 01 Nov 2009) | 2 lines Changed paths: M /trunk/Makefile.am Add target "rho" to test code in rho.c ------------------------------------------------------------------------ r1423 | kruppa | 2009-11-01 15:39:42 +0100 (Sun, 01 Nov 2009) | 3 lines Changed paths: M /trunk/rho.c Functions for computing \Phi(x,y) and \Psi(x,y) exactly, and an estimate for Phi(x,y) (number of y-rough numbers) ------------------------------------------------------------------------ r1422 | zimmerma | 2009-08-31 10:23:26 +0200 (Mon, 31 Aug 2009) | 3 lines Changed paths: M /trunk/tune.c [tune.c] replace obsolete mpz_random by mpz_urandomb (suggested by Jason Moxham) ------------------------------------------------------------------------ r1421 | brian_gladman | 2009-08-11 17:35:55 +0200 (Tue, 11 Aug 2009) | 1 line Changed paths: A /trunk/build.vc9/mp_lib.vsprops ------------------------------------------------------------------------ r1420 | brian_gladman | 2009-08-11 17:35:17 +0200 (Tue, 11 Aug 2009) | 1 line Changed paths: M /trunk/build.vc9/ecm/ecm.vcproj M /trunk/build.vc9/libecm/libecm.vcproj M /trunk/build.vc9/readme.txt M /trunk/build.vc9/tests.py M /trunk/build.vc9/tune/tune.vcproj Update Windows VC++ build to use standard output directories and to use MPIR by default. ------------------------------------------------------------------------ r1419 | kruppa | 2009-05-28 21:48:48 +0200 (Thu, 28 May 2009) | 3 lines Changed paths: M /trunk/main.c Allow -printconfig as only parameter, exit after printing config. Print tuning parameters, too. 
------------------------------------------------------------------------ r1418 | zimmerma | 2009-05-18 13:59:13 +0200 (Mon, 18 May 2009) | 2 lines Changed paths: M /trunk/mpzspv.c [mpzspv.c] get rid of malloc_usable_size (and thus malloc.h) ------------------------------------------------------------------------ r1417 | zimmerma | 2009-05-18 13:19:15 +0200 (Mon, 18 May 2009) | 2 lines Changed paths: M /trunk/ecm-ecm.h M /trunk/ecm-impl.h M /trunk/ks-multiply.c M /trunk/test.pm1 [ks-multiply.c] check allocation failure in kronecker_schonhage ------------------------------------------------------------------------ r1416 | zimmerma | 2009-05-18 13:08:21 +0200 (Mon, 18 May 2009) | 2 lines Changed paths: M /trunk/mpzspm.c [mpzspm.c] fixed typo ------------------------------------------------------------------------ r1415 | kruppa | 2009-05-17 23:20:05 +0200 (Sun, 17 May 2009) | 3 lines Changed paths: M /trunk/mpzspm.c M /trunk/pm1fs2.c M /trunk/sp.c M /trunk/spm.c M /trunk/stage2.c Added some error handling for out-of-memory conditions to NTT code ------------------------------------------------------------------------ r1414 | zimmerma | 2009-05-12 16:07:12 +0200 (Tue, 12 May 2009) | 2 lines Changed paths: M /trunk/build.vc9/assembler/test_mulredc.c M /trunk/ecm_ntt.c M /trunk/getprime.c M /trunk/main.c M /trunk/mpzspm.c M /trunk/mpzspv.c M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/polyeval.c M /trunk/rho.c M /trunk/schoen_strass.c M /trunk/sets_long.c M /trunk/sp.c M /trunk/spm.c M /trunk/stage2.c M /trunk/test_mulredc.c M /trunk/tune.c check return value of malloc in several places (bug reported by Torbjörn Granlund) ------------------------------------------------------------------------ r1413 | zimmerma | 2009-04-25 23:34:58 +0200 (Sat, 25 Apr 2009) | 2 lines Changed paths: M /trunk/NEWS [NEWS] updated with changes between ecm-6.2.2 and ecm-6.2.3 ------------------------------------------------------------------------ r1412 | kruppa | 2009-04-21 17:13:49 +0200 (Tue, 
21 Apr 2009) | 2 lines Changed paths: M /trunk/mul_fft-params.h.default Missing endline at end of file ------------------------------------------------------------------------ r1411 | kruppa | 2009-04-20 00:02:05 +0200 (Mon, 20 Apr 2009) | 2 lines Changed paths: M /trunk/build.vc9/assembler/a_x64_mulredc.asm Ported recent improvements ------------------------------------------------------------------------ r1410 | kruppa | 2009-04-18 22:47:23 +0200 (Sat, 18 Apr 2009) | 4 lines Changed paths: M /trunk/configure.in Cleanup of tests for asm redc code, test list of cpu types only once Print configuration at end of configure Various cleanups (or messups, as the case may be) ------------------------------------------------------------------------ r1409 | zimmerma | 2009-04-18 19:01:08 +0200 (Sat, 18 Apr 2009) | 2 lines Changed paths: M /trunk/TODO [TODO] added item ------------------------------------------------------------------------ r1408 | kruppa | 2009-04-18 17:21:14 +0200 (Sat, 18 Apr 2009) | 3 lines Changed paths: M /trunk/main.c Fixed old bug: if last line did not end in newline, only one curve would be run on that number in spite of -c parameter ------------------------------------------------------------------------ r1407 | kruppa | 2009-04-18 16:03:45 +0200 (Sat, 18 Apr 2009) | 3 lines Changed paths: M /trunk/main.c Added -printconfig option which prints configuration options used for building GMP-ECM ------------------------------------------------------------------------ r1406 | zimmerma | 2009-04-18 15:40:44 +0200 (Sat, 18 Apr 2009) | 2 lines Changed paths: M /trunk/main.c [main.c] print --enable-asm-redc in header line if used ------------------------------------------------------------------------ r1405 | zimmerma | 2009-04-18 15:07:04 +0200 (Sat, 18 Apr 2009) | 2 lines Changed paths: M /trunk/README.dev [README.dev] added item ------------------------------------------------------------------------ r1404 | zimmerma | 2009-04-18 15:03:09 +0200 (Sat, 18
Apr 2009) | 3 lines Changed paths: M /trunk/configure.in [configure.in] applied patch from Peter Jeremy (http://gforge.inria.fr/tracker/index.php?func=detail&aid=7639&group_id=135&atid=623) ------------------------------------------------------------------------ r1403 | kruppa | 2009-04-16 17:51:46 +0200 (Thu, 16 Apr 2009) | 3 lines Changed paths: M /trunk/INSTALL Added warning about incompatible GMP header/library Updated GMP version to 4.3.0 ------------------------------------------------------------------------ r1402 | kruppa | 2009-04-16 17:21:21 +0200 (Thu, 16 Apr 2009) | 2 lines Changed paths: M /trunk/x86_64/mulredc.m4 M /trunk/x86_64/mulredc10.asm M /trunk/x86_64/mulredc11.asm M /trunk/x86_64/mulredc12.asm M /trunk/x86_64/mulredc13.asm M /trunk/x86_64/mulredc14.asm M /trunk/x86_64/mulredc15.asm M /trunk/x86_64/mulredc16.asm M /trunk/x86_64/mulredc17.asm M /trunk/x86_64/mulredc18.asm M /trunk/x86_64/mulredc19.asm M /trunk/x86_64/mulredc2.asm M /trunk/x86_64/mulredc20.asm M /trunk/x86_64/mulredc3.asm M /trunk/x86_64/mulredc4.asm M /trunk/x86_64/mulredc5.asm M /trunk/x86_64/mulredc6.asm M /trunk/x86_64/mulredc7.asm M /trunk/x86_64/mulredc8.asm M /trunk/x86_64/mulredc9.asm Moving memory load ahead was missing for last unroll step ------------------------------------------------------------------------ r1401 | kruppa | 2009-04-16 16:39:05 +0200 (Thu, 16 Apr 2009) | 2 lines Changed paths: M /trunk/README Replaced long obsolete files list by basic usage examples ------------------------------------------------------------------------ r1400 | kruppa | 2009-04-16 16:32:19 +0200 (Thu, 16 Apr 2009) | 3 lines Changed paths: M /trunk/configure.in GMP 4.3.0 and newer always have three parts in version string (including patchlevel, even if it's zero). 
This broke a test in configure ------------------------------------------------------------------------ r1399 | kruppa | 2009-04-10 22:50:33 +0200 (Fri, 10 Apr 2009) | 4 lines Changed paths: M /trunk/x86_64/mulredc.m4 M /trunk/x86_64/mulredc10.asm M /trunk/x86_64/mulredc11.asm M /trunk/x86_64/mulredc12.asm M /trunk/x86_64/mulredc13.asm M /trunk/x86_64/mulredc14.asm M /trunk/x86_64/mulredc15.asm M /trunk/x86_64/mulredc16.asm M /trunk/x86_64/mulredc17.asm M /trunk/x86_64/mulredc18.asm M /trunk/x86_64/mulredc19.asm M /trunk/x86_64/mulredc20.asm M /trunk/x86_64/mulredc3.asm M /trunk/x86_64/mulredc4.asm M /trunk/x86_64/mulredc5.asm M /trunk/x86_64/mulredc6.asm M /trunk/x86_64/mulredc7.asm M /trunk/x86_64/mulredc8.asm M /trunk/x86_64/mulredc9.asm Moved memory load ahead one instruction. Slight speedup on Opteron/Phenom (~1% for 20 words), noticeable speedup on Core 2 (~8% for 20 words) ------------------------------------------------------------------------ r1398 | zimmerma | 2009-04-01 18:25:52 +0200 (Wed, 01 Apr 2009) | 2 lines Changed paths: M /trunk/hecm/Makefile [hecm/Makefile] link against the libecm.a from .., not /usr/lib/libecm.a!
------------------------------------------------------------------------ r1397 | zimmerma | 2009-04-01 17:00:18 +0200 (Wed, 01 Apr 2009) | 2 lines Changed paths: M /trunk/README.dev [README.dev] check config.guess is recent enough ------------------------------------------------------------------------ r1396 | zimmerma | 2009-04-01 16:39:47 +0200 (Wed, 01 Apr 2009) | 2 lines Changed paths: D /trunk/configfsf.guess D /trunk/configfsf.sub [configfsf.guess,configfsf.sub] removed unused files ------------------------------------------------------------------------ r1395 | zimmerma | 2009-04-01 14:18:03 +0200 (Wed, 01 Apr 2009) | 2 lines Changed paths: M /trunk/README.dev A /trunk/patch-config.guess.diff [patch-config.guess.diff] patch to fix config.guess on Mac OS X / PowerPC ------------------------------------------------------------------------ r1394 | kruppa | 2009-03-30 15:50:08 +0200 (Mon, 30 Mar 2009) | 4 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm2.c M /trunk/stage2.c Code to find factor of group order of elliptic curve in stage 2 could segfault if factor (of input number) was re-discovered during initialisation of arithmetic progressions on the curve ------------------------------------------------------------------------ r1393 | kruppa | 2009-03-30 15:44:52 +0200 (Mon, 30 Mar 2009) | 2 lines Changed paths: M /trunk/ecm.c Bugfix: rhotable did not get freed if a factor was found in stage 1 ------------------------------------------------------------------------ r1392 | brian_gladman | 2009-03-29 18:16:04 +0200 (Sun, 29 Mar 2009) | 1 line Changed paths: M /trunk/build.vc9/ecm.sln M /trunk/build.vc9/tune/tune.vcproj correct error in Windows tune build ------------------------------------------------------------------------ r1391 | brian_gladman | 2009-03-29 13:44:30 +0200 (Sun, 29 Mar 2009) | 1 line Changed paths: D /trunk/build.vc9/ecm-params.win32.amd.h D /trunk/build.vc9/ecm-params.win32.intel.h D /trunk/build.vc9/ecm-params.x64.amd.h D
/trunk/build.vc9/ecm-params.x64.intel.h ------------------------------------------------------------------------ r1390 | brian_gladman | 2009-03-29 13:43:13 +0200 (Sun, 29 Mar 2009) | 1 line Changed paths: A /trunk/build.vc9/ecm-params.h.win32.amd A /trunk/build.vc9/ecm-params.h.win32.intel A /trunk/build.vc9/ecm-params.h.x64.amd A /trunk/build.vc9/ecm-params.h.x64.intel M /trunk/build.vc9/libecm/libecm.vcproj A /trunk/build.vc9/mul_fft-params.h.win32.amd A /trunk/build.vc9/mul_fft-params.h.win32.intel A /trunk/build.vc9/mul_fft-params.h.x64.amd A /trunk/build.vc9/mul_fft-params.h.x64.intel M /trunk/build.vc9/readme.txt M /trunk/build.vc9/tune/tune.vcproj Further update for Windows build ------------------------------------------------------------------------ r1389 | brian_gladman | 2009-03-29 00:05:29 +0100 (Sun, 29 Mar 2009) | 1 line Changed paths: M /trunk/build.vc9/tests.py correct test.py for new directory structure ------------------------------------------------------------------------ r1388 | brian_gladman | 2009-03-28 23:59:27 +0100 (Sat, 28 Mar 2009) | 2 lines Changed paths: M /trunk/build.vc9/assembler/a_x64_mulredc.asm A /trunk/build.vc9/assembler/mulredc.h M /trunk/build.vc9/config.h M /trunk/build.vc9/ecm/ecm.vcproj M /trunk/build.vc9/ecm-params.h A /trunk/build.vc9/ecm-params.win32.amd.h A /trunk/build.vc9/ecm-params.win32.intel.h A /trunk/build.vc9/ecm-params.x64.amd.h A /trunk/build.vc9/ecm-params.x64.intel.h M /trunk/build.vc9/ecm.sln M /trunk/build.vc9/libecm/libecm.vcproj M /trunk/build.vc9/readme.txt M /trunk/build.vc9/tests.py A /trunk/build.vc9/tune A /trunk/build.vc9/tune/tune.vcproj 1. Add tune to the windows GMP-ECM build 2. Add AMD and Intel build configurations (using tune output). 
------------------------------------------------------------------------ r1387 | kruppa | 2009-03-28 22:53:42 +0100 (Sat, 28 Mar 2009) | 2 lines Changed paths: M /trunk/INSTALL Updated note, since --enable-asm-redc is default now ------------------------------------------------------------------------ r1386 | brian_gladman | 2009-03-28 17:48:45 +0100 (Sat, 28 Mar 2009) | 1 line Changed paths: M /trunk/build.vc9/libecm/libecm.vcproj M /trunk/build.vc9/tests.py M /trunk/ntt_gfp.c M /trunk/spv.c Convert SSE2 inline assembler for 32-bit Windows build ------------------------------------------------------------------------ r1385 | zimmerma | 2009-03-28 12:59:04 +0100 (Sat, 28 Mar 2009) | 2 lines Changed paths: M /trunk/README.dev [README.dev] more about INSTALL up-to-date ------------------------------------------------------------------------ r1384 | zimmerma | 2009-03-28 12:56:30 +0100 (Sat, 28 Mar 2009) | 2 lines Changed paths: M /trunk/INSTALL [INSTALL] latest release of GMP is 4.2.4 ------------------------------------------------------------------------ r1383 | kruppa | 2009-03-28 12:16:00 +0100 (Sat, 28 Mar 2009) | 2 lines Changed paths: M /trunk/INSTALL Added mention of --enable-asm-redc and --enable-sse2 ------------------------------------------------------------------------ r1382 | kruppa | 2009-03-28 11:56:24 +0100 (Sat, 28 Mar 2009) | 3 lines Changed paths: M /trunk/configure.in Enable SSE2 if config.guess identifies the system as "i786" which seems to be what recent autotools call a Pentium 4 (rather than pentium4 as before) ------------------------------------------------------------------------ r1381 | rcosset | 2009-03-27 10:24:54 +0100 (Fri, 27 Mar 2009) | 1 line Changed paths: M /trunk/hecm/auxi.h M /trunk/hecm/hecm.c M /trunk/hecm/morphismes.c Correction of typos in the comments of hecm ------------------------------------------------------------------------ r1380 | zimmerma | 2009-03-26 15:17:15 +0100 (Thu, 26 Mar 2009) | 4 lines Changed paths: M 
/trunk/test.ecm M /trunk/test.pm1 M /trunk/test.pp1 [test.pm1] added new test, updated copyright years, added comments for return code [test.pp1,test.ecm] updated copyright years ------------------------------------------------------------------------ r1379 | kruppa | 2009-03-25 13:42:39 +0100 (Wed, 25 Mar 2009) | 3 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in Removed the horrible FASTOBJ kludge. There must be a better way to specify the order in which object files are to appear on the link command. ------------------------------------------------------------------------ r1378 | kruppa | 2009-03-24 17:00:44 +0100 (Tue, 24 Mar 2009) | 3 lines Changed paths: M /trunk/mpmod.c Bugfix: in ecm_redc_n(), a carry was not propagated correctly if xp[n - 1] was zero but tp[n - 1] was non-zero. ------------------------------------------------------------------------ r1377 | rcosset | 2009-03-24 11:51:01 +0100 (Tue, 24 Mar 2009) | 1 line Changed paths: M /trunk/hecm/hecm.c Print the input number in hecm. ------------------------------------------------------------------------ r1376 | rcosset | 2009-03-24 11:40:39 +0100 (Tue, 24 Mar 2009) | 1 line Changed paths: A /trunk/hecm A /trunk/hecm/Jacobi.c A /trunk/hecm/Jacobi.h A /trunk/hecm/Makefile A /trunk/hecm/ariKS.c A /trunk/hecm/ariKS.h A /trunk/hecm/auxi.c A /trunk/hecm/auxi.h A /trunk/hecm/generation.c A /trunk/hecm/generation.h A /trunk/hecm/hecm.c A /trunk/hecm/hecm.h A /trunk/hecm/morphismes.c A /trunk/hecm/morphismes.h A /trunk/hecm/stage1HECM.c A /trunk/hecm/stage2HECM.c Added a new software gmp-hecm based on gmp-ecm. HECM uses decomposable hyperelliptic curves of genus 2 instead of elliptic curves. Thus it does two runs of ECM in parallel. The use of Kummer surfaces with small parameters makes it quicker for numbers >= 10^300.
------------------------------------------------------------------------ r1375 | kruppa | 2009-03-23 15:23:23 +0100 (Mon, 23 Mar 2009) | 2 lines Changed paths: M /trunk/NEWS M /trunk/configure.in Updated NEWS with 6.2.2 release, bumped version to 6.3 ------------------------------------------------------------------------ r1373 | kruppa | 2009-03-23 00:00:25 +0100 (Mon, 23 Mar 2009) | 3 lines Changed paths: M /trunk/ntt_gfp.c M /trunk/spv.c MacOS assembler doesn't like binary constants in asm code, replaced by hex. Patch supplied by "jedirock" on mersenneforum.org ------------------------------------------------------------------------ r1372 | rcosset | 2009-03-19 14:01:26 +0100 (Thu, 19 Mar 2009) | 1 line Changed paths: M /trunk/mpmod.c Added a few commentary in mpmod.c ------------------------------------------------------------------------ r1371 | zimmerma | 2009-03-18 17:42:46 +0100 (Wed, 18 Mar 2009) | 2 lines Changed paths: M /trunk/mpmod.c [mpmod.c] added some FIXME's ------------------------------------------------------------------------ r1370 | kruppa | 2009-03-18 01:13:21 +0100 (Wed, 18 Mar 2009) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpmod.c Merged in Romain's modifications ------------------------------------------------------------------------ r1369 | kruppa | 2009-03-18 00:20:59 +0100 (Wed, 18 Mar 2009) | 6 lines Changed paths: M /trunk/x86_64/Makefile.am M /trunk/x86_64/mulredc.h M /trunk/x86_64/mulredc.m4 D /trunk/x86_64/mulredc1.h M /trunk/x86_64/mulredc10.asm M /trunk/x86_64/mulredc11.asm M /trunk/x86_64/mulredc12.asm M /trunk/x86_64/mulredc13.asm M /trunk/x86_64/mulredc14.asm M /trunk/x86_64/mulredc15.asm M /trunk/x86_64/mulredc16.asm M /trunk/x86_64/mulredc17.asm M /trunk/x86_64/mulredc18.asm M /trunk/x86_64/mulredc19.asm M /trunk/x86_64/mulredc2.asm M /trunk/x86_64/mulredc20.asm M /trunk/x86_64/mulredc3.asm M /trunk/x86_64/mulredc4.asm M /trunk/x86_64/mulredc5.asm M /trunk/x86_64/mulredc6.asm M /trunk/x86_64/mulredc7.asm M 
/trunk/x86_64/mulredc8.asm M /trunk/x86_64/mulredc9.asm Removed mulredc1.h, prototypes are in mulredc.h now Removed superfluous #include in mulredc.h Some small changes in mulredc.m4: replace some movq by movl, change for-loop so it works with LENGTH=2 ------------------------------------------------------------------------ r1368 | kruppa | 2009-03-17 23:34:51 +0100 (Tue, 17 Mar 2009) | 3 lines Changed paths: M /trunk/Makefile.am M /trunk/athlon/Makefile.am M /trunk/pentium4/Makefile.am M /trunk/powerpc64/Makefile.am M /trunk/x86_64/Makefile.am The include path to mulredc.h must use $(srcdir) or a build with separate source/build directories (as used by make distcheck) fails ------------------------------------------------------------------------ r1367 | kruppa | 2009-03-17 22:16:56 +0100 (Tue, 17 Mar 2009) | 5 lines Changed paths: M /trunk/configure.in New config.guess don't call a Pentium 4 "pentium4" any more, but "i786", the corresponding case was missing in the first test for asm eligibility. And autoconf changes the m4 quote characters from ` and ' to [ and ], so they need to be quoted for shell character-set matching. I hate autotools ------------------------------------------------------------------------ r1366 | kruppa | 2009-03-17 20:54:47 +0100 (Tue, 17 Mar 2009) | 6 lines Changed paths: M /trunk/Makefile.am D /trunk/asmredc.h A /trunk/bench_mulredc.c M /trunk/configure.in M /trunk/mpmod.c A /trunk/test_mulredc.c Build mulredc code in subdirectories if enabled, and link to it from the top directory Moved bench_mulredc.c and test_mulredc.c to top directory rather than having identical copies in each subdir Did I mention that I hate autotools? 
------------------------------------------------------------------------ r1365 | kruppa | 2009-03-17 20:45:30 +0100 (Tue, 17 Mar 2009) | 4 lines Changed paths: D /trunk/config.guess D /trunk/config.sub Removed config.guess and config.sub, they aren't sources and developers with different versions of autotools installed will overwrite each others' config.guess/config.sub all the time ------------------------------------------------------------------------ r1364 | kruppa | 2009-03-17 20:41:07 +0100 (Tue, 17 Mar 2009) | 4 lines Changed paths: M /trunk/athlon/Makefile.am M /trunk/athlon/mulredc.h M /trunk/pentium4/Makefile.am M /trunk/powerpc64/Makefile.am D /trunk/powerpc64/bench.c A /trunk/powerpc64/mulredc.h D /trunk/powerpc64/test_mulredc.c Build mulredc as library in powerpc64 Removed extraneous prototypes, EXTRA_DIST entries ------------------------------------------------------------------------ r1363 | kruppa | 2009-03-17 20:34:31 +0100 (Tue, 17 Mar 2009) | 3 lines Changed paths: M /trunk/pentium4/Makefile.am D /trunk/pentium4/bench.c A /trunk/pentium4/mulredc.h D /trunk/pentium4/test_mulredc.c Build mulredc as library ------------------------------------------------------------------------ r1362 | kruppa | 2009-03-17 20:28:22 +0100 (Tue, 17 Mar 2009) | 2 lines Changed paths: M /trunk/athlon/Makefile.am D /trunk/athlon/bench.c A /trunk/athlon/mulredc.h D /trunk/athlon/test_mulredc.c Build mulredc as library ------------------------------------------------------------------------ r1361 | kruppa | 2009-03-17 20:08:17 +0100 (Tue, 17 Mar 2009) | 2 lines Changed paths: M /trunk/x86_64/Makefile.am Forgot to add mulredc.h to distribution ------------------------------------------------------------------------ r1360 | kruppa | 2009-03-17 19:55:46 +0100 (Tue, 17 Mar 2009) | 3 lines Changed paths: M /trunk/x86_64/Makefile.am D /trunk/x86_64/bench.c A /trunk/x86_64/mulredc.h D /trunk/x86_64/test_mulredc.c Build mulredc as library, so GMP-ECM can link it (with ld rather
than ln) Removed bench and test_mulredc, are being moved to parent dir ------------------------------------------------------------------------ r1359 | kruppa | 2009-03-17 15:16:40 +0100 (Tue, 17 Mar 2009) | 2 lines Changed paths: D /trunk/x86_64/Makefile.dev The targets from Makefile.dev are now in Makefile.am ------------------------------------------------------------------------ r1358 | zimmerma | 2009-03-17 11:00:52 +0100 (Tue, 17 Mar 2009) | 6 lines Changed paths: A /trunk/m4 added empty directory m4, since autoreconf seems to require it: patate% autoreconf -i aclocal: couldn't open directory `m4': No such file or directory autoreconf: aclocal failed with exit status: 1 ------------------------------------------------------------------------ r1357 | kruppa | 2009-03-17 00:00:55 +0100 (Tue, 17 Mar 2009) | 3 lines Changed paths: M /trunk/x86_64/redc.asm Use RIP-relative addressing instead of horrible call/pop for computed jump. Fixes a compilation error on MacOS ------------------------------------------------------------------------ r1356 | kruppa | 2009-03-15 22:58:34 +0100 (Sun, 15 Mar 2009) | 2 lines Changed paths: M /trunk/configure.in Added i686-apple-darwin* to list of hosts that understand asm code ------------------------------------------------------------------------ r1355 | kruppa | 2009-03-12 16:02:33 +0100 (Thu, 12 Mar 2009) | 5 lines Changed paths: M /trunk/tune.c Fixed generation of NTT_GFP_TWIDDLE_DI[FT]_BREAKOVER values Avoid calling cputime() excessively often when timing short functions Fixed access to uninitialised memory ------------------------------------------------------------------------ r1354 | kruppa | 2009-03-12 15:53:28 +0100 (Thu, 12 Mar 2009) | 2 lines Changed paths: M /trunk/ecm-params.h.powerpc970 NTT_GFP_TWIDDLE_DI[FT]_BREAKOVER was not in log_2() form ------------------------------------------------------------------------ r1353 | kruppa | 2009-03-08 20:49:41 +0100 (Sun, 08 Mar 2009) | 3 lines Changed paths: M 
/trunk/Makefile.am M /trunk/configure.in M /trunk/ecm-params.h.athlon64 M /trunk/ecm-params.h.pentium3 M /trunk/ecm-params.h.pentium4 A /trunk/mul_fft-params.h.athlon64 A /trunk/mul_fft-params.h.default A /trunk/mul_fft-params.h.pentium3 A /trunk/mul_fft-params.h.pentium4 M /trunk/mul_fft.c Moved parameters for Schönhage-Strassen into separate file so that tune output does not overwrite them ------------------------------------------------------------------------ r1352 | zimmerma | 2009-03-04 14:45:34 +0100 (Wed, 04 Mar 2009) | 2 lines Changed paths: M /trunk/ecmfactor2.c [ecmfactor2.c] added wrapper function, and fixed example which did not work ------------------------------------------------------------------------ r1351 | zimmerma | 2009-03-03 15:32:13 +0100 (Tue, 03 Mar 2009) | 2 lines Changed paths: M /trunk/main.c [main.c] update champions sizes ------------------------------------------------------------------------ r1350 | zimmerma | 2009-02-27 17:34:43 +0100 (Fri, 27 Feb 2009) | 2 lines Changed paths: M /trunk/NEWS M /trunk/configure.in M /trunk/powerpc64/powerpc-defs.m4 the powerpc64 assembly code from Philip McLaughlin now works on Linux too ------------------------------------------------------------------------ r1349 | zimmerma | 2009-02-27 16:56:18 +0100 (Fri, 27 Feb 2009) | 2 lines Changed paths: M /trunk/acinclude.m4 [acinclude.m4] too many quotes ------------------------------------------------------------------------ r1348 | zimmerma | 2009-02-27 16:45:36 +0100 (Fri, 27 Feb 2009) | 3 lines Changed paths: M /trunk/acinclude.m4 M /trunk/configure.in M /trunk/powerpc64/Makefile.am M /trunk/powerpc64/Makefile.dev M /trunk/powerpc64/mulredc.m4 M /trunk/powerpc64/mulredc1.asm M /trunk/powerpc64/mulredc10.asm M /trunk/powerpc64/mulredc11.asm M /trunk/powerpc64/mulredc12.asm M /trunk/powerpc64/mulredc13.asm M /trunk/powerpc64/mulredc14.asm M /trunk/powerpc64/mulredc15.asm M /trunk/powerpc64/mulredc16.asm M /trunk/powerpc64/mulredc17.asm M 
/trunk/powerpc64/mulredc18.asm M /trunk/powerpc64/mulredc19.asm M /trunk/powerpc64/mulredc2.asm M /trunk/powerpc64/mulredc20.asm M /trunk/powerpc64/mulredc3.asm M /trunk/powerpc64/mulredc4.asm M /trunk/powerpc64/mulredc5.asm M /trunk/powerpc64/mulredc6.asm M /trunk/powerpc64/mulredc7.asm M /trunk/powerpc64/mulredc8.asm M /trunk/powerpc64/mulredc9.asm M /trunk/powerpc64/mulredc_1_2.m4 A /trunk/powerpc64/powerpc-defs.m4 M /trunk/powerpc64/redc.asm adapt PowerPC assembly files so that they can be used under Linux too (does not yet work) ------------------------------------------------------------------------ r1347 | kruppa | 2009-02-25 14:51:06 +0100 (Wed, 25 Feb 2009) | 2 lines Changed paths: M /trunk/pp1.c Replaced some mpres_mul_ui() by 2 with mpres_add() ------------------------------------------------------------------------ r1346 | kruppa | 2009-02-24 23:18:20 +0100 (Tue, 24 Feb 2009) | 2 lines Changed paths: M /trunk/x86_64/bench.c Fixed stupid error message about label at end of compound statement ------------------------------------------------------------------------ r1345 | kruppa | 2009-02-24 23:16:05 +0100 (Tue, 24 Feb 2009) | 5 lines Changed paths: M /trunk/x86_64/redc.asm If a redc.s file was generated somehow, it was not run through the C preprocessor before assembly (only .S files are), causing address generation for a computed jump to go wrong. Added a tripwire so that assembly fails if preprocessor isn't used.
------------------------------------------------------------------------ r1344 | kruppa | 2009-02-24 19:49:27 +0100 (Tue, 24 Feb 2009) | 2 lines Changed paths: M /trunk/x86_64/Makefile.am M /trunk/x86_64/bench.c A /trunk/x86_64/mulredc1.h A /trunk/x86_64/mulredc1.m4 M /trunk/x86_64/test_mulredc.c For Romain: mulredc code for n x 1 products ------------------------------------------------------------------------ r1343 | kruppa | 2009-02-21 22:17:27 +0100 (Sat, 21 Feb 2009) | 2 lines Changed paths: D /trunk/powerpc64/Makefile.in Makefile.in should not be in repository, only Makefile.am ------------------------------------------------------------------------ r1342 | zimmerma | 2009-02-18 23:06:52 +0100 (Wed, 18 Feb 2009) | 3 lines Changed paths: M /trunk/tune.c [tune.c] NTT_GFP_TWIDDLE_DIF_BREAKOVER/NTT_GFP_TWIDDLE_DIT_BREAKOVER should be the logarithm in base 2 of the corresponding thresholds ------------------------------------------------------------------------ r1341 | kruppa | 2009-02-16 17:03:05 +0100 (Mon, 16 Feb 2009) | 4 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1fs2.c M /trunk/sets_long.c Moved maxS() from pm1fs2.c to sets_long.c as sets_max(), as it depends on how sets_long.c picks sets. Included test of sets_max() in the self-test. Fixed some typos in comments. 
------------------------------------------------------------------------ r1340 | kruppa | 2009-02-15 16:09:34 +0100 (Sun, 15 Feb 2009) | 2 lines Changed paths: D /trunk/m4 Delete empty directory ------------------------------------------------------------------------ r1339 | zimmerma | 2009-02-12 10:58:27 +0100 (Thu, 12 Feb 2009) | 2 lines Changed paths: M /trunk/Makefile.am [Makefile.am] forgot ecm-params.h.powerpc970 in make dist ------------------------------------------------------------------------ r1338 | kruppa | 2009-02-11 23:20:32 +0100 (Wed, 11 Feb 2009) | 4 lines Changed paths: M /trunk/x86_64/bench.c M /trunk/x86_64/mulredc.m4 M /trunk/x86_64/mulredc10.asm M /trunk/x86_64/mulredc11.asm M /trunk/x86_64/mulredc12.asm M /trunk/x86_64/mulredc13.asm M /trunk/x86_64/mulredc14.asm M /trunk/x86_64/mulredc15.asm M /trunk/x86_64/mulredc16.asm M /trunk/x86_64/mulredc17.asm M /trunk/x86_64/mulredc18.asm M /trunk/x86_64/mulredc19.asm M /trunk/x86_64/mulredc20.asm M /trunk/x86_64/mulredc3.asm M /trunk/x86_64/mulredc4.asm M /trunk/x86_64/mulredc5.asm M /trunk/x86_64/mulredc6.asm M /trunk/x86_64/mulredc7.asm M /trunk/x86_64/mulredc8.asm M /trunk/x86_64/mulredc9.asm Added Phil McLaughlin's suggestion to remove a useless carry propagation. Made bench.c use getrusage() if available which has better resolution than clock() or times(). 
------------------------------------------------------------------------ r1337 | kruppa | 2009-02-11 15:26:46 +0100 (Wed, 11 Feb 2009) | 2 lines Changed paths: M /trunk/x86_64/Makefile.am Add targets for bench and test_mulredc ------------------------------------------------------------------------ r1336 | kruppa | 2009-02-11 15:18:50 +0100 (Wed, 11 Feb 2009) | 2 lines Changed paths: M /trunk/x86_64/bench.c M /trunk/x86_64/test_mulredc.c Made bench compile again, fixed some -pedantic warnings in test_mulredc.c ------------------------------------------------------------------------ r1335 | zimmerma | 2009-02-11 10:00:35 +0100 (Wed, 11 Feb 2009) | 2 lines Changed paths: M /trunk/powerpc64/README [powerpc64/README] added reference to LGPL license ------------------------------------------------------------------------ r1334 | zimmerma | 2009-02-10 09:58:20 +0100 (Tue, 10 Feb 2009) | 2 lines Changed paths: A /trunk/ecm-params.h.powerpc970 [ecm-params.h.powerpc970] default tuning parameters for powerpc64 ------------------------------------------------------------------------ r1333 | zimmerma | 2009-02-10 09:50:43 +0100 (Tue, 10 Feb 2009) | 3 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in A /trunk/powerpc64 A /trunk/powerpc64/Makefile.am A /trunk/powerpc64/Makefile.dev A /trunk/powerpc64/Makefile.in A /trunk/powerpc64/README A /trunk/powerpc64/bench.c A /trunk/powerpc64/generate_all A /trunk/powerpc64/mulredc.m4 A /trunk/powerpc64/mulredc1.asm A /trunk/powerpc64/mulredc10.asm A /trunk/powerpc64/mulredc11.asm A /trunk/powerpc64/mulredc12.asm A /trunk/powerpc64/mulredc13.asm A /trunk/powerpc64/mulredc14.asm A /trunk/powerpc64/mulredc15.asm A /trunk/powerpc64/mulredc16.asm A /trunk/powerpc64/mulredc17.asm A /trunk/powerpc64/mulredc18.asm A /trunk/powerpc64/mulredc19.asm A /trunk/powerpc64/mulredc2.asm A /trunk/powerpc64/mulredc20.asm A /trunk/powerpc64/mulredc3.asm A /trunk/powerpc64/mulredc4.asm A /trunk/powerpc64/mulredc5.asm A 
/trunk/powerpc64/mulredc6.asm A /trunk/powerpc64/mulredc7.asm A /trunk/powerpc64/mulredc8.asm A /trunk/powerpc64/mulredc9.asm A /trunk/powerpc64/mulredc_1_2.m4 A /trunk/powerpc64/redc.asm A /trunk/powerpc64/test_mulredc.c incorporated asm redc code for powerpc64 from Philip McLaughlin (still to be tested) ------------------------------------------------------------------------ r1332 | kruppa | 2009-01-18 16:35:23 +0100 (Sun, 18 Jan 2009) | 2 lines Changed paths: M /trunk/x86_64/mulredc.m4 Replaced xorq by xorl, added an assert. Cosmetic change, mostly ------------------------------------------------------------------------ r1331 | kruppa | 2009-01-18 16:33:38 +0100 (Sun, 18 Jan 2009) | 2 lines Changed paths: M /trunk/techdocs/mulrecip.tex Small fixes, cleanups ------------------------------------------------------------------------ r1330 | kruppa | 2009-01-18 16:32:10 +0100 (Sun, 18 Jan 2009) | 2 lines Changed paths: M /trunk/rho.gp Small cleanups in comments ------------------------------------------------------------------------ r1329 | kruppa | 2009-01-16 15:35:39 +0100 (Fri, 16 Jan 2009) | 3 lines Changed paths: M /trunk/mpzspm.c Use outputf() instead of printf() for error messages. Print some timing in mpzspm_init() with DEVVERBOSE. ------------------------------------------------------------------------ r1328 | kruppa | 2009-01-16 15:02:57 +0100 (Fri, 16 Jan 2009) | 3 lines Changed paths: M /trunk/rho.c pmeprob() should not access "go" if it might be a NULL pointer. Added function for P-1 probability for factors in a known residue class. ------------------------------------------------------------------------ r1327 | zimmerma | 2009-01-07 12:41:32 +0100 (Wed, 07 Jan 2009) | 5 lines Changed paths: M /trunk/ecm.c M /trunk/ecm.h M /trunk/factor.c M /trunk/main.c Added patch from Philip McLaughlin which adds new option -nobase2s2 that disables base-2 arithmetic in Step 2. 
This is experimental, and might be removed or replaced by another mechanism later on; in particular, it seems it breaks the binary compatibility. ------------------------------------------------------------------------ r1326 | zimmerma | 2009-01-07 11:28:03 +0100 (Wed, 07 Jan 2009) | 5 lines Changed paths: M /trunk/ecm.c M /trunk/mpmod.c [mpmod.c] added code to compare base-2 arithmetic to default one (disabled for now) [ecm.c] isbase2() was called twice -> called only once now, should yield a small speedup, especially for small B1 ------------------------------------------------------------------------ r1325 | zimmerma | 2008-12-19 16:01:05 +0100 (Fri, 19 Dec 2008) | 2 lines Changed paths: D /trunk/m4/libtool.m4 D /trunk/m4/ltoptions.m4 D /trunk/m4/ltsugar.m4 D /trunk/m4/ltversion.m4 D /trunk/m4/lt~obsolete.m4 removed m4/* files ------------------------------------------------------------------------ r1324 | zimmerma | 2008-12-14 15:05:29 +0100 (Sun, 14 Dec 2008) | 4 lines Changed paths: M /trunk/Makefile.am M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/ecm.h A /trunk/ecmfactor2.c M /trunk/main.c M /trunk/pm1.c M /trunk/pp1.c (unfinished) change to enable the use of GMP-ECM stage 2 from HECM, where a curve is given in Weierstrass form (see example in ecmfactor2.c). It compiles, but remains to be debugged...
------------------------------------------------------------------------ r1323 | zimmerma | 2008-12-14 13:45:34 +0100 (Sun, 14 Dec 2008) | 3 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in A /trunk/m4 A /trunk/m4/libtool.m4 A /trunk/m4/ltoptions.m4 A /trunk/m4/ltsugar.m4 A /trunk/m4/ltversion.m4 A /trunk/m4/lt~obsolete.m4 [configure.in,Makefile.am] switch to automake >= 1.10, and added macros suggested by autoreconf -i ------------------------------------------------------------------------ r1322 | brian_gladman | 2008-11-24 10:46:01 +0100 (Mon, 24 Nov 2008) | 1 line Changed paths: M /trunk/build.vc9/ecm/ecm.vcproj M /trunk/build.vc9/libecm/libecm.vcproj M /trunk/build.vc9/readme.txt Update VC++ build project to assume that the GMP root directory is named 'GMP' not 'GMP-version' ------------------------------------------------------------------------ r1321 | brian_gladman | 2008-11-24 10:21:23 +0100 (Mon, 24 Nov 2008) | 1 line Changed paths: M /trunk/build.vc9/ecm/ecm.vcproj M /trunk/build.vc9/libecm/libecm.vcproj Update MSVC builds to use GMP-4.2.4 ------------------------------------------------------------------------ r1320 | zimmerma | 2008-10-12 14:13:23 +0200 (Sun, 12 Oct 2008) | 2 lines Changed paths: M /trunk/configure.in [configure.in] changed version to 6.2.2 ------------------------------------------------------------------------ r1319 | brian_gladman | 2008-09-02 18:15:48 +0200 (Tue, 02 Sep 2008) | 1 line Changed paths: M /trunk/build.vc9/ecm/ecm.vcproj M /trunk/build.vc9/libecm/libecm.vcproj correction to VC++ build project to set the GMP include directory to gmp-4.2.3 ------------------------------------------------------------------------ r1318 | brian_gladman | 2008-08-30 22:03:27 +0200 (Sat, 30 Aug 2008) | 2 lines Changed paths: M /trunk/build.vc9/ecm/ecm.vcproj M /trunk/build.vc9/ecm-params.h M /trunk/build.vc9/readme.txt Update the Visual Studio build to use GMP-4.2.3 
------------------------------------------------------------------------ r1317 | zimmerma | 2008-07-17 11:28:30 +0200 (Thu, 17 Jul 2008) | 2 lines Changed paths: M /trunk/configure.in fixed split infinitive (thanks Paul Leyland) ------------------------------------------------------------------------ r1316 | zimmerma | 2008-06-13 20:50:28 +0200 (Fri, 13 Jun 2008) | 2 lines Changed paths: M /trunk/AUTHORS updated Dave's address ------------------------------------------------------------------------ r1315 | zimmerma | 2008-06-13 02:17:41 +0200 (Fri, 13 Jun 2008) | 2 lines Changed paths: M /trunk/TODO added new item ------------------------------------------------------------------------ r1314 | zimmerma | 2008-06-12 23:53:56 +0200 (Thu, 12 Jun 2008) | 2 lines Changed paths: M /trunk/TODO added comment ------------------------------------------------------------------------ r1313 | zimmerma | 2008-06-12 23:26:27 +0200 (Thu, 12 Jun 2008) | 2 lines Changed paths: M /trunk/listz.c compile list_mul_low only if KS_MULTIPLY is not defined, to avoid a warning ------------------------------------------------------------------------ r1312 | kruppa | 2008-06-12 02:38:03 +0200 (Thu, 12 Jun 2008) | 2 lines Changed paths: M /trunk/pm1.c Bugfix: new P-1 stage 2 called pm1prob() with uninitialised value for S ------------------------------------------------------------------------ r1311 | zimmerma | 2008-06-09 09:56:58 +0200 (Mon, 09 Jun 2008) | 2 lines Changed paths: M /trunk/configure.in removed --with-gmp-build option (no longer needed as we don't need gmp-impl.h) ------------------------------------------------------------------------ r1310 | zimmerma | 2008-06-06 05:49:50 +0200 (Fri, 06 Jun 2008) | 2 lines Changed paths: M /trunk/TODO added two items ------------------------------------------------------------------------ r1309 | zimmerma | 2008-06-05 03:50:16 +0200 (Thu, 05 Jun 2008) | 3 lines Changed paths: M /trunk/README.dev M /trunk/mul_fft.c added tags corresponding to
6.2 and 6.2.1 in README.dev commented out unused function in mul_fft.c ------------------------------------------------------------------------ r1307 | kruppa | 2008-06-04 05:43:25 +0200 (Wed, 04 Jun 2008) | 2 lines Changed paths: M /trunk/ChangeLog M /trunk/NEWS M /trunk/build.vc9/config.h Set version to 6.2.1 in trunk/build.vc9/config.h, updated NEWS, ChangeLog ------------------------------------------------------------------------ r1306 | kruppa | 2008-06-04 01:53:32 +0200 (Wed, 04 Jun 2008) | 2 lines Changed paths: M /trunk/countsmooth.c Make countsmooth compile again ------------------------------------------------------------------------ r1305 | brian_gladman | 2008-05-30 17:11:07 +0200 (Fri, 30 May 2008) | 1 line Changed paths: M /trunk/build.vc9/config.h M /trunk/build.vc9/ecm/ecm.vcproj M /trunk/build.vc9/libecm/libecm.vcproj M /trunk/build.vc9/tests.py Revert Windows build to use GMP-4.2.1 ------------------------------------------------------------------------ r1304 | kruppa | 2008-05-28 16:56:05 +0200 (Wed, 28 May 2008) | 2 lines Changed paths: M /trunk/pm1.c M /trunk/rho.c Print message about -go with P-1 probabilities ------------------------------------------------------------------------ r1303 | kruppa | 2008-05-28 16:33:34 +0200 (Wed, 28 May 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Print stage 2 time if factor is found ------------------------------------------------------------------------ r1302 | kruppa | 2008-05-28 16:09:12 +0200 (Wed, 28 May 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/rho.c Print probability of finding factors for P-1 ------------------------------------------------------------------------ r1301 | zimmerma | 2008-05-28 10:56:30 +0200 (Wed, 28 May 2008) | 4 lines Changed paths: M /trunk/main.c added warning for -go n1 -go n2 -> only n2 is taken into account (we could modify the code to take both into account, but we also can write -go "n1*n2") 
------------------------------------------------------------------------ r1300 | zimmerma | 2008-05-28 09:20:57 +0200 (Wed, 28 May 2008) | 2 lines Changed paths: M /trunk/README.dev M /trunk/configure.in changed the version to 6.2.1, and added hints in README.dev ------------------------------------------------------------------------ r1298 | zimmerma | 2008-05-27 23:04:26 +0200 (Tue, 27 May 2008) | 2 lines Changed paths: M /trunk/sp.h fixed compilation problem on IA65, EV56, ARM ------------------------------------------------------------------------ r1297 | kruppa | 2008-05-27 17:52:50 +0200 (Tue, 27 May 2008) | 2 lines Changed paths: M /trunk/ecm.c M /trunk/main.c M /trunk/resume.c Print success probabilities only if B1 == B2min ------------------------------------------------------------------------ r1296 | kruppa | 2008-05-27 17:09:21 +0200 (Tue, 27 May 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/pp1.c Increased default B2 for new P-/+1 stage 2 ------------------------------------------------------------------------ r1295 | jasonp | 2008-05-26 18:33:19 +0200 (Mon, 26 May 2008) | 1 line Changed paths: M /trunk/tune.c fix to previous commit ------------------------------------------------------------------------ r1294 | jasonp | 2008-05-26 17:46:51 +0200 (Mon, 26 May 2008) | 1 line Changed paths: M /trunk/ecm-params.h.alpha-ev5 M /trunk/ecm-params.h.alpha-ev6 M /trunk/ecm-params.h.athlon M /trunk/ecm-params.h.athlon64 M /trunk/ecm-params.h.core2 M /trunk/ecm-params.h.default M /trunk/ecm-params.h.pentium-m M /trunk/ecm-params.h.pentium3 M /trunk/ecm-params.h.pentium4 M /trunk/ecm-params.h.powerpc7450 M /trunk/ntt_gfp.c M /trunk/sp.h M /trunk/spm.c M /trunk/tune.c allow tuning of the breakover point between recursive and iterative NTTs ------------------------------------------------------------------------ r1293 | kruppa | 2008-05-25 15:22:55 +0200 (Sun, 25 May 2008) | 3 lines Changed paths: A /trunk/rho.gp Pari/GP file for 
estimating ECM probability of success, GMP-ECM's rho.c is a port of this file. ------------------------------------------------------------------------ r1292 | brian_gladman | 2008-05-16 18:22:41 +0200 (Fri, 16 May 2008) | 1 line Changed paths: M /trunk/build.vc9/config.h set version to 6.2 for MSVC ------------------------------------------------------------------------ r1291 | zimmerma | 2008-05-16 17:39:54 +0200 (Fri, 16 May 2008) | 2 lines Changed paths: M /trunk/mpzspv.c removed useless comment (and comment on comment) ------------------------------------------------------------------------ r1290 | kruppa | 2008-05-16 16:43:57 +0200 (Fri, 16 May 2008) | 3 lines Changed paths: M /trunk/Makefile.am A /trunk/ecm-params.h.pentium-m D /trunk/ecm-params.h.pentiumm Renamed parameter file from pentiumm to pentium-m, as that is what GMP uses for the architecture name. gcc uses parameter "-march pentium-m", too ------------------------------------------------------------------------ r1289 | kruppa | 2008-05-16 14:41:48 +0200 (Fri, 16 May 2008) | 2 lines Changed paths: M /trunk/ChangeLog Added latest changes ------------------------------------------------------------------------ r1288 | kruppa | 2008-05-16 14:38:03 +0200 (Fri, 16 May 2008) | 2 lines Changed paths: M /trunk/configure.in Set version to 6.2, set assertions to off by default ------------------------------------------------------------------------ r1287 | kruppa | 2008-05-16 14:27:26 +0200 (Fri, 16 May 2008) | 2 lines Changed paths: M /trunk/NEWS Added item: bugfix of Lucas chains for primes close to 3^32 ------------------------------------------------------------------------ r1286 | kruppa | 2008-05-16 14:14:06 +0200 (Fri, 16 May 2008) | 2 lines Changed paths: M /trunk/TODO Extended note telling why B2min 30 limbs ------------------------------------------------------------------------ r1277 | brian_gladman | 2008-05-13 21:24:37 +0200 (Tue, 13 May 2008) | 1 line Changed paths: M /trunk/build.vc9/tests.py 
------------------------------------------------------------------------ r1276 | kruppa | 2008-05-12 01:16:36 +0200 (Mon, 12 May 2008) | 2 lines Changed paths: M /trunk/ecm.c M /trunk/lucas.c Fixed integer overflows in PRAC ------------------------------------------------------------------------ r1275 | kruppa | 2008-05-12 01:15:36 +0200 (Mon, 12 May 2008) | 3 lines Changed paths: M /trunk/makesmooth.gp Make it work better with larger stage 1 primes by using nextprime() instead of forprime() ------------------------------------------------------------------------ r1274 | kruppa | 2008-05-11 21:42:05 +0200 (Sun, 11 May 2008) | 2 lines Changed paths: M /trunk/test.pp1 Added test case for bug in PRAC code for P+1 ------------------------------------------------------------------------ r1273 | kruppa | 2008-05-09 18:12:22 +0200 (Fri, 09 May 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c Parameter selection adjusted for P+1 which prefers smaller s_2 ------------------------------------------------------------------------ r1272 | brian_gladman | 2008-05-06 15:20:29 +0200 (Tue, 06 May 2008) | 1 line Changed paths: M /trunk/sp.h improved MSVC assembler code ------------------------------------------------------------------------ r1271 | zimmerma | 2008-05-06 14:31:31 +0200 (Tue, 06 May 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c replaced s_2 = ... by k = s_2 = ... 
(continued) ------------------------------------------------------------------------ r1269 | kruppa | 2008-05-06 14:19:27 +0200 (Tue, 06 May 2008) | 2 lines Changed paths: M /trunk/ChangeLog Added latest changes ------------------------------------------------------------------------ r1268 | kruppa | 2008-05-06 14:17:43 +0200 (Tue, 06 May 2008) | 2 lines Changed paths: M /trunk/pm1.c M /trunk/pp1.c Added "k = s_2 =" to -v output for new stage 2 ------------------------------------------------------------------------ r1266 | kruppa | 2008-05-06 13:59:54 +0200 (Tue, 06 May 2008) | 2 lines Changed paths: M /trunk/ChangeLog Added latest changes ------------------------------------------------------------------------ r1265 | kruppa | 2008-05-06 13:58:23 +0200 (Tue, 06 May 2008) | 2 lines Changed paths: M /trunk/README.dev M /trunk/configure.in Switched assertions to on by default (change to off for official release) ------------------------------------------------------------------------ r1264 | kruppa | 2008-05-06 13:45:04 +0200 (Tue, 06 May 2008) | 2 lines Changed paths: M /trunk/README Added remark explaining that s_2 in new stage 2 is similar to k in old one ------------------------------------------------------------------------ r1263 | kruppa | 2008-05-05 19:20:59 +0200 (Mon, 05 May 2008) | 4 lines Changed paths: M /trunk/TODO Added Torbjorn Granlund's suggestion for faster mpn_mod_1() Added item on rewriting mpmod.c to use mpn_*, not mpz_* (long term goal) Removed item on dynamic library, mostly done ------------------------------------------------------------------------ r1262 | kruppa | 2008-05-05 18:28:41 +0200 (Mon, 05 May 2008) | 2 lines Changed paths: M /trunk/README Added remark about much improved performance in 64 bit mode ------------------------------------------------------------------------ r1261 | kruppa | 2008-05-05 18:10:05 +0200 (Mon, 05 May 2008) | 3 lines Changed paths: M /trunk/sp.h Added sp_add() C code for modulus with MSB=0, fixed
comments for sp_add() asm macro ------------------------------------------------------------------------ r1260 | kruppa | 2008-05-05 17:26:30 +0200 (Mon, 05 May 2008) | 3 lines Changed paths: M /trunk/pm1fs2.c Mostly rewrote parameter selection to minimize estimated cost, allow smaller increments of B2 ------------------------------------------------------------------------ r1259 | zimmerma | 2008-05-03 00:19:56 +0200 (Sat, 03 May 2008) | 2 lines Changed paths: M /trunk/README.dev we should look in TODO for a new release ------------------------------------------------------------------------ r1258 | zimmerma | 2008-05-03 00:18:44 +0200 (Sat, 03 May 2008) | 2 lines Changed paths: M /trunk/TODO added bug with GWNUM interface ------------------------------------------------------------------------ r1257 | kruppa | 2008-05-02 23:52:10 +0200 (Fri, 02 May 2008) | 7 lines Changed paths: M /trunk/configure.in M /trunk/ecm-gmp.h M /trunk/ecm-impl.h M /trunk/ks-multiply.c M /trunk/mpmod.c M /trunk/mpzspv.c M /trunk/mul_fft.c M /trunk/pm1fs2.c M /trunk/schoen_strass.c Fixed broken help string for --enable-sse2 in configure Mangled names of mpn_fft_best_k() and mpn_fft_next_size(), moved prototypes of mpn_fft_*() functions to ecm-impl.h Declared some functions static that were used only locally and polluted the namespace ------------------------------------------------------------------------ r1256 | brian_gladman | 2008-05-02 21:42:15 +0200 (Fri, 02 May 2008) | 3 lines Changed paths: M /trunk/build.vc9/config.h M /trunk/build.vc9/ecm-params.h M /trunk/build.vc9/libecm/libecm.vcproj M /trunk/longlong.h add add MSVC intrinsics to longlong.h update build files ------------------------------------------------------------------------ r1255 | brian_gladman | 2008-05-01 23:52:03 +0200 (Thu, 01 May 2008) | 1 line Changed paths: M /trunk/build.vc9/tests.py ------------------------------------------------------------------------ r1254 | brian_gladman | 2008-05-01 23:45:32 +0200 (Thu, 
01 May 2008) | 1 line Changed paths: M /trunk/build.vc9/config.h M /trunk/build.vc9/tests.py update version (noticed by Paul) ------------------------------------------------------------------------ r1253 | brian_gladman | 2008-05-01 21:43:33 +0200 (Thu, 01 May 2008) | 1 line Changed paths: M /trunk/build.vc9/readme.txt ------------------------------------------------------------------------ r1252 | zimmerma | 2008-05-01 21:41:43 +0200 (Thu, 01 May 2008) | 3 lines Changed paths: M /trunk/INSTALL M /trunk/configure.in INSTALL: added pointer to build.vc9/readme.txt for Windows/VC++ configure.in: changed version to 6.2-rc2 ------------------------------------------------------------------------ r1251 | brian_gladman | 2008-05-01 21:36:11 +0200 (Thu, 01 May 2008) | 1 line Changed paths: M /trunk/build.vc9/readme.txt further update to the VC++ readme file. ------------------------------------------------------------------------ r1250 | brian_gladman | 2008-05-01 20:42:25 +0200 (Thu, 01 May 2008) | 1 line Changed paths: M /trunk/build.vc9/config.h M /trunk/build.vc9/readme.txt minor non critical changes to readme.txt and config.h ------------------------------------------------------------------------ r1249 | brian_gladman | 2008-05-01 19:20:31 +0200 (Thu, 01 May 2008) | 1 line Changed paths: M /trunk/build.vc9/tests.py ------------------------------------------------------------------------ r1248 | brian_gladman | 2008-05-01 11:38:19 +0200 (Thu, 01 May 2008) | 1 line Changed paths: M /trunk/build.vc9/readme.txt M /trunk/build.vc9/tests.py ------------------------------------------------------------------------ r1247 | zimmerma | 2008-05-01 01:05:46 +0200 (Thu, 01 May 2008) | 2 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in changes to have Brian's VC build files included in "make dist" ------------------------------------------------------------------------ r1246 | zimmerma | 2008-05-01 01:04:51 +0200 (Thu, 01 May 2008) | 2 lines Changed paths: A 
/trunk/build.vc9/Makefile.am A /trunk/build.vc9/assembler/Makefile.am A /trunk/build.vc9/ecm/Makefile.am A /trunk/build.vc9/libecm/Makefile.am needed makefiles to have the VC build files included in "make dist" ------------------------------------------------------------------------ r1245 | kruppa | 2008-04-30 17:34:22 +0200 (Wed, 30 Apr 2008) | 2 lines Changed paths: M /trunk/configure.in Use -W compiler flag instead of -Wextra so it works with older gcc version ------------------------------------------------------------------------ r1241 | kruppa | 2008-04-29 19:11:41 +0200 (Tue, 29 Apr 2008) | 2 lines Changed paths: M /trunk/ChangeLog Added the most recent changes ------------------------------------------------------------------------ r1240 | kruppa | 2008-04-29 16:16:58 +0200 (Tue, 29 Apr 2008) | 2 lines Changed paths: M /trunk/mul_fft.c More compiler warnings fixed... ------------------------------------------------------------------------ r1239 | zimmerma | 2008-04-29 16:06:38 +0200 (Tue, 29 Apr 2008) | 2 lines Changed paths: M /trunk/NEWS M /trunk/ecm.h put ecm.h under LGPL (as it should have been from the beginning...) ------------------------------------------------------------------------ r1238 | kruppa | 2008-04-29 15:36:30 +0200 (Tue, 29 Apr 2008) | 2 lines Changed paths: M /trunk/auxlib.c M /trunk/longlong.h M /trunk/mul_fft.c Fixed some more compiler warnings ------------------------------------------------------------------------ r1237 | kruppa | 2008-04-29 15:14:20 +0200 (Tue, 29 Apr 2008) | 4 lines Changed paths: M /trunk/configure.in Probing for compiler warning flags was unreliable: with Sun CC, -pedantic succeeds in the test, but fails during compiling. 
Adding warning flags only if we use GCC now ------------------------------------------------------------------------ r1236 | kruppa | 2008-04-29 14:01:38 +0200 (Tue, 29 Apr 2008) | 7 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/ecm_ntt.c M /trunk/eval.c M /trunk/main.c M /trunk/pm1fs2.c M /trunk/polyeval.c M /trunk/pp1.c M /trunk/random.c M /trunk/schoen_strass.c M /trunk/sp.h M /trunk/stage2.c Changed some "#if HAVE_*" to "#ifdef HAVE_*" to avoid warnings with -Wundef This implies that "#define HAVE_FOO 0" makes the ifdef succeed which is counter-intuitive. A test that can properly distinguish macros that are undefined, defined to empty token, defined to 0 or defined to 1 seems to require token concatenation and two-level expansion which is horrible. ------------------------------------------------------------------------ r1235 | kruppa | 2008-04-28 20:38:54 +0200 (Mon, 28 Apr 2008) | 2 lines Changed paths: M /trunk/ecm.1 Man page was out of date, re-made from ecm.xml ------------------------------------------------------------------------ r1234 | kruppa | 2008-04-28 20:36:32 +0200 (Mon, 28 Apr 2008) | 5 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in Fixed bug, test for matching version in gmp.h and libgmp failed due to missing include path if there was no gmp.h in a default include directory. Significant rewrite that hopefully is both more correct and cleaner. Added tests for compiler warning flags. 
------------------------------------------------------------------------ r1232 | kruppa | 2008-04-28 16:58:54 +0200 (Mon, 28 Apr 2008) | 2 lines Changed paths: M /trunk/ChangeLog Added recent changes to ChangeLog ------------------------------------------------------------------------ r1231 | kruppa | 2008-04-28 16:38:58 +0200 (Mon, 28 Apr 2008) | 2 lines Changed paths: M /trunk/INSTALL Some updates for 6.2rc1 ------------------------------------------------------------------------ r1230 | kruppa | 2008-04-28 16:24:21 +0200 (Mon, 28 Apr 2008) | 3 lines Changed paths: M /trunk/Makefile.am Added .s and .S files to CLEANFILES, as they did not get cleaned up by default ------------------------------------------------------------------------ r1229 | kruppa | 2008-04-28 12:14:50 +0200 (Mon, 28 Apr 2008) | 2 lines Changed paths: M /trunk/test.ecm M /trunk/test.pm1 M /trunk/test.pp1 Added a test in each file with an input too large for mulredc*() ------------------------------------------------------------------------ r1228 | kruppa | 2008-04-28 01:05:06 +0200 (Mon, 28 Apr 2008) | 3 lines Changed paths: M /trunk/configure.in Fixed problem with ./configure script not running correctly if --with-gmp was given and --enable-shared was not.
------------------------------------------------------------------------ r1227 | zimmerma | 2008-04-27 13:28:14 +0200 (Sun, 27 Apr 2008) | 2 lines Changed paths: M /trunk/configure.in fixed warning "AC_CANONICAL_HOST invoked multiple times" ------------------------------------------------------------------------ r1226 | zimmerma | 2008-04-27 10:30:00 +0200 (Sun, 27 Apr 2008) | 2 lines Changed paths: M /trunk/README.dev we also need to check INSTALL for a new release ------------------------------------------------------------------------ r1225 | zimmerma | 2008-04-27 10:28:35 +0200 (Sun, 27 Apr 2008) | 2 lines Changed paths: M /trunk/INSTALL update GMP latest version ------------------------------------------------------------------------ r1224 | kruppa | 2008-04-25 17:55:01 +0200 (Fri, 25 Apr 2008) | 2 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in Trying to make --enable-shared and --with-gmp work together ------------------------------------------------------------------------ r1223 | kruppa | 2008-04-25 14:59:34 +0200 (Fri, 25 Apr 2008) | 4 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in M /trunk/x86_64/redc.asm Fixed problem with compiling x86_64 assembly routines for dynamic library, a computed jump referenced absolute address of label in .text. 
Now we compute target address relative to rip ------------------------------------------------------------------------ r1222 | kruppa | 2008-04-25 11:44:59 +0200 (Fri, 25 Apr 2008) | 3 lines Changed paths: M /trunk/pm1.c M /trunk/pp1.c Fixed implicit conversion of int constant to double which caused warning in Visual C ------------------------------------------------------------------------ r1221 | kruppa | 2008-04-25 11:32:24 +0200 (Fri, 25 Apr 2008) | 3 lines Changed paths: M /trunk/makesmooth.gp Added function to produce primes where a given value is a quadratic non-residue, to make testing P+1 easier ------------------------------------------------------------------------ r1220 | kruppa | 2008-04-24 17:57:18 +0200 (Thu, 24 Apr 2008) | 2 lines Changed paths: M /trunk/acinclude.m4 A missing "-c" flag in test compilation of .s files caused configure to fail ------------------------------------------------------------------------ r1219 | lfousse | 2008-04-24 15:37:07 +0200 (Thu, 24 Apr 2008) | 2 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in Produce a shared library with libtool. 
------------------------------------------------------------------------ r1218 | brian_gladman | 2008-04-20 17:09:00 +0200 (Sun, 20 Apr 2008) | 1 line Changed paths: A /trunk/build.vc9/assembler/a_win32a_mulredc.asm A /trunk/build.vc9/assembler/a_win32a_redc.asm M /trunk/build.vc9/assembler/a_win32p_mulredc.asm M /trunk/build.vc9/assembler/a_win32p_redc.asm M /trunk/build.vc9/assembler/a_x64_mulredc.asm M /trunk/build.vc9/libecm/libecm.vcproj add win32 athlon assembler support for Visual C build ------------------------------------------------------------------------ r1217 | brian_gladman | 2008-04-18 12:33:20 +0200 (Fri, 18 Apr 2008) | 1 line Changed paths: A /trunk/build.vc9/assembler/a_win32p_mulredc.asm A /trunk/build.vc9/assembler/a_win32p_redc.asm M /trunk/build.vc9/assembler/a_x64_mulredc.asm M /trunk/build.vc9/config.h M /trunk/build.vc9/libecm/libecm.vcproj M /trunk/build.vc9/tests.py Add 32-bit pentium assembler support for VC++ build ------------------------------------------------------------------------ r1216 | kruppa | 2008-04-17 15:25:51 +0200 (Thu, 17 Apr 2008) | 2 lines Changed paths: M /trunk/README Updated to NTT and SchönhageStrassen sections ------------------------------------------------------------------------ r1215 | kruppa | 2008-04-17 11:30:23 +0200 (Thu, 17 Apr 2008) | 2 lines Changed paths: M /trunk/mpzspv.c Marked floating-point constant "float" to match other operands ------------------------------------------------------------------------ r1214 | brian_gladman | 2008-04-16 15:17:10 +0200 (Wed, 16 Apr 2008) | 1 line Changed paths: A /trunk/build.vc9/readme.txt A /trunk/build.vc9/yasm.rules Add short description of how to use YASM with VC++ for assembler support and mention Python tests file ------------------------------------------------------------------------ r1213 | brian_gladman | 2008-04-16 13:58:44 +0200 (Wed, 16 Apr 2008) | 1 line Changed paths: M /trunk/build.vc9/assembler/a_x64_mulredc.asm M /trunk/build.vc9/config.h M 
/trunk/build.vc9/libecm/libecm.vcproj M /trunk/build.vc9/tests.py enable assembler build with VC++ ------------------------------------------------------------------------ r1212 | brian_gladman | 2008-04-16 12:32:11 +0200 (Wed, 16 Apr 2008) | 1 line Changed paths: M /trunk/build.vc9/assembler/a_x64_mulredc.asm M /trunk/build.vc9/config.h M /trunk/build.vc9/libecm/libecm.vcproj ------------------------------------------------------------------------ r1211 | brian_gladman | 2008-04-16 11:56:01 +0200 (Wed, 16 Apr 2008) | 1 line Changed paths: A /trunk/build.vc9/assembler A /trunk/build.vc9/assembler/a_x64_mulredc.asm A /trunk/build.vc9/assembler/a_x64_redc.asm A /trunk/build.vc9/assembler/test_mulredc.c M /trunk/build.vc9/config.h A /trunk/build.vc9/tests.py Added YASM assembler code for AMD64 VC++ build ------------------------------------------------------------------------ r1210 | zimmerma | 2008-04-15 18:22:55 +0200 (Tue, 15 Apr 2008) | 2 lines Changed paths: M /trunk/README changed comment about efficiency of NTT to match new code ------------------------------------------------------------------------ r1209 | kruppa | 2008-04-15 17:25:10 +0200 (Tue, 15 Apr 2008) | 3 lines Changed paths: M /trunk/pm1fs2.c M /trunk/sets_long.c Disabled P > 2^30 in 32 bit machines as they lead to overflow in integer arithmetic in sets_long.c. This limits B2-B2min to about 10^15. ------------------------------------------------------------------------ r1208 | kruppa | 2008-04-15 16:36:06 +0200 (Tue, 15 Apr 2008) | 3 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1fs2.c M /trunk/sets_long.c There was an integer overflow problem in sets_sumset_minmax() and maxS() on 32 bit machines. Changed arithmetic to use GMP for these. 
------------------------------------------------------------------------ r1207 | kruppa | 2008-04-15 14:40:15 +0200 (Tue, 15 Apr 2008) | 2 lines Changed paths: M /trunk/auxlib.c M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/pp1.c Fixed overflow when converting maxmem from double -> size_t ------------------------------------------------------------------------ r1206 | kruppa | 2008-04-15 14:25:32 +0200 (Tue, 15 Apr 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Print elapsed real time for stage 2 in verbose mode with multi-threading ------------------------------------------------------------------------ r1205 | kruppa | 2008-04-15 11:03:21 +0200 (Tue, 15 Apr 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Fixed small output inconsistency in devverbose mode ------------------------------------------------------------------------ r1204 | kruppa | 2008-04-14 18:46:32 +0200 (Mon, 14 Apr 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Some changes to avoid integer overflow in memory estimation ------------------------------------------------------------------------ r1203 | kruppa | 2008-04-14 16:40:01 +0200 (Mon, 14 Apr 2008) | 3 lines Changed paths: M /trunk/mpzspm.c mpzspm_init() could miss a prime just below SP_MAX, reducing possible input size for a given transform length. ------------------------------------------------------------------------ r1202 | kruppa | 2008-04-14 16:38:42 +0200 (Mon, 14 Apr 2008) | 3 lines Changed paths: M /trunk/README Mention OMP_NUM_THREADS for OpenMP, explicit limits for input size and transform length on 32 bit machines. ------------------------------------------------------------------------ r1201 | kruppa | 2008-04-14 15:40:52 +0200 (Mon, 14 Apr 2008) | 4 lines Changed paths: M /trunk/auxlib.c Print number to be factored in decimal in checkpoint files. 
Conversion to decimal used to be slow, but is fast enough in recent GMP releases that this is not an issue any more ------------------------------------------------------------------------ r1200 | kruppa | 2008-04-14 14:14:19 +0200 (Mon, 14 Apr 2008) | 3 lines Changed paths: M /trunk/pm1.c Made code more linear to avoid having two if() branches with almost identical code. ------------------------------------------------------------------------ r1199 | kruppa | 2008-04-13 23:32:31 +0200 (Sun, 13 Apr 2008) | 3 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/pm1.c M /trunk/pp1.c Put code to print "Using B1=..." line in function rather than having 3 almost identical copies in pm1.c, pp1.c and ecm.c. ------------------------------------------------------------------------ r1198 | zimmerma | 2008-04-13 10:48:25 +0200 (Sun, 13 Apr 2008) | 2 lines Changed paths: M /trunk/ecm.c removed trailing blank ------------------------------------------------------------------------ r1197 | brian_gladman | 2008-04-12 17:09:39 +0200 (Sat, 12 Apr 2008) | 1 line Changed paths: M /trunk/README.dev M /trunk/build.vc9/config.h Update VC++ build to match configure.in version number ------------------------------------------------------------------------ r1196 | zimmerma | 2008-04-12 14:25:54 +0200 (Sat, 12 Apr 2008) | 2 lines Changed paths: M /trunk/AUTHORS added Brian in author list ------------------------------------------------------------------------ r1195 | zimmerma | 2008-04-12 14:23:54 +0200 (Sat, 12 Apr 2008) | 6 lines Changed paths: M /trunk/ChangeLog M /trunk/NEWS M /trunk/README M /trunk/configure.in NEWS: added some important items configure.in: changed version to 6.2-rc1 ChangeLog: added missing entries since 6.1 README: changes after complete pass (several issues remain to be solved by Jason and/or Alex) ------------------------------------------------------------------------ r1194 | brian_gladman | 2008-04-11 23:06:30 +0200 (Fri, 11 Apr 2008) | 1 line Changed
paths: A /trunk/build.vc9 A /trunk/build.vc9/config.h A /trunk/build.vc9/ecm A /trunk/build.vc9/ecm/ecm.vcproj A /trunk/build.vc9/ecm-params.h A /trunk/build.vc9/ecm.sln A /trunk/build.vc9/file_copy.bat A /trunk/build.vc9/libecm A /trunk/build.vc9/libecm/libecm.vcproj ------------------------------------------------------------------------ r1193 | kruppa | 2008-04-11 15:58:37 +0200 (Fri, 11 Apr 2008) | 3 lines Changed paths: M /trunk/README Some updates for new release, mention new stage 2 and that it doesn't work with Brent-Suyama ------------------------------------------------------------------------ r1192 | kruppa | 2008-04-11 14:26:11 +0200 (Fri, 11 Apr 2008) | 2 lines Changed paths: M /trunk/ecm.1 M /trunk/ecm.xml Updated man page ------------------------------------------------------------------------ r1191 | kruppa | 2008-04-10 13:47:40 +0200 (Thu, 10 Apr 2008) | 2 lines Changed paths: M /trunk/sp.h Fixed compiler warning about shift-by-32 on 32 bit systems. ------------------------------------------------------------------------ r1190 | zimmerma | 2008-04-10 08:38:03 +0200 (Thu, 10 Apr 2008) | 2 lines Changed paths: M /trunk/mul_fft.c pragma was ill-positioned ------------------------------------------------------------------------ r1189 | zimmerma | 2008-04-09 21:01:59 +0200 (Wed, 09 Apr 2008) | 2 lines Changed paths: M /trunk/mul_fft.c incorporated patches for VC++ v9 from Brian Gladman ------------------------------------------------------------------------ r1188 | kruppa | 2008-04-09 18:38:23 +0200 (Wed, 09 Apr 2008) | 4 lines Changed paths: M /trunk/mpzspm.c M /trunk/mpzspv.c M /trunk/sp.h Added conversion routines sp_t <-> mpz_t to overcome portability issues on systems where sp_t is wider than unsigned long and mpz_*_ui() functions can't be used. 
------------------------------------------------------------------------ r1187 | kruppa | 2008-04-09 16:31:36 +0200 (Wed, 09 Apr 2008) | 3 lines Changed paths: M /trunk/ecm.c In verbose mode if a factor was found, memory for table of Dickman rho values was not freed. ------------------------------------------------------------------------ r1186 | kruppa | 2008-04-09 16:17:08 +0200 (Wed, 09 Apr 2008) | 2 lines Changed paths: M /trunk/ecm-ecm.h M /trunk/main.c M /trunk/sets_long.c Changed some #if to #ifdef . Include alloca.h in sets_long.c ------------------------------------------------------------------------ r1185 | kruppa | 2008-04-09 16:15:28 +0200 (Wed, 09 Apr 2008) | 3 lines Changed paths: M /trunk/configure.in Check for setpriority() function (code tested for HAVE_SETPRIORITY, but configure never defined it) ------------------------------------------------------------------------ r1184 | kruppa | 2008-04-09 16:13:10 +0200 (Wed, 09 Apr 2008) | 3 lines Changed paths: M /trunk/tune.c Replaced %zd conversion in printf() by %ld with a typecast, z modifier is C99 and probably not very portable. ------------------------------------------------------------------------ r1183 | kruppa | 2008-04-09 15:33:41 +0200 (Wed, 09 Apr 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Fixed compiler warnings, bug (had comparison instead of assignment) ------------------------------------------------------------------------ r1182 | kruppa | 2008-04-09 15:32:10 +0200 (Wed, 09 Apr 2008) | 2 lines Changed paths: M /trunk/sp.h Fixed data type (was unsigned long instead of sp_t), removed unused variables ------------------------------------------------------------------------ r1181 | kruppa | 2008-04-09 15:30:47 +0200 (Wed, 09 Apr 2008) | 3 lines Changed paths: M /trunk/mul_fft.c Fixed some compiler warnings. Removed inclusion of longlong.h, as it expects certain data types for arithmetic on one-word integers to be defined. 
------------------------------------------------------------------------ r1180 | kruppa | 2008-04-09 15:28:02 +0200 (Wed, 09 Apr 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h Made parameter types for ntt_*() functions use the typedefs from sp.h ------------------------------------------------------------------------ r1179 | zimmerma | 2008-04-09 14:23:04 +0200 (Wed, 09 Apr 2008) | 4 lines Changed paths: M /trunk/polyeval.c M /trunk/stage2.c removed extra argument of polyeval() in stage2.c polyeval.c: polyeval() and polyeval_tellegen() must always be compiled, since they are needed in tune.c. ------------------------------------------------------------------------ r1178 | kruppa | 2008-04-09 10:40:54 +0200 (Wed, 09 Apr 2008) | 3 lines Changed paths: M /trunk/spm.c Fixed wrong type which broke arithmetic on machines where unsigned long has less width than sp_t. ------------------------------------------------------------------------ r1177 | kruppa | 2008-04-08 18:48:10 +0200 (Tue, 08 Apr 2008) | 2 lines Changed paths: M /trunk/sp.h Fixed typo in precompiler condition ------------------------------------------------------------------------ r1176 | kruppa | 2008-04-08 17:46:54 +0200 (Tue, 08 Apr 2008) | 3 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm2.c M /trunk/pm1.c M /trunk/pp1.c M /trunk/stage2.c Renamed mis-named function. Fixed output of uninitialised "dickson_a" value in ECM stage 2. ------------------------------------------------------------------------ r1175 | kruppa | 2008-04-08 16:51:35 +0200 (Tue, 08 Apr 2008) | 3 lines Changed paths: M /trunk/mul_fft.c Fixed bug in MPN_FFT_STORE: the non-asm version wrote too little data due to bad pointer type. 
------------------------------------------------------------------------ r1174 | zimmerma | 2008-04-08 15:55:08 +0200 (Tue, 08 Apr 2008) | 2 lines Changed paths: M /trunk/mul_fft.c added MPN_ZERO if not defined ------------------------------------------------------------------------ r1173 | kruppa | 2008-04-08 15:02:33 +0200 (Tue, 08 Apr 2008) | 3 lines Changed paths: M /trunk/main.c M /trunk/mul_fft.c M /trunk/sp.h Included some changes suggested by Brian Gladman to allow compiling under MS VC 9. ------------------------------------------------------------------------ r1172 | kruppa | 2008-04-08 13:03:51 +0200 (Tue, 08 Apr 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Fixed some "variable sized arrays" ------------------------------------------------------------------------ r1171 | zimmerma | 2008-04-08 12:55:37 +0200 (Tue, 08 Apr 2008) | 2 lines Changed paths: M /trunk/longlong.h M /trunk/mul_fft.c define dummy versions of __builtin_constant_p and __builtin_expect when not gcc ------------------------------------------------------------------------ r1170 | kruppa | 2008-04-08 12:49:05 +0200 (Tue, 08 Apr 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1fs2.c Fixed some compiler warnings with -pedantic ------------------------------------------------------------------------ r1169 | zimmerma | 2008-04-08 12:00:42 +0200 (Tue, 08 Apr 2008) | 2 lines Changed paths: M /trunk/TODO added TODO item ------------------------------------------------------------------------ r1168 | zimmerma | 2008-04-08 11:38:43 +0200 (Tue, 08 Apr 2008) | 5 lines Changed paths: M /trunk/Makefile.am M /trunk/asmredc.h M /trunk/getprime.c M /trunk/mul_fft.c Makefile.am: added -pedantic asmredc.h, mul_fft.c, getprime.c: - changed C++ style comments //... to C style /* ... 
*/ - fixed type declarations in between instructions ------------------------------------------------------------------------ r1167 | jasonp | 2008-04-08 04:30:45 +0200 (Tue, 08 Apr 2008) | 1 line Changed paths: M /trunk/sp.h force the size of small prime residues to explicitly match up with a GMP word ------------------------------------------------------------------------ r1166 | zimmerma | 2008-04-03 21:44:25 +0200 (Thu, 03 Apr 2008) | 3 lines Changed paths: M /trunk/ecm.c reduce the number of tried Lucas chains in PRAC for small numbers (thanks to Pierrick who noticed the overhead with MPFQ) ------------------------------------------------------------------------ r1165 | kruppa | 2008-03-25 19:40:32 +0100 (Tue, 25 Mar 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c M /trunk/pp1.c Some unsaved edits that were missing in last commit ------------------------------------------------------------------------ r1164 | kruppa | 2008-03-25 19:33:25 +0100 (Tue, 25 Mar 2008) | 4 lines Changed paths: M /trunk/configure.in M /trunk/ecm-impl.h M /trunk/median.c M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c Added code for automatic parameters selection for new P-1 and P+1 stage 2 according to available memory. Report if there's an error (e.g. out-of-memory condition) in TMulKS(). ------------------------------------------------------------------------ r1163 | jasonp | 2008-03-23 00:29:40 +0100 (Sun, 23 Mar 2008) | 4 lines Changed paths: M /trunk/ntt_gfp.c M /trunk/sp.h M /trunk/spv.c Use 31-bit primes for the NTT on 32-bit systems.
This is slightly slower than using 30-bit primes but allows arithmetic on larger polynomials ------------------------------------------------------------------------ r1162 | kruppa | 2008-03-21 18:49:16 +0100 (Fri, 21 Mar 2008) | 3 lines Changed paths: M /trunk/mpzspm.c M /trunk/pm1.c M /trunk/pp1.c M /trunk/sp.h New P+1 and P-1 stage 2 now checks maximal supported transform length for NTT, reduces lmax accordingly ------------------------------------------------------------------------ r1161 | kruppa | 2008-03-20 15:39:11 +0100 (Thu, 20 Mar 2008) | 14 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in M /trunk/main.c M /trunk/pm1fs2.c Andreas Schickel reported a problem with new stage 2 on 32 bit: there are so few suitable NTT primes < 2^30 that only short transform lengths (~2^19) are possible. Instead of requiring primes == 1 (mod 3l), code for building F with weighted convolutions is separate now and uses primes == 1 (mod 3l/4), the convolution for multipoint evaluation uses primes == 1 (mod l). This improves the situation a little, but the possible transform lengths (~2^20) are still rather small. Also fixed out of bound array access in ntt_sqr_reciprocal(). Print time stamp for each curve now instead of for each new number, requested by Andreas. ------------------------------------------------------------------------ r1160 | kruppa | 2008-03-20 11:49:17 +0100 (Thu, 20 Mar 2008) | 2 lines Changed paths: M /trunk/mul_fft.c Enable assertions if so specified in config.h (were always off!)
------------------------------------------------------------------------ r1159 | kruppa | 2008-03-20 11:39:56 +0100 (Thu, 20 Mar 2008) | 2 lines Changed paths: M /trunk/mul_fft.c Added two missing ASSERT for NULL pointer after alloc ------------------------------------------------------------------------ r1158 | jasonp | 2008-03-20 04:16:17 +0100 (Thu, 20 Mar 2008) | 1 line Changed paths: M /trunk/ntt_gfp.c M /trunk/sp.h M /trunk/spm.c do not store NTT scratch array on the stack ------------------------------------------------------------------------ r1157 | kruppa | 2008-03-19 19:52:40 +0100 (Wed, 19 Mar 2008) | 5 lines Changed paths: M /trunk/pm1fs2.c Revert bug-"fix" from just before, it breaks the NTT. Test for NULL pointer from mpzspm_init(). Changed diagnostic output during parameter selection to TRACE level. ------------------------------------------------------------------------ r1156 | kruppa | 2008-03-19 19:29:52 +0100 (Wed, 19 Mar 2008) | 3 lines Changed paths: M /trunk/mpzspm.c M /trunk/pm1fs2.c Fixed bug: required transform length overestimated, reduced possible transform length on 32 bit machines ------------------------------------------------------------------------ r1155 | kruppa | 2008-03-19 17:57:20 +0100 (Wed, 19 Mar 2008) | 3 lines Changed paths: M /trunk/Makefile.am Added ecm-params.h.pentium3 and ecm-params.h.pentium4 to list of files to put in distribution. 
------------------------------------------------------------------------ r1154 | kruppa | 2008-03-19 16:30:28 +0100 (Wed, 19 Mar 2008) | 2 lines Changed paths: M /trunk/Makefile.am Added rule to remove config.m4 on distclean, to make distcheck work ------------------------------------------------------------------------ r1153 | zimmerma | 2008-03-19 13:51:39 +0100 (Wed, 19 Mar 2008) | 2 lines Changed paths: A /trunk/ecm-params.h.pentiumm tuning parameters for pentium M ------------------------------------------------------------------------ r1152 | kruppa | 2008-03-19 12:36:38 +0100 (Wed, 19 Mar 2008) | 2 lines Changed paths: M /trunk/AUTHORS M /trunk/NEWS Added Jason, fixed typo ------------------------------------------------------------------------ r1151 | kruppa | 2008-03-19 11:46:37 +0100 (Wed, 19 Mar 2008) | 2 lines Changed paths: A /trunk/ecm-params.h.pentium4 Parameter file for Pentium 4 ------------------------------------------------------------------------ r1150 | kruppa | 2008-03-19 11:45:55 +0100 (Wed, 19 Mar 2008) | 2 lines Changed paths: M /trunk/ecm-params.h.athlon64 Updated parameters for new NTT ------------------------------------------------------------------------ r1149 | kruppa | 2008-03-19 11:45:40 +0100 (Wed, 19 Mar 2008) | 2 lines Changed paths: M /trunk/NEWS Updated NEWS for 6.2 ------------------------------------------------------------------------ r1148 | kruppa | 2008-03-18 20:18:06 +0100 (Tue, 18 Mar 2008) | 3 lines Changed paths: M /trunk/configure.in Added --enable-sse2 option. Default is using SSE2 on Pentium 4, not using it on other architectures. 
------------------------------------------------------------------------ r1147 | kruppa | 2008-03-18 20:16:59 +0100 (Tue, 18 Mar 2008) | 4 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c Made -maxmem work with new P+-1 stage 2, also chooses one pass evaluation (computing convolutions for both coordinates, adding in transform space to save an inverse transform) for P+1 if memory allows. ------------------------------------------------------------------------ r1146 | kruppa | 2008-03-14 15:14:15 +0100 (Fri, 14 Mar 2008) | 4 lines Changed paths: M /trunk/sp.h Re-wrote asm macro for sp_add, sp_sub. Fixes (I hope!) a subtle bug in operand constraints (edx could be used as input operand in spite of being marked "clobbered") and I hope is a bit faster, too. ------------------------------------------------------------------------ r1145 | kruppa | 2008-03-13 15:19:32 +0100 (Thu, 13 Mar 2008) | 2 lines Changed paths: M /trunk/configure.in autoconf doesn't like space in macro ------------------------------------------------------------------------ r1144 | kruppa | 2008-03-13 14:55:54 +0100 (Thu, 13 Mar 2008) | 2 lines Changed paths: M /trunk/Fgw.c Thrown out lots of dead code ------------------------------------------------------------------------ r1143 | kruppa | 2008-03-13 14:52:22 +0100 (Thu, 13 Mar 2008) | 4 lines Changed paths: M /trunk/ecm.c M /trunk/lucas.c Replaced floating-point constants for PRAC by their reciprocals and division by multiplication. Some speedup for P+1 with very small numbers, for other cases very little speedup. ------------------------------------------------------------------------ r1142 | kruppa | 2008-02-27 18:45:43 +0100 (Wed, 27 Feb 2008) | 3 lines Changed paths: M /trunk/Fgw.c Added assert to GWNUM ECM stage 1 interface to test that output residue fits in allocated space. 
------------------------------------------------------------------------ r1141 | kruppa | 2008-02-27 15:54:00 +0100 (Wed, 27 Feb 2008) | 4 lines Changed paths: M /trunk/Fgw.c M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/mpmod.c M /trunk/schoen_strass.c Removed code to use GWNUM for individual multiplications modulo Fermat numbers - I think it was broken, never got used much anyway and if we really want it, will need a rewrite from scratch ------------------------------------------------------------------------ r1140 | kruppa | 2008-02-27 15:43:21 +0100 (Wed, 27 Feb 2008) | 2 lines Changed paths: M /trunk/ecm.c Removed some remaining dead Montgomery roots code ------------------------------------------------------------------------ r1139 | kruppa | 2008-02-27 12:03:47 +0100 (Wed, 27 Feb 2008) | 3 lines Changed paths: M /trunk/pm1fs2.c Added check of result to ntt_sqr_reciprocal(), hoping to find an elusive bug that appears on my AMD Athlon ------------------------------------------------------------------------ r1138 | kruppa | 2008-02-27 01:36:13 +0100 (Wed, 27 Feb 2008) | 2 lines Changed paths: M /trunk/mpzspv.c Fixed segfault in mpzspv_verify(). 
------------------------------------------------------------------------ r1137 | kruppa | 2008-02-26 23:46:13 +0100 (Tue, 26 Feb 2008) | 2 lines Changed paths: M /trunk/x86_64/README Added comment on generating mulredc{1,2}.asm from Python script ------------------------------------------------------------------------ r1136 | kruppa | 2008-02-26 23:42:50 +0100 (Tue, 26 Feb 2008) | 2 lines Changed paths: M /trunk/x86_64/Makefile.am M /trunk/x86_64/Makefile.dev M /trunk/x86_64/generate_all Updated Makefiles/scripts to make mulredc asm code from autogen.py/mulredc.m4 ------------------------------------------------------------------------ r1135 | kruppa | 2008-02-26 23:24:25 +0100 (Tue, 26 Feb 2008) | 2 lines Changed paths: M /trunk/x86_64/mulredc1.asm mulredc1.asm as generated by Python script (without my old comment edits) ------------------------------------------------------------------------ r1134 | kruppa | 2008-02-26 19:59:22 +0100 (Tue, 26 Feb 2008) | 3 lines Changed paths: M /trunk/configure.in M /trunk/ntt_gfp.c M /trunk/spv.c configure now defines HAS_SSE2 if running on Pentium 4 so NTT uses Jason Papadopoulos' SSE2 code. 
------------------------------------------------------------------------ r1133 | kruppa | 2008-02-26 19:21:56 +0100 (Tue, 26 Feb 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Parallelized pp1_sequence_h(), last function that hadn't been done yet ------------------------------------------------------------------------ r1132 | kruppa | 2008-02-22 17:53:40 +0100 (Fri, 22 Feb 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c M /trunk/sets_long.c Fixed typo, spelt out ANTS ------------------------------------------------------------------------ r1131 | kruppa | 2008-02-22 17:42:15 +0100 (Fri, 22 Feb 2008) | 4 lines Changed paths: M /trunk/auxlib.c M /trunk/mpzspm.c M /trunk/mpzspv.c M /trunk/ntt_gfp.c M /trunk/pm1fs2.c M /trunk/sets_long.c M /trunk/sp.c M /trunk/sp.h M /trunk/spm.c M /trunk/spv.c M /trunk/tune.c Included patch by Jason to fix out-of-bounds array access. Updated copyright information. ------------------------------------------------------------------------ r1130 | kruppa | 2008-02-21 19:36:26 +0100 (Thu, 21 Feb 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Removed dead code, made output of timing info more consistent ------------------------------------------------------------------------ r1129 | kruppa | 2008-02-21 17:18:34 +0100 (Thu, 21 Feb 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpmod.c Changed _ui functions to take unsigned longs, as GMP does ------------------------------------------------------------------------ r1128 | kruppa | 2008-02-21 15:07:54 +0100 (Thu, 21 Feb 2008) | 2 lines Changed paths: M /trunk/getprime.c Fixed access to uninitialised data in getprime() ------------------------------------------------------------------------ r1127 | kruppa | 2008-02-21 12:14:35 +0100 (Thu, 21 Feb 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/getprime.c M /trunk/mpmod.c M /trunk/mul_fft.c Fixed some compiler warnings.
------------------------------------------------------------------------ r1126 | kruppa | 2008-02-21 11:42:33 +0100 (Thu, 21 Feb 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm2.c M /trunk/pm1.c M /trunk/pp1.c M /trunk/stage2.c Fixed ugly typecast ------------------------------------------------------------------------ r1125 | kruppa | 2008-02-20 15:34:07 +0100 (Wed, 20 Feb 2008) | 3 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm2.c Removed code for ECM stage 2 roots in Montgomery form. Never worked and probably never will. ------------------------------------------------------------------------ r1124 | kruppa | 2008-02-20 15:19:11 +0100 (Wed, 20 Feb 2008) | 2 lines Changed paths: M /trunk/mpzspm.c M /trunk/mpzspv.c M /trunk/ntt_gfp.c M /trunk/pm1fs2.c M /trunk/sp.c M /trunk/sp.h M /trunk/spm.c M /trunk/spv.c M /trunk/tune.c Merging Jason Papadopoulos' new SSE2 NTT code ------------------------------------------------------------------------ r1123 | kruppa | 2008-02-20 15:18:35 +0100 (Wed, 20 Feb 2008) | 2 lines Changed paths: M /trunk/techdocs/convolv.tex Cleanups ------------------------------------------------------------------------ r1122 | kruppa | 2008-02-20 13:56:45 +0100 (Wed, 20 Feb 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Some small cleanup of timing/residue output ------------------------------------------------------------------------ r1121 | kruppa | 2008-02-19 23:53:22 +0100 (Tue, 19 Feb 2008) | 2 lines Changed paths: M /trunk/stage2.c Small changes (mostly comments) ------------------------------------------------------------------------ r1120 | kruppa | 2008-02-19 19:03:30 +0100 (Tue, 19 Feb 2008) | 2 lines Changed paths: M /trunk/pm1.c Made new stage 2 the default. Fixed small memory leak. ------------------------------------------------------------------------ r1119 | kruppa | 2008-02-19 18:59:20 +0100 (Tue, 19 Feb 2008) | 3 lines Changed paths: M /trunk/pp1.c Fixed bug in computing roots of G when i0 < -6. 
Made the new stage 2 the default. Fixed small memory leak. ------------------------------------------------------------------------ r1118 | zimmerma | 2008-02-18 18:36:44 +0100 (Mon, 18 Feb 2008) | 2 lines Changed paths: M /trunk/README.dev A /trunk/testlong.pp1 added new (long) test file for P+1 ------------------------------------------------------------------------ r1117 | zimmerma | 2008-02-18 14:38:34 +0100 (Mon, 18 Feb 2008) | 3 lines Changed paths: M /trunk/test.pp1 added test cases that exhibit bug in 6.1.3 with polynomials of degree > 1 (and maybe in earlier versions) ------------------------------------------------------------------------ r1116 | kruppa | 2008-02-18 11:46:12 +0100 (Mon, 18 Feb 2008) | 2 lines Changed paths: M /trunk/techdocs/buildpoly.tex Something about converting polynomial bases, forgot what's it about by now ------------------------------------------------------------------------ r1115 | zimmerma | 2008-02-12 23:40:53 +0100 (Tue, 12 Feb 2008) | 2 lines Changed paths: M /trunk/main.c update P+1 top-ten bound ------------------------------------------------------------------------ r1114 | jasonp | 2008-01-30 08:04:04 +0100 (Wed, 30 Jan 2008) | 1 line Changed paths: M /trunk/Makefile.am allow 'make check' to work in MinGW ------------------------------------------------------------------------ r1113 | kruppa | 2008-01-29 16:57:28 +0100 (Tue, 29 Jan 2008) | 4 lines Changed paths: M /trunk/pm1fs2.c Fixed small memory leak (S_2). Allocate enough memory for mpz_t's to avoid reallocs. 
------------------------------------------------------------------------ r1112 | kruppa | 2008-01-29 16:54:45 +0100 (Tue, 29 Jan 2008) | 2 lines Changed paths: M /trunk/mpmod.c Fixed compiler warning: parentheses around assignment as truth value ------------------------------------------------------------------------ r1111 | kruppa | 2008-01-29 16:53:58 +0100 (Tue, 29 Jan 2008) | 2 lines Changed paths: M /trunk/mpzspm.c Minor changes: replace mpz_add by mpz_mul_2exp, changes to comments ------------------------------------------------------------------------ r1110 | kruppa | 2008-01-23 18:09:04 +0100 (Wed, 23 Jan 2008) | 2 lines Changed paths: M /trunk/mpzspv.c Multi-threading pragmas for mpzspv_from_mpzv() ------------------------------------------------------------------------ r1109 | kruppa | 2008-01-21 23:20:26 +0100 (Mon, 21 Jan 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c A little more parallelism in ntt_sqr_recip() to improve timings ------------------------------------------------------------------------ r1108 | kruppa | 2008-01-21 20:11:24 +0100 (Mon, 21 Jan 2008) | 2 lines Changed paths: M /trunk/auxlib.c M /trunk/ecm-impl.h M /trunk/mpzspv.c M /trunk/pm1fs2.c M /trunk/sp.h Some more parallelization to get nicer timings for the final paper ------------------------------------------------------------------------ r1107 | kruppa | 2008-01-21 20:10:57 +0100 (Mon, 21 Jan 2008) | 3 lines Changed paths: M /trunk/mpmod.c ECM_MOD_MPZ reduction uses aux_modulus now, but that was not copied by mpmod_copy(). Fixed.
------------------------------------------------------------------------ r1106 | kruppa | 2008-01-21 14:57:23 +0100 (Mon, 21 Jan 2008) | 2 lines Changed paths: A /trunk/phiP.gp A pari script to make P values for new stage 2 ------------------------------------------------------------------------ r1105 | kruppa | 2008-01-21 01:16:13 +0100 (Mon, 21 Jan 2008) | 4 lines Changed paths: M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c M /trunk/sp.h Changed ntt_sqr_recip() to use Montgomery's idea of using a primitive 3rd root of unity for the weight signal. ------------------------------------------------------------------------ r1104 | zimmerma | 2008-01-17 08:52:20 +0100 (Thu, 17 Jan 2008) | 2 lines Changed paths: M /trunk/mpmod.c fixed a bug in mpres_mpz_mod when n=1 ------------------------------------------------------------------------ r1103 | zimmerma | 2008-01-16 11:56:32 +0100 (Wed, 16 Jan 2008) | 2 lines Changed paths: M /trunk/mpmod.c fixed bug in mpres_mpz_mod in case T has more than 2n limbs ------------------------------------------------------------------------ r1102 | zimmerma | 2008-01-16 10:42:19 +0100 (Wed, 16 Jan 2008) | 5 lines Changed paths: M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/ecm.h M /trunk/ecm2.c M /trunk/main.c M /trunk/mpmod.c M /trunk/pp1.c mpmod_init code was duplicated: now call mpmod_init directly this also solved some inconsistencies in arithmetic options: in some cases ECM did not use mpz_mod although -mpzmod was given added new algorithm which speeds up -mpzmod arithmetic ------------------------------------------------------------------------ r1101 | kruppa | 2008-01-15 18:02:11 +0100 (Tue, 15 Jan 2008) | 2 lines Changed paths: M /trunk/mpzspm.c M /trunk/spm.c Allow computation of roots of unity whose order isn't a power of 2 ------------------------------------------------------------------------ r1100 | zimmerma | 2008-01-15 15:45:23 +0100 (Tue, 15 Jan 2008) | 3 lines Changed paths: M /trunk/TODO A /trunk/TODO.kunz added 
suggestions from Thomas Kunz, to make it easier to port GMP-ECM to specific architectures ------------------------------------------------------------------------ r1099 | kruppa | 2008-01-14 15:53:22 +0100 (Mon, 14 Jan 2008) | 3 lines Changed paths: M /trunk/pm1fs2.c Fixed bug in computation of g sequence for P-1 with many threads: negative value could get assigned to unsigned long. ------------------------------------------------------------------------ r1098 | kruppa | 2008-01-13 11:38:53 +0100 (Sun, 13 Jan 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Modified ntt_sqr_recip to work as described in the paper, parallelized it ------------------------------------------------------------------------ r1097 | zimmerma | 2008-01-13 10:21:21 +0100 (Sun, 13 Jan 2008) | 2 lines Changed paths: M /trunk/TODO A /trunk/TODO.fat added suggestion from Peter Montgomery ------------------------------------------------------------------------ r1096 | kruppa | 2008-01-12 15:37:28 +0100 (Sat, 12 Jan 2008) | 3 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c If NTT is used, ensure s_1 < lmax/2 so that poly degrees stay just below a power of two while building F. ------------------------------------------------------------------------ r1095 | kruppa | 2008-01-11 20:37:24 +0100 (Fri, 11 Jan 2008) | 2 lines Changed paths: M /trunk/techdocs/mulrecip.tex More details on DWT mul for RLPs ------------------------------------------------------------------------ r1094 | kruppa | 2008-01-10 19:47:02 +0100 (Thu, 10 Jan 2008) | 2 lines Changed paths: A /trunk/techdocs/mulrecip.tex D /trunk/techdocs/mulrecipdwt.tex Added something on multiplying RLP without DWT/NTT ------------------------------------------------------------------------ r1093 | kruppa | 2008-01-10 19:44:08 +0100 (Thu, 10 Jan 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c Rewrote list_mul_reciprocal() to use less temp memory. 
------------------------------------------------------------------------ r1092 | kruppa | 2008-01-09 22:44:22 +0100 (Wed, 09 Jan 2008) | 4 lines Changed paths: M /trunk/Makefile.am A /trunk/auxarith.c M /trunk/auxlib.c M /trunk/ecm-impl.h M /trunk/ecm2.c M /trunk/pm1fs2.c Moved functions for simple unsigned long arithmetic to auxarith.c Added function for squaring an RLP with a discrete weighted NTT of half length. ------------------------------------------------------------------------ r1091 | kruppa | 2008-01-07 23:04:55 +0100 (Mon, 07 Jan 2008) | 2 lines Changed paths: M /trunk/techdocs/mulrecipdwt.tex Small changes ------------------------------------------------------------------------ r1090 | kruppa | 2008-01-07 22:45:01 +0100 (Mon, 07 Jan 2008) | 2 lines Changed paths: A /trunk/techdocs/mulrecipdwt.tex A note on (hopefully) multiplying RLPs with a weighted FFT ------------------------------------------------------------------------ r1089 | kruppa | 2008-01-07 22:43:23 +0100 (Mon, 07 Jan 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c New function for squaring RLPs, simpler than general multiply one ------------------------------------------------------------------------ r1088 | kruppa | 2008-01-07 22:41:13 +0100 (Mon, 07 Jan 2008) | 2 lines Changed paths: M /trunk/pm1fs2.c M /trunk/sets_long.c Some more cleanups ------------------------------------------------------------------------ r1087 | kruppa | 2008-01-06 22:12:24 +0100 (Sun, 06 Jan 2008) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1fs2.c M /trunk/sets_long.c M /trunk/x86_64/README M /trunk/x86_64/test_mulredc.c More cleanup and bugfixes ------------------------------------------------------------------------ r1086 | kruppa | 2008-01-02 11:22:55 +0100 (Wed, 02 Jan 2008) | 2 lines Changed paths: M /trunk/Makefile.am M /trunk/ecm-impl.h M /trunk/pm1fs2.c M /trunk/sets_long.c More code cleanup in sets_long.c, made pm1fs2.c use those functions 
------------------------------------------------------------------------ r1085 | zimmerma | 2007-12-25 17:20:25 +0100 (Tue, 25 Dec 2007) | 2 lines Changed paths: M /trunk/main.c updated champions thresholds ------------------------------------------------------------------------ r1084 | kruppa | 2007-12-21 16:20:41 +0100 (Fri, 21 Dec 2007) | 3 lines Changed paths: M /trunk/ecm-impl.h A /trunk/sets_long.c Rewrote most of the operations on sets of longs for clarity, moved them into own source file. ------------------------------------------------------------------------ r1083 | kruppa | 2007-12-20 10:37:57 +0100 (Thu, 20 Dec 2007) | 2 lines Changed paths: M /trunk/mpmod.c Fixed bug in mpmod_copy() with 2^n-1 numbers. ------------------------------------------------------------------------ r1082 | kruppa | 2007-12-19 11:53:59 +0100 (Wed, 19 Dec 2007) | 4 lines Changed paths: M /trunk/mul_fft.c Fixed unparenthesized parameter in __GMP_ALLOCATE_FUNC_LIMBS macro. Changed copy of inputs in mpn_mul_fft_aux() to use malloc instead of alloca. Added ASSERT != NULL to temp space allocs. ------------------------------------------------------------------------ r1081 | kruppa | 2007-12-18 11:37:01 +0100 (Tue, 18 Dec 2007) | 2 lines Changed paths: M /trunk/factor.c Updated default parameters so ugly hack works outside of GMP-ECM ------------------------------------------------------------------------ r1080 | kruppa | 2007-12-18 11:00:28 +0100 (Tue, 18 Dec 2007) | 2 lines Changed paths: M /trunk/factor.c M /trunk/main.c Added ugly hack to pass B2scale parameter to library ------------------------------------------------------------------------ r1079 | kruppa | 2007-12-17 21:44:14 +0100 (Mon, 17 Dec 2007) | 2 lines Changed paths: M /trunk/x86_64/README Updated README for new m4 script. 
------------------------------------------------------------------------ r1078 | zimmerma | 2007-12-17 14:25:56 +0100 (Mon, 17 Dec 2007) | 2 lines Changed paths: M /trunk/TODO reorganized, added a table of contents, and added a section "installation" ------------------------------------------------------------------------ r1077 | kruppa | 2007-12-17 13:38:13 +0100 (Mon, 17 Dec 2007) | 3 lines Changed paths: M /trunk/Makefile.am M /trunk/configure.in M /trunk/x86_64/mulredc.m4 M /trunk/x86_64/mulredc10.asm M /trunk/x86_64/mulredc11.asm M /trunk/x86_64/mulredc12.asm M /trunk/x86_64/mulredc13.asm M /trunk/x86_64/mulredc14.asm M /trunk/x86_64/mulredc15.asm M /trunk/x86_64/mulredc16.asm M /trunk/x86_64/mulredc17.asm M /trunk/x86_64/mulredc18.asm M /trunk/x86_64/mulredc19.asm M /trunk/x86_64/mulredc20.asm M /trunk/x86_64/mulredc3.asm M /trunk/x86_64/mulredc4.asm M /trunk/x86_64/mulredc5.asm M /trunk/x86_64/mulredc6.asm M /trunk/x86_64/mulredc7.asm M /trunk/x86_64/mulredc8.asm M /trunk/x86_64/mulredc9.asm If GMP is linked statically, link mpmod.o and GMP first to put speed critical functions close together, hoping to avoid cache collisions. ------------------------------------------------------------------------ r1076 | zimmerma | 2007-12-14 21:45:36 +0100 (Fri, 14 Dec 2007) | 4 lines Changed paths: M /trunk/Makefile.am Hard-coded compilation line for alternate binary (ecm2) with speed-critical routines close together. Should be removed once somebody figures out how to do this properly. 
------------------------------------------------------------------------ r1075 | kruppa | 2007-12-13 15:24:22 +0100 (Thu, 13 Dec 2007) | 2 lines Changed paths: M /trunk/x86_64/mulredc.m4 M /trunk/x86_64/mulredc10.asm M /trunk/x86_64/mulredc11.asm M /trunk/x86_64/mulredc12.asm M /trunk/x86_64/mulredc13.asm M /trunk/x86_64/mulredc14.asm M /trunk/x86_64/mulredc15.asm M /trunk/x86_64/mulredc16.asm M /trunk/x86_64/mulredc17.asm M /trunk/x86_64/mulredc18.asm M /trunk/x86_64/mulredc19.asm M /trunk/x86_64/mulredc20.asm M /trunk/x86_64/mulredc3.asm M /trunk/x86_64/mulredc4.asm M /trunk/x86_64/mulredc5.asm M /trunk/x86_64/mulredc6.asm M /trunk/x86_64/mulredc7.asm M /trunk/x86_64/mulredc8.asm M /trunk/x86_64/mulredc9.asm More optimization of mulredc, up to 4% faster ------------------------------------------------------------------------ r1074 | kruppa | 2007-12-05 16:08:00 +0100 (Wed, 05 Dec 2007) | 4 lines Changed paths: M /trunk/mul_fft.c Some functions caused symbol conflict when linking GMP statically. Made those functions "static" in mul_fft.c, as they do not seem to be used outside of that file. 
------------------------------------------------------------------------ r1073 | kruppa | 2007-12-05 15:25:20 +0100 (Wed, 05 Dec 2007) | 2 lines Changed paths: M /trunk/x86_64/bench.c M /trunk/x86_64/mulredc.m4 M /trunk/x86_64/mulredc10.asm M /trunk/x86_64/mulredc11.asm M /trunk/x86_64/mulredc12.asm M /trunk/x86_64/mulredc13.asm M /trunk/x86_64/mulredc14.asm M /trunk/x86_64/mulredc15.asm M /trunk/x86_64/mulredc16.asm M /trunk/x86_64/mulredc17.asm M /trunk/x86_64/mulredc18.asm M /trunk/x86_64/mulredc19.asm M /trunk/x86_64/mulredc20.asm M /trunk/x86_64/mulredc3.asm M /trunk/x86_64/mulredc4.asm M /trunk/x86_64/mulredc5.asm M /trunk/x86_64/mulredc6.asm M /trunk/x86_64/mulredc7.asm M /trunk/x86_64/mulredc8.asm M /trunk/x86_64/mulredc9.asm M /trunk/x86_64/test_mulredc.c Fixed comments to match code ------------------------------------------------------------------------ r1072 | kruppa | 2007-12-05 10:19:31 +0100 (Wed, 05 Dec 2007) | 3 lines Changed paths: M /trunk/mpmod.c Added an assertion to modmul_basecase which compares results with redc_basecase. 
Enable with -DWANT_ASSERT_EXPENSIVE ------------------------------------------------------------------------ r1071 | kruppa | 2007-12-05 10:10:32 +0100 (Wed, 05 Dec 2007) | 2 lines Changed paths: M /trunk/ntt_gfp.c Added DCT function, but does not work correctly yet - output is not a DCT-II ------------------------------------------------------------------------ r1070 | kruppa | 2007-12-05 10:09:19 +0100 (Wed, 05 Dec 2007) | 3 lines Changed paths: M /trunk/x86_64/mulredc.m4 Fixed m4 quotes to allow generation of .asm files (which get processed by m4 again during compilation) ------------------------------------------------------------------------ r1069 | kruppa | 2007-12-04 18:19:36 +0100 (Tue, 04 Dec 2007) | 4 lines Changed paths: M /trunk/x86_64/mulredc10.asm M /trunk/x86_64/mulredc11.asm M /trunk/x86_64/mulredc12.asm M /trunk/x86_64/mulredc13.asm M /trunk/x86_64/mulredc14.asm M /trunk/x86_64/mulredc15.asm M /trunk/x86_64/mulredc16.asm M /trunk/x86_64/mulredc17.asm M /trunk/x86_64/mulredc18.asm M /trunk/x86_64/mulredc19.asm M /trunk/x86_64/mulredc20.asm M /trunk/x86_64/mulredc3.asm M /trunk/x86_64/mulredc4.asm M /trunk/x86_64/mulredc5.asm M /trunk/x86_64/mulredc6.asm M /trunk/x86_64/mulredc7.asm M /trunk/x86_64/mulredc8.asm M /trunk/x86_64/mulredc9.asm Assembler files generated with m4 -DLENGTH=3 mulredc.m4 > mulredc3.asm etc. ------------------------------------------------------------------------ r1068 | kruppa | 2007-12-04 17:47:21 +0100 (Tue, 04 Dec 2007) | 2 lines Changed paths: M /trunk/x86_64/mulredc.m4 Somewhat faster. Speedup over Python script generated code is 7-10%. 
------------------------------------------------------------------------ r1067 | zimmerma | 2007-12-04 09:39:32 +0100 (Tue, 04 Dec 2007) | 4 lines Changed paths: M /trunk/configure.in M /trunk/ecm-gmp.h M /trunk/ks-multiply.c M /trunk/listz.c M /trunk/mpmod.c M /trunk/schoen_strass.c do not use any more GMP's mpn_mul_fft (which was not public), and always use instead ecm_mpn_mul_fft (included in GMP-ECM, and faster). Yields small speedup for Fermat numbers. ------------------------------------------------------------------------ r1066 | zimmerma | 2007-12-03 21:07:30 +0100 (Mon, 03 Dec 2007) | 5 lines Changed paths: M /trunk/configure.in M /trunk/ecm-params.h.athlon64 A /trunk/ecm-params.h.pentium3 M /trunk/mul_fft.c configure.in: added tuning (ecm-params) for pentium3 mul_fft.c: added default values of parameters ecm-params.h.pentium3: new file with tuned values for pentium M ecm-params.h.athlon64: removed useless values ------------------------------------------------------------------------ r1065 | kruppa | 2007-12-03 21:04:20 +0100 (Mon, 03 Dec 2007) | 4 lines Changed paths: A /trunk/x86_64/mulredc.m4 Rewrite of ASM-generating script for mulredc, this time written in m4. Produces slightly faster (on Opteron) code than the old one, probably can be improved yet. ------------------------------------------------------------------------ r1064 | zimmerma | 2007-12-03 18:08:44 +0100 (Mon, 03 Dec 2007) | 2 lines Changed paths: A /trunk/mul_fft.c new FFT code, adapted for GMP-ECM ------------------------------------------------------------------------ r1063 | zimmerma | 2007-12-03 18:01:36 +0100 (Mon, 03 Dec 2007) | 4 lines Changed paths: M /trunk/Makefile.am M /trunk/ecm-params.h.athlon64 M /trunk/ks-multiply.c incorporated new FFT code into GMP-ECM: yields nice speedup in stage 2 with -no-ntt. Works so far only on x86_64, still remains to create parameter files on other architectures. 
------------------------------------------------------------------------ r1062 | zimmerma | 2007-12-02 22:41:26 +0100 (Sun, 02 Dec 2007) | 2 lines Changed paths: M /trunk/TODO added efficiency item ------------------------------------------------------------------------ r1061 | kruppa | 2007-11-26 17:25:28 +0100 (Mon, 26 Nov 2007) | 3 lines Changed paths: M /trunk/countsmooth.c M /trunk/ecm-ecm.h M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/eval.c M /trunk/getprime.c M /trunk/pm1.c M /trunk/pp1.c M /trunk/trial.c P-1 and P+1 now skip from sqrt(B1) to B1done in stage 1 when resuming. Large speedup when increasing large B1 in small steps. ------------------------------------------------------------------------ r1060 | kruppa | 2007-11-26 16:57:42 +0100 (Mon, 26 Nov 2007) | 3 lines Changed paths: M /trunk/pm1fs2.c More P values, malloc() for spv's in parallel region (cpu binding tbd), some changes to comments. ------------------------------------------------------------------------ r1059 | kruppa | 2007-11-16 18:18:30 +0100 (Fri, 16 Nov 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Parallelized P+1 sequence g and DCT of h ------------------------------------------------------------------------ r1058 | zimmerma | 2007-11-16 15:42:26 +0100 (Fri, 16 Nov 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c removed space before 'ms' to be coherent with previous versions of GMP-ECM ------------------------------------------------------------------------ r1057 | kruppa | 2007-11-16 15:04:50 +0100 (Fri, 16 Nov 2007) | 3 lines Changed paths: M /trunk/configure.in M /trunk/mpmod.c M /trunk/pm1fs2.c Some parallelization in the new P+-1 stage 2. Enable with --enable-openmp. Building f, sequence h for P-1, and sequences g and h for P+1 are TBD. ------------------------------------------------------------------------ r1056 | kruppa | 2007-11-09 17:24:26 +0100 (Fri, 09 Nov 2007) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpmod.c Added mpmod_copy() to clone a mpmod_t, i.e. 
for threads ------------------------------------------------------------------------ r1055 | kruppa | 2007-11-08 18:45:59 +0100 (Thu, 08 Nov 2007) | 3 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1fs2.c M /trunk/pp1.c Added P+1 fast stage 2 variant that generates the coordinates of g one at a time. This saves about 30% memory. ------------------------------------------------------------------------ r1054 | kruppa | 2007-11-03 19:42:16 +0100 (Sat, 03 Nov 2007) | 3 lines Changed paths: M /trunk/ecm.c M /trunk/pm1.c M /trunk/pp1.c Fixed bug where resuming and immediately interrupting would produce a lower B1 value in save file than was in input file. ------------------------------------------------------------------------ r1053 | kruppa | 2007-10-30 23:53:18 +0100 (Tue, 30 Oct 2007) | 2 lines Changed paths: M /trunk/main.c Install signal handler only if a save file was specified. ------------------------------------------------------------------------ r1052 | kruppa | 2007-10-30 23:50:18 +0100 (Tue, 30 Oct 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Fixed parameters selection, was slow and produced suboptimal parameters. ------------------------------------------------------------------------ r1051 | kruppa | 2007-10-30 17:52:30 +0100 (Tue, 30 Oct 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Fixes bug in parameter selection, loop condition was wrong ------------------------------------------------------------------------ r1050 | kruppa | 2007-10-30 00:15:24 +0100 (Tue, 30 Oct 2007) | 2 lines Changed paths: M /trunk/mpzspv.c M /trunk/pm1fs2.c Cleanups. Convert from NTT in MPZSPV_NORMALISE_STRIDE blocks. ------------------------------------------------------------------------ r1049 | kruppa | 2007-10-29 16:54:27 +0100 (Mon, 29 Oct 2007) | 2 lines Changed paths: M /trunk/mpzspv.c M /trunk/pm1fs2.c Fixes bug where negative value in mpz_t was passed to mpzspv_to_ntt(). 
------------------------------------------------------------------------ r1048 | kruppa | 2007-10-29 15:11:28 +0100 (Mon, 29 Oct 2007) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/pm1fs2.c Restored non-NTT P-1 stage 2. Small cleanups. ------------------------------------------------------------------------ r1047 | zimmerma | 2007-10-29 14:22:00 +0100 (Mon, 29 Oct 2007) | 2 lines Changed paths: M /trunk/main.c updated champion size for P-1 ------------------------------------------------------------------------ r1046 | kruppa | 2007-10-27 18:42:22 +0200 (Sat, 27 Oct 2007) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpzspv.c M /trunk/pm1fs2.c M /trunk/pp1.c M /trunk/sp.h Added NTT variant of new P+1 stage 2 ------------------------------------------------------------------------ r1045 | kruppa | 2007-10-27 18:37:50 +0200 (Sat, 27 Oct 2007) | 2 lines Changed paths: A /trunk/makesmooth.gp PARI script for generating test numbers for P-1 and P+1 ------------------------------------------------------------------------ r1044 | kruppa | 2007-10-24 18:22:57 +0200 (Wed, 24 Oct 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Reduced temp memory use in list_mul_symmetric() ------------------------------------------------------------------------ r1043 | kruppa | 2007-10-23 15:52:20 +0200 (Tue, 23 Oct 2007) | 3 lines Changed paths: M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c In new P-1 stage 2, only the lmax/2+1 distinct coefficients of the NTT of h are stored now. ------------------------------------------------------------------------ r1042 | kruppa | 2007-10-22 17:42:21 +0200 (Mon, 22 Oct 2007) | 2 lines Changed paths: M /trunk/mpzspm.c M /trunk/pm1fs2.c Small fix and cleanup of pm1_sequence_[gh]. 
------------------------------------------------------------------------ r1041 | kruppa | 2007-10-22 05:00:35 +0200 (Mon, 22 Oct 2007) | 3 lines Changed paths: M /trunk/mpzspv.c M /trunk/pm1fs2.c M /trunk/sp.c M /trunk/sp.h M /trunk/spv.c New P-1 stage 2 changed to use NTT for convolution product. Beware: there is a bug, sometimes misses factors. To be fixed. ------------------------------------------------------------------------ r1040 | kruppa | 2007-10-19 17:25:10 +0200 (Fri, 19 Oct 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Rewrite of pp1_sequence_g() to use only 5 multiplications per g_i ------------------------------------------------------------------------ r1039 | kruppa | 2007-10-18 15:04:15 +0200 (Thu, 18 Oct 2007) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c Take -k parameter into account when choosing parameters for new stage 2 ------------------------------------------------------------------------ r1038 | kruppa | 2007-10-18 15:02:05 +0200 (Thu, 18 Oct 2007) | 2 lines Changed paths: M /trunk/main.c Print hostname of machine it's running on in verbose mode ------------------------------------------------------------------------ r1037 | kruppa | 2007-10-17 02:13:16 +0200 (Wed, 17 Oct 2007) | 3 lines Changed paths: M /trunk/pm1fs2.c Function for exponentiating in extension ring can take mpz_t now, fixes an unsigned long overflow ------------------------------------------------------------------------ r1036 | kruppa | 2007-10-16 16:02:30 +0200 (Tue, 16 Oct 2007) | 4 lines Changed paths: M /trunk/pm1.c M /trunk/pm1fs2.c P+1 does gcd on first coordinate in extension ring of product polynomial now, this fixes the problem with getting 0 in the second coordinate in the last point of evaluation and when m_1 is negative. 
------------------------------------------------------------------------ r1035 | zimmerma | 2007-10-13 20:08:15 +0200 (Sat, 13 Oct 2007) | 3 lines Changed paths: M /trunk/eval.c got rid of quadratic memory reallocation in main eval routine (thanks to Alban Nonymous) ------------------------------------------------------------------------ r1034 | zimmerma | 2007-10-11 22:26:05 +0200 (Thu, 11 Oct 2007) | 2 lines Changed paths: M /trunk/auxi.c fixed copyright line ------------------------------------------------------------------------ r1033 | zimmerma | 2007-10-11 22:15:09 +0200 (Thu, 11 Oct 2007) | 2 lines Changed paths: M /trunk/auxi.c fixed efficiency issue in nb_digits: cost was O(n^2) for n-digit input ------------------------------------------------------------------------ r1032 | kruppa | 2007-10-10 13:58:51 +0200 (Wed, 10 Oct 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Sped up computation of g_i sequence for P-1 ------------------------------------------------------------------------ r1031 | kruppa | 2007-10-09 17:39:27 +0200 (Tue, 09 Oct 2007) | 4 lines Changed paths: M /trunk/ks-multiply.c Changed max(deg(A)+1, deg(B)+1) to min(...) in estimate of product coeff size. Added code to print message if resulting FFT size differs, currently disabled (enable by #define-ing TEST_OLD_S) ------------------------------------------------------------------------ r1030 | kruppa | 2007-10-09 12:02:13 +0200 (Tue, 09 Oct 2007) | 4 lines Changed paths: M /trunk/pm1fs2.c Sped up finding parameters for large lmax and small s_1. Timing output for h_i and g_i sequences. 
------------------------------------------------------------------------ r1029 | zimmerma | 2007-10-06 18:13:47 +0200 (Sat, 06 Oct 2007) | 2 lines Changed paths: M /trunk/mpmod.c fixed typos in comments ------------------------------------------------------------------------ r1028 | kruppa | 2007-10-06 15:17:58 +0200 (Sat, 06 Oct 2007) | 2 lines Changed paths: M /trunk/mpmod.c mpres_mul_z_to_z() now always produces non-negative, fully reduced result ------------------------------------------------------------------------ r1027 | kruppa | 2007-10-06 15:17:17 +0200 (Sat, 06 Oct 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c P+1 fast stage 2 aborts if m_1 < 0 until bug is fixed ------------------------------------------------------------------------ r1026 | kruppa | 2007-10-06 14:44:11 +0200 (Sat, 06 Oct 2007) | 2 lines Changed paths: M /trunk/pp1.c Print parameters correctly for new stage 2 ------------------------------------------------------------------------ r1025 | kruppa | 2007-10-05 21:29:18 +0200 (Fri, 05 Oct 2007) | 3 lines Changed paths: M /trunk/pm1fs2.c Fixed last step in P+1 stage 2 (accumulating product) which always has 0 in last term, causing N to be found as factor. TODO: find out why ------------------------------------------------------------------------ r1024 | kruppa | 2007-10-05 21:08:06 +0200 (Fri, 05 Oct 2007) | 3 lines Changed paths: M /trunk/test.pp1 Fixed test where 3^2-4 was a QR so P+1 really did P-1 (which just happened to work as well with the old code). Uses x0=6 now. ------------------------------------------------------------------------ r1023 | kruppa | 2007-10-05 19:32:33 +0200 (Fri, 05 Oct 2007) | 2 lines Changed paths: M /trunk/pp1.c Changed lmax to 2^20. ------------------------------------------------------------------------ r1022 | kruppa | 2007-10-05 17:56:05 +0200 (Fri, 05 Oct 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c M /trunk/pp1.c Fast P+1 stage 2 started working. More testing and optimization needed. 
------------------------------------------------------------------------ r1021 | kruppa | 2007-10-04 17:54:53 +0200 (Thu, 04 Oct 2007) | 3 lines Changed paths: M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c Generating sequences g and h for P+1 (hopefully) works now. Not well optimized yet. ------------------------------------------------------------------------ r1020 | kruppa | 2007-09-27 18:35:11 +0200 (Thu, 27 Sep 2007) | 3 lines Changed paths: M /trunk/pm1fs2.c Fixed bug in maxS(). Fixed bugs in gfp_ext_rn2(). Extended table of P values. Started pp1fs2() function for P+1 stage 2. ------------------------------------------------------------------------ r1019 | kruppa | 2007-09-25 16:15:26 +0200 (Tue, 25 Sep 2007) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/pm1.c M /trunk/pm1fs2.c Choose and print parameters for new P-1 stage 2 at start of pm1(). ------------------------------------------------------------------------ r1018 | kruppa | 2007-09-25 16:07:47 +0200 (Tue, 25 Sep 2007) | 2 lines Changed paths: M /trunk/mpmod.c Moved MPZ_REALLOC from ecm_mulredc_basecase to mpres_mul* functions ------------------------------------------------------------------------ r1017 | kruppa | 2007-09-25 14:53:20 +0200 (Tue, 25 Sep 2007) | 3 lines Changed paths: M /trunk/mpmod.c Made ecm_mulredc_basecase() reallocate space in R instead of failing an assertion ------------------------------------------------------------------------ r1016 | kruppa | 2007-09-24 14:53:34 +0200 (Mon, 24 Sep 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Fixed bug in computing max(S_1 + S_2) during parameter selection. ------------------------------------------------------------------------ r1015 | kruppa | 2007-09-23 22:33:13 +0200 (Sun, 23 Sep 2007) | 2 lines Changed paths: M /trunk/auxlib.c M /trunk/ecm-impl.h M /trunk/ecm_ntt.c M /trunk/listz.c M /trunk/mpmod.c M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/polyeval.c Fast P-1 stage 2 now uses parameterization as in the paper. 
------------------------------------------------------------------------ r1014 | zimmerma | 2007-09-14 08:48:50 +0200 (Fri, 14 Sep 2007) | 5 lines Changed paths: M /trunk/ChangeLog M /trunk/Makefile.am M /trunk/README.dev M /trunk/configure.in ChangeLog: added changes since release 6.1 that were missing README.dev: added hint about man page Makefile.am: ensure that ecm.1 is in the tarball configure.in: fixed warning from autoconf ------------------------------------------------------------------------ r1013 | kruppa | 2007-09-10 14:15:00 +0200 (Mon, 10 Sep 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Added code for computing r^(n^2) over quadratic extension ring ------------------------------------------------------------------------ r1012 | kruppa | 2007-09-10 14:13:47 +0200 (Mon, 10 Sep 2007) | 2 lines Changed paths: M /trunk/NEWS Changes for 6.1.2 and 6.1.3 ------------------------------------------------------------------------ r1011 | zimmerma | 2007-09-10 09:16:45 +0200 (Mon, 10 Sep 2007) | 3 lines Changed paths: M /trunk/mpmod.c M /trunk/pm1fs2.c mpmod.c: patch from Alex (wrong initialization in mpmod_init_MPZ) pm1fs2.c: default is now method=1 instead of method=0 ------------------------------------------------------------------------ r1010 | zimmerma | 2007-09-07 17:05:31 +0200 (Fri, 07 Sep 2007) | 2 lines Changed paths: M /trunk/mpmod.c use Mulder's algorithm (ecm_redc_n) in REDC only when xn=2n ------------------------------------------------------------------------ r1009 | zimmerma | 2007-09-07 16:44:56 +0200 (Fri, 07 Sep 2007) | 2 lines Changed paths: M /trunk/TODO added analysis of slowdown reported by Ch. Clavier ------------------------------------------------------------------------ r1008 | zimmerma | 2007-09-07 16:42:20 +0200 (Fri, 07 Sep 2007) | 4 lines Changed paths: M /trunk/mpmod.c modified ecm_redc_n to allow xn = 2n-1 too (happens often when the high limb of the modulus has few bits); unfortunately for large inputs (e.g. 
the c58672 in TODO) this seems to be slower than the else-branch in REDC ------------------------------------------------------------------------ r1007 | zimmerma | 2007-09-07 14:24:17 +0200 (Fri, 07 Sep 2007) | 3 lines Changed paths: M /trunk/configure.in changed version number of development version to 6.2 (6.1.1 was already an existing release) ------------------------------------------------------------------------ r1006 | zimmerma | 2007-09-07 14:15:55 +0200 (Fri, 07 Sep 2007) | 2 lines Changed paths: M /trunk/ecm-gmp.h fixed incorrect comment ------------------------------------------------------------------------ r1005 | zimmerma | 2007-09-07 14:10:00 +0200 (Fri, 07 Sep 2007) | 3 lines Changed paths: M /trunk/INSTALL M /trunk/ecm-gmp.h INSTALL: updated GMP web page and version ecm-gmp.h: mpn_mul_fft now returns int (>= GMP 4.2.1) ------------------------------------------------------------------------ r1004 | kruppa | 2007-09-04 16:44:51 +0200 (Tue, 04 Sep 2007) | 2 lines Changed paths: M /trunk/mpmod.c Corrected bugfix for using mpn_mul_fft(). ------------------------------------------------------------------------ r1003 | kruppa | 2007-09-03 12:22:00 +0200 (Mon, 03 Sep 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Fixed some compiler warnings about unused variables and functions. ------------------------------------------------------------------------ r1002 | kruppa | 2007-08-31 19:35:28 +0200 (Fri, 31 Aug 2007) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpmod.c Marked input arguments of functions "const". 
------------------------------------------------------------------------ r1001 | kruppa | 2007-08-31 19:34:40 +0200 (Fri, 31 Aug 2007) | 2 lines Changed paths: M /trunk/ecm_ntt.c Fixed warning the right way this time ------------------------------------------------------------------------ r1000 | kruppa | 2007-08-31 19:33:54 +0200 (Fri, 31 Aug 2007) | 2 lines Changed paths: M /trunk/ecm.c Made return values use the FACTOR_FOUND defines ------------------------------------------------------------------------ r999 | kruppa | 2007-08-30 22:34:36 +0200 (Thu, 30 Aug 2007) | 2 lines Changed paths: M /trunk/ecm_ntt.c Circumvent compiler warnings ------------------------------------------------------------------------ r998 | kruppa | 2007-08-30 16:42:49 +0200 (Thu, 30 Aug 2007) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpmod.c M /trunk/pm1fs2.c M /trunk/test.pm1 Some code rearrangements in preparation for fast P+1 stage 2 ------------------------------------------------------------------------ r997 | zimmerma | 2007-08-29 16:46:17 +0200 (Wed, 29 Aug 2007) | 3 lines Changed paths: M /trunk/TODO M /trunk/ecm2.c M /trunk/sp.h M /trunk/tune.c TODO: added item (efficiency regression) tune.c, ecm2.c, sp.h: fixed compiler warnings with -W -Wall ------------------------------------------------------------------------ r996 | kruppa | 2007-08-24 15:48:33 +0200 (Fri, 24 Aug 2007) | 3 lines Changed paths: M /trunk/pm1.c Fixes bug reported by P.L.Montgomery: B1 was converted from double to unsigned long before assigning it to B2min, causing truncation. 
------------------------------------------------------------------------ r995 | zimmerma | 2007-08-01 17:57:38 +0200 (Wed, 01 Aug 2007) | 2 lines Changed paths: M /trunk/TODO updated reference ------------------------------------------------------------------------ r994 | zimmerma | 2007-07-31 13:52:59 +0200 (Tue, 31 Jul 2007) | 2 lines Changed paths: M /trunk/TODO added pointer to new algorithm ------------------------------------------------------------------------ r993 | zimmerma | 2007-07-22 13:24:41 +0200 (Sun, 22 Jul 2007) | 2 lines Changed paths: M /trunk/ecm.h applied patch from Emmanuel Thome to use the library mode from a C++ program ------------------------------------------------------------------------ r992 | zimmerma | 2007-07-15 10:22:57 +0200 (Sun, 15 Jul 2007) | 2 lines Changed paths: M /trunk/main.c updated P-1 champion size ------------------------------------------------------------------------ r991 | kruppa | 2007-06-18 13:01:29 +0200 (Mon, 18 Jun 2007) | 2 lines Changed paths: M /trunk/stage2.c Fixes incorrect memory estimate for stage 2 ------------------------------------------------------------------------ r990 | zimmerma | 2007-06-16 21:19:05 +0200 (Sat, 16 Jun 2007) | 2 lines Changed paths: M /trunk/main.c fixed bug #3448: better check for invalid B2 ------------------------------------------------------------------------ r989 | zimmerma | 2007-06-05 18:33:29 +0200 (Tue, 05 Jun 2007) | 2 lines Changed paths: M /trunk/eval.c fixed bug #3363 (Expression parser needs to check for remainder in division) ------------------------------------------------------------------------ r988 | kruppa | 2007-04-22 17:06:41 +0200 (Sun, 22 Apr 2007) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/mpmod.c M /trunk/pm1fs2.c Speedups and cleanups for fast stage 2 code. 
------------------------------------------------------------------------ r987 | kruppa | 2007-04-22 17:05:54 +0200 (Sun, 22 Apr 2007) | 3 lines Changed paths: M /trunk/median.c Removed unnecessary recursive calls in TToomCookMul_space() which could inflate run-time considerably for degenerate cases. ------------------------------------------------------------------------ r986 | kruppa | 2007-04-22 17:04:23 +0200 (Sun, 22 Apr 2007) | 2 lines Changed paths: M /trunk/ks-multiply.c Added ASSERTS to check that input coefficients are non-negative. ------------------------------------------------------------------------ r985 | kruppa | 2007-04-16 12:57:28 +0200 (Mon, 16 Apr 2007) | 3 lines Changed paths: M /trunk/Makefile.am M /trunk/pm1fs2.c Building symmetric polynomial as in draft 8.1 implemented. Some clean up work left to be done yet. Added Makefile target for pm1fs2 test drive binary. ------------------------------------------------------------------------ r984 | kruppa | 2007-03-30 14:43:19 +0200 (Fri, 30 Mar 2007) | 2 lines Changed paths: M /trunk/techdocs/buildpoly.tex Fixes. Added case p^k | n, i.e. n not squarefree. ------------------------------------------------------------------------ r983 | kruppa | 2007-03-28 19:05:08 +0200 (Wed, 28 Mar 2007) | 2 lines Changed paths: M /trunk/Makefile.am M /trunk/auxlib.c M /trunk/ecm-impl.h M /trunk/main.c M /trunk/mpmod.c M /trunk/pm1.c M /trunk/pm1fs2.c Implemented building F from arithmetic progressions of prime length. ------------------------------------------------------------------------ r982 | kruppa | 2007-03-19 18:25:05 +0100 (Mon, 19 Mar 2007) | 3 lines Changed paths: M /trunk/techdocs/buildpoly.tex M /trunk/techdocs/convolv.tex More details on sets of coprime residues in buildpoly.tex. Small corrections and additions in convolv.tex.
------------------------------------------------------------------------ r981 | zimmerma | 2007-03-19 08:23:00 +0100 (Mon, 19 Mar 2007) | 2 lines Changed paths: M /trunk/main.c updated champion sizes ------------------------------------------------------------------------ r980 | kruppa | 2007-03-16 15:28:44 +0100 (Fri, 16 Mar 2007) | 2 lines Changed paths: M /trunk/techdocs/buildpoly.tex Small corrections, additions ------------------------------------------------------------------------ r979 | kruppa | 2007-03-15 21:06:05 +0100 (Thu, 15 Mar 2007) | 2 lines Changed paths: A /trunk/techdocs/buildpoly.tex A note on Montgomery's idea for fast building F from its roots ------------------------------------------------------------------------ r978 | kruppa | 2007-03-12 18:35:54 +0100 (Mon, 12 Mar 2007) | 2 lines Changed paths: M /trunk/techdocs/schoen_strass.tex Small fixes. ------------------------------------------------------------------------ r977 | kruppa | 2007-03-07 23:36:31 +0100 (Wed, 07 Mar 2007) | 2 lines Changed paths: M /trunk/pm1.c Removed duplicated "special division for 2^n+-1" message in P-1. ------------------------------------------------------------------------ r976 | zimmerma | 2007-03-07 18:56:25 +0100 (Wed, 07 Mar 2007) | 2 lines Changed paths: M /trunk/pm1fs2.c Output "Step 2 took ..." in normal mode (as in stage2.c) ------------------------------------------------------------------------ r975 | kruppa | 2007-03-07 10:41:57 +0100 (Wed, 07 Mar 2007) | 3 lines Changed paths: M /trunk/pm1fs2.c Print total stage 2 time. Fix memory allocation bug when modulus->bits < 0, i.e. for 2^n-1 numbers. 
------------------------------------------------------------------------ r974 | kruppa | 2007-03-06 11:25:22 +0100 (Tue, 06 Mar 2007) | 2 lines Changed paths: M /trunk/schoen_strass.c Make transposed Karatsuba return the number of multiplications used ------------------------------------------------------------------------ r973 | kruppa | 2007-03-06 10:43:51 +0100 (Tue, 06 Mar 2007) | 2 lines Changed paths: A /trunk/techdocs A /trunk/techdocs/convolv.tex A /trunk/techdocs/curve_convert.tex A /trunk/techdocs/schoen_strass.tex Notes on some of the math and algorithms used in GMP-ECM ------------------------------------------------------------------------ r972 | kruppa | 2007-03-05 18:37:11 +0100 (Mon, 05 Mar 2007) | 2 lines Changed paths: M /trunk/bestd.c M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/mpmod.c M /trunk/pm1.c M /trunk/pm1fs2.c M /trunk/pp1.c Debugging and speedups for Fast P-1 stage 2 ------------------------------------------------------------------------ r971 | kruppa | 2007-03-05 18:35:38 +0100 (Mon, 05 Mar 2007) | 2 lines Changed paths: M /trunk/schoen_strass.c Debugging for lenA == lenB/2+1 case ------------------------------------------------------------------------ r970 | kruppa | 2007-03-05 18:33:07 +0100 (Mon, 05 Mar 2007) | 2 lines Changed paths: M /trunk/median.c Added some ASSERT()s ------------------------------------------------------------------------ r969 | kruppa | 2007-03-05 18:31:58 +0100 (Mon, 05 Mar 2007) | 2 lines Changed paths: M /trunk/listz.c Added/corrected some ASSERT()s ------------------------------------------------------------------------ r968 | kruppa | 2007-02-26 11:21:33 +0100 (Mon, 26 Feb 2007) | 2 lines Changed paths: M /trunk/x86_64/mulredc2.asm Reverting this file to revision 908 ------------------------------------------------------------------------ r967 | kruppa | 2007-02-26 03:08:58 +0100 (Mon, 26 Feb 2007) | 3 lines Changed paths: M /trunk/Makefile.am M /trunk/ecm-impl.h M /trunk/ecm_ntt.c M /trunk/listz.c M
/trunk/main.c M /trunk/median.c M /trunk/mpmod.c M /trunk/pm1.c A /trunk/pm1fs2.c M /trunk/schoen_strass.c M /trunk/stage2.c M /trunk/x86_64/bench.c M /trunk/x86_64/mulredc1.asm M /trunk/x86_64/mulredc2.asm Changes in preparation of a fast P-1 stage 2. Crude and incomplete implementation of fast P-1 stage 2. ------------------------------------------------------------------------ r966 | zimmerma | 2007-02-15 22:48:33 +0100 (Thu, 15 Feb 2007) | 2 lines Changed paths: M /trunk/ecm-params.h.core2 removed extra line break ------------------------------------------------------------------------ r965 | zimmerma | 2007-02-15 22:39:06 +0100 (Thu, 15 Feb 2007) | 2 lines Changed paths: A /trunk/ecm-params.h.core2 tuned parameters contributed by tom@womack.net ------------------------------------------------------------------------ r964 | zimmerma | 2007-02-05 18:15:21 +0100 (Mon, 05 Feb 2007) | 2 lines Changed paths: M /trunk/bestd.c extended again the d-table with good phi(d) ------------------------------------------------------------------------ r963 | zimmerma | 2007-02-05 18:01:21 +0100 (Mon, 05 Feb 2007) | 2 lines Changed paths: M /trunk/bestd.c extended table for d values with good phi(d) ------------------------------------------------------------------------ r962 | kruppa | 2007-02-04 00:17:46 +0100 (Sun, 04 Feb 2007) | 2 lines Changed paths: M /trunk/ecm.c Print point and curve in Weierstrass form at beginning of stage 2 with -v -v ------------------------------------------------------------------------ r961 | zimmerma | 2007-01-02 16:58:44 +0100 (Tue, 02 Jan 2007) | 2 lines Changed paths: A /trunk/c200 new test number (from 10^284+1) ------------------------------------------------------------------------ r960 | zimmerma | 2006-12-18 10:45:08 +0100 (Mon, 18 Dec 2006) | 2 lines Changed paths: M /trunk/AUTHORS added NTT contribution for Dave ------------------------------------------------------------------------ r959 | zimmerma | 2006-12-18 09:44:03 +0100 (Mon, 18 
Dec 2006) | 2 lines Changed paths: M /trunk/COPYING M /trunk/COPYING.LIB M /trunk/Fgw.c M /trunk/auxi.c M /trunk/auxlib.c M /trunk/b1_ainc.c M /trunk/bestd.c M /trunk/bestdaux.c M /trunk/candi.c M /trunk/config.guess M /trunk/config.sub M /trunk/configfsf.guess M /trunk/configfsf.sub M /trunk/countsmooth.c M /trunk/ecm-ecm.h M /trunk/ecm-gmp.h M /trunk/ecm-impl.h M /trunk/ecm.c M /trunk/ecm.h M /trunk/ecm2.c M /trunk/ecm_ntt.c M /trunk/ecmfactor.c M /trunk/eval.c M /trunk/factor.c M /trunk/getprime.c M /trunk/ks-multiply.c M /trunk/listz.c M /trunk/longlong.h M /trunk/lucas.c M /trunk/main.c M /trunk/median.c M /trunk/memory.c M /trunk/mpmod.c M /trunk/mpzspm.c M /trunk/mpzspv.c M /trunk/mul_lo.c M /trunk/ntt_gfp.c M /trunk/pm1.c M /trunk/polyeval.c M /trunk/pp1.c M /trunk/random.c M /trunk/resume.c M /trunk/rho.c M /trunk/runecm2.c M /trunk/schoen_strass.c M /trunk/sp.c M /trunk/sp.h M /trunk/spm.c M /trunk/spv.c M /trunk/stage2.c M /trunk/test.ecm M /trunk/test.pm1 M /trunk/test.pp1 M /trunk/toomcook.c M /trunk/trial.c M /trunk/tune.c changed address of FSF to new one ------------------------------------------------------------------------ r958 | zimmerma | 2006-12-12 13:32:34 +0100 (Tue, 12 Dec 2006) | 4 lines Changed paths: M /trunk/mpmod.c moved "using special division" from mpmod_init to mpmod_init_base2 (the latter is called directly from ecm.c, thus the message was not displayed, as noticed by Peter Montgomery) ------------------------------------------------------------------------ r957 | kruppa | 2006-10-10 12:45:57 +0200 (Tue, 10 Oct 2006) | 2 lines Changed paths: M /trunk/trial.c Avoid infinite loop in trial division with zero as input number. 
------------------------------------------------------------------------ r956 | zimmerma | 2006-10-05 09:18:21 +0200 (Thu, 05 Oct 2006) | 2 lines Changed paths: M /trunk/INSTALL added hint about -max_log2_len option for tune ------------------------------------------------------------------------ r955 | zimmerma | 2006-10-05 09:14:30 +0200 (Thu, 05 Oct 2006) | 3 lines Changed paths: M /trunk/tune.c max_log2_len is now a command-line parameter in tune.c (suggestion from Thomas M.Ott) ------------------------------------------------------------------------ r954 | kruppa | 2006-08-22 12:34:55 +0200 (Tue, 22 Aug 2006) | 3 lines Changed paths: M /trunk/ecm-ecm.h M /trunk/main.c Print first and last ten digits of numbers >1000 digits. Small cleanup in ecm-ecm.h (duplicated #define's). ------------------------------------------------------------------------ r953 | kruppa | 2006-08-06 14:46:56 +0200 (Sun, 06 Aug 2006) | 2 lines Changed paths: M /trunk/TODO Added several ideas ------------------------------------------------------------------------ r952 | kruppa | 2006-08-05 08:56:44 +0200 (Sat, 05 Aug 2006) | 2 lines Changed paths: M /trunk/auxlib.c M /trunk/ecm-impl.h More cleanups of -chkpnt code ------------------------------------------------------------------------ r951 | kruppa | 2006-08-04 17:16:17 +0200 (Fri, 04 Aug 2006) | 4 lines Changed paths: M /trunk/auxlib.c M /trunk/ecm-impl.h M /trunk/stage2.c Fixed stupid segfault in chkpnt code. Changed checkpoint interval to 10 min. Fixed bug when cleaning up treefiles after receiving signal. 
------------------------------------------------------------------------ r950 | kruppa | 2006-08-03 21:10:44 +0200 (Thu, 03 Aug 2006) | 3 lines Changed paths: M /trunk/README M /trunk/auxlib.c M /trunk/ecm-impl.h M /trunk/ecm.1 M /trunk/ecm.c M /trunk/ecm.h M /trunk/ecm.xml M /trunk/factor.c M /trunk/main.c M /trunk/pm1.c M /trunk/pp1.c M /trunk/resume.c M /trunk/stage2.c By popular request: added option to write checkpoints periodically during stage 1. ------------------------------------------------------------------------ r949 | kruppa | 2006-08-03 18:54:47 +0200 (Thu, 03 Aug 2006) | 2 lines Changed paths: M /trunk/ecm-impl.h M /trunk/listz.c M /trunk/stage2.c stage2.c did not #include config.h, so unistd.h was not included either. ------------------------------------------------------------------------ r948 | zimmerma | 2006-07-27 14:16:43 +0200 (Thu, 27 Jul 2006) | 2 lines Changed paths: M /trunk/main.c new size for P-1 champion ------------------------------------------------------------------------ r947 | zimmerma | 2006-07-25 14:10:22 +0200 (Tue, 25 Jul 2006) | 2 lines Changed paths: M /trunk/sp.h added copyright information ------------------------------------------------------------------------ r946 | zimmerma | 2006-07-25 13:29:42 +0200 (Tue, 25 Jul 2006) | 2 lines Changed paths: M /trunk/ecm2.c M /trunk/main.c M /trunk/stage2.c fixed some compiler warnings ------------------------------------------------------------------------ r945 | kruppa | 2006-07-25 12:09:07 +0200 (Tue, 25 Jul 2006) | 4 lines Changed paths: M /trunk/stage2.c Following a comment of P.L.Montgomery, added message when computing product of the F(g_i) at end of stage 2 to avoid unexplained delay for large input numbers. ------------------------------------------------------------------------ r944 | lfousse | 2006-07-19 22:30:59 +0200 (Wed, 19 Jul 2006) | 2 lines Changed paths: M /trunk/stage2.c Include header for `unlink' in stage2.c. 
------------------------------------------------------------------------ r943 | zimmerma | 2006-07-17 10:10:51 +0200 (Mon, 17 Jul 2006) | 2 lines Changed paths: M /trunk/main.c new size for P+1 champion ------------------------------------------------------------------------ r942 | zimmerma | 2006-07-07 10:59:37 +0200 (Fri, 07 Jul 2006) | 2 lines Changed paths: M /trunk/factor.c stage1time and use_ntt were not initialized in ecm_init() ------------------------------------------------------------------------ r941 | zimmerma | 2006-05-30 08:56:15 +0200 (Tue, 30 May 2006) | 2 lines Changed paths: M /trunk/main.c changed minimum size of ecm champions ------------------------------------------------------------------------ r940 | lfousse | 2006-05-24 13:28:47 +0200 (Wed, 24 May 2006) | 2 lines Changed paths: M /trunk/runecm2.c Use a *real* dummy domain instead of an existing one for the example email. ------------------------------------------------------------------------ r939 | zimmerma | 2006-05-13 08:19:37 +0200 (Sat, 13 May 2006) | 2 lines Changed paths: M /trunk/runecm2.c changed email ------------------------------------------------------------------------ r938 | zimmerma | 2006-05-05 17:20:28 +0200 (Fri, 05 May 2006) | 2 lines Changed paths: M /trunk/configure.in fixed calls to AC_ARG_ENABLE() ------------------------------------------------------------------------ r937 | lfousse | 2006-05-05 17:08:43 +0200 (Fri, 05 May 2006) | 2 lines Changed paths: M /trunk/configure.in Use proper invocation of AC_ARG_ENABLE for asm-redc.
------------------------------------------------------------------------ r936 | zimmerma | 2006-05-05 17:05:37 +0200 (Fri, 05 May 2006) | 2 lines Changed paths: M /trunk/configure.in M /trunk/ecm.1 M /trunk/ecm.xml changed mailing-list address ------------------------------------------------------------------------ r935 | gaudry | 2006-05-05 16:34:32 +0200 (Fri, 05 May 2006) | 1 line Changed paths: M /trunk/configure.in fixed error message for --enable-asm-redc ------------------------------------------------------------------------ r934 | lfousse | 2006-05-05 16:01:31 +0200 (Fri, 05 May 2006) | 2 lines Changed paths: M /trunk/Makefile.am A /trunk/athlon/Makefile.am M /trunk/configure.in A /trunk/pentium4/Makefile.am A /trunk/x86_64/Makefile.am Use automake's DIST_DIRS feature for a cleaner `dist' target. ------------------------------------------------------------------------ r933 | lfousse | 2006-05-05 15:49:15 +0200 (Fri, 05 May 2006) | 2 lines Changed paths: M /trunk/Makefile.am redc assembly files in toplevel dir are symlinks and should not be part of `dist'. 
------------------------------------------------------------------------ r932 | zimmerma | 2006-05-03 12:08:07 +0200 (Wed, 03 May 2006) | 2 lines Changed paths: M /trunk/INSTALL added url of mailing-list archive ------------------------------------------------------------------------ r931 | zimmerma | 2006-05-03 09:36:15 +0200 (Wed, 03 May 2006) | 2 lines Changed paths: M /trunk/Fgw.c M /trunk/configure.in M /trunk/mpzspv.c get rid of valloc ------------------------------------------------------------------------ r930 | zimmerma | 2006-05-02 17:50:04 +0200 (Tue, 02 May 2006) | 2 lines Changed paths: M /trunk/AUTHORS M /trunk/INSTALL M /trunk/Makefile.am M /trunk/README.dev M /trunk/configure.in fixed stupid removal in Makefile.am, prepared for release 6.1.1 ------------------------------------------------------------------------ r928 | zimmerma | 2006-05-01 22:51:53 +0200 (Mon, 01 May 2006) | 2 lines Changed paths: M /trunk/ChangeLog added 'tag' for release 6.1 in ChangeLog Mon May 01 2006 22:49:07 zimmerma -- r927 Released version 6.1. Mon May 01 2006 13:01:15 kruppa -- r923 * trunk/Fgw.c, trunk/main.c, trunk/ecm2.c: modified Small cleanups. Added exit code 143 when exiting due to signal. Sun Apr 02 2006 18:38:08 zimmerma -- r922 * trunk/INSTALL: modified updated to gmp-4.2 and ecm-6.1 Fri Mar 31 2006 13:36:34 zimmerma -- r921 * trunk/TODO: modified added item Wed Mar 22 2006 11:46:18 gaudry -- r920 * trunk/configure.in: modified When asm-redc is enabled, check whether the computer is not too old, because the default asm code needs at least a PPro or a k7.
Fri Mar 17 2006 15:31:58 zimmerma -- r919 * trunk/TODO, trunk/configure.in, trunk/Makefile.am: modified added missing sources in Makefile.am added suggestion from James Wanless Fri Mar 17 2006 14:18:39 zimmerma -- r918 * trunk/README.dev: modified updated ChangeLog instructions Fri Mar 17 2006 13:55:41 zimmerma -- r917 * trunk/ecm.1, trunk/ecm.xml, trunk/AUTHORS, trunk/TODO, trunk/configure.in, trunk/ChangeLog, trunk/README.dev, trunk/NEWS: modified updated NEWS/ChangeLog for 6.0.1 release Fri Mar 17 2006 08:51:14 zimmerma -- r916 * trunk/pm1.c, trunk/pp1.c, trunk/ecm.c, trunk/ecm-impl.h: modified default B2 is less aggressive (exponent 1.43 instead of 1.5) put exponent and costs as macros in ecm-impl.h Tue Mar 14 2006 15:45:26 zimmerma -- r915 * trunk/mpmod.c, trunk/test.ecm: modified fixed bug reported by Allan Steel Fri Mar 10 2006 15:42:07 zimmerma -- r914 * trunk/test.pp1: modified fixed new test to work on 32-bit machine too Fri Mar 10 2006 15:11:54 zimmerma -- r913 * trunk/test.pp1: modified added test case for P+1 bug Fri Mar 10 2006 14:45:29 zimmerma -- r912 * trunk/TODO: modified added workaround Fri Mar 10 2006 14:11:26 zimmerma -- r911 * trunk/TODO: modified added suggestion from Bernstein Tue Mar 07 2006 16:15:54 zimmerma -- r910 * trunk/lucas.c: modified fixed overflow bug in P+1 (unsigned int -> unsigned long) P+1 was probably not working for B1>2^32 on 64-bit machines Tue Mar 07 2006 15:59:57 zimmerma -- r909 * trunk/README: modified changed s into x0 Mon Mar 06 2006 13:49:35 gaudry -- r908 * (MANY FILES) : added * trunk/redc.asm: deleted * trunk/mpmod.c, trunk/ecm.c, trunk/configure.in, trunk/Makefile.am: modified Redc and combined Mul/Redc in asm for different archi (p4, athlon, amd64) Configure.in and Makefile.am modified accordingly. 
Wed Feb 22 2006 15:58:39 zimmerma -- r907 * trunk/mpmod.c, trunk/schoen_strass.c, trunk/Fgw.c: modified added warnings for uses of _mpz_realloc Wed Feb 22 2006 15:05:45 zimmerma -- r906 * trunk/TODO: modified modified one bug item Thu Feb 16 2006 13:40:16 gaudry -- r905 * trunk/ecm2.c: modified Fixed some memory bugs in multiplyW2n(), that occurred in -v -v and/or WANT_ASSERT mode, because it is called with NULL as first arguments. Fixed a double free of variables "coeffs" and a missing free for fd. Wed Feb 15 2006 14:08:12 zimmerma -- r904 * trunk/TODO, trunk/INSTALL: modified modified instructions for gwnum Wed Feb 15 2006 13:50:49 zimmerma -- r903 * trunk/TODO, trunk/stage2.c: modified disable NTT for Fermat numbers Tue Feb 14 2006 10:49:15 zimmerma -- r902 * trunk/TODO: modified added more examples of problems with Fermat numbers Mon Feb 13 2006 13:06:33 zimmerma -- r901 * trunk/TODO: modified added item Thu Jan 19 2006 17:40:59 zimmerma -- r900 * trunk/TODO: modified added item in TODO Mon Jan 16 2006 21:47:11 zimmerma -- r899 * trunk/ecm.c, trunk/lucas.c: modified improved to 17-digit values of decimal constants used in PRAC Sun Jan 15 2006 21:31:16 kruppa -- r898 * trunk/pm1.c, trunk/pp1.c, trunk/ecm.c, trunk/stage2.c, trunk/ecm-impl.h: modified Added some stop_asap() checks to stage 2. Sun Jan 15 2006 09:13:20 zimmerma -- r897 * trunk/TODO: modified added item in TODO (CPUTIME) Fri Jan 13 2006 21:22:14 kruppa -- r896 * trunk/pm1.c, trunk/factor.c, trunk/pp1.c, trunk/ecm.c, trunk/main.c, trunk/ecm-impl.h, trunk/ecm.h: modified Signal handling for P+1 added, save files contain correct B1done value. Stage 2 and cleanup TBD. Thu Jan 12 2006 23:08:21 kruppa -- r895 * trunk/pm1.c, trunk/factor.c, trunk/pp1.c, trunk/ecm.c, trunk/main.c, trunk/configure.in, trunk/ecm.h: modified Add signal handler to exit gracefully. ECM and P-1 stage 1 mostly done, rest TBD.
Wed Dec 21 2005 08:36:30 kruppa -- r894 * trunk/schoen_strass.c: modified Bugfix: static mpz_t gt might be used after being mpz_clear()'ed Wed Dec 21 2005 00:38:10 kruppa -- r893 * trunk/schoen_strass.c, trunk/stage2.c: modified Mem leak fix: clear static mpz_t in schoen_strass.c at end of stage 2. Wed Dec 21 2005 00:31:33 kruppa -- r892 * trunk/mpmod.c, trunk/test.ecm, trunk/listz.c, trunk/stage2.c, trunk/ecm2.c, trunk/ecm-impl.h: modified Print stage 2 prime of group order if factor was found and -v -v. Needs more polishing and not tested as much as I'd like yet. Sun Nov 13 2005 09:29:37 kruppa -- r891 * trunk/README, trunk/ecm.1, trunk/ecm.xml, trunk/main.c: modified Added -idlecmd option to pause or quit GMP-ECM when system is busy Sat Nov 12 2005 07:40:08 kruppa -- r890 * trunk/ecm.c: modified Tests in stage 1 if point at infinity is reached and prints message in verbose mode. Nice for finding group order of curves. Fri Nov 11 2005 07:18:51 zimmerma -- r889 * trunk/main.c: modified changed champs information, and updated minimal digit size to get champions Thu Oct 27 2005 11:11:19 zimmerma -- r888 * trunk/ecm.1: modified file generated from ecm.xml, with empty lines manually removed in the last paragraph Thu Oct 27 2005 11:01:53 zimmerma -- r887 * trunk/ecm.xml: modified added line-breaks in AUTHORS section Thu Oct 27 2005 07:54:42 zimmerma -- r886 * trunk/README.dev: modified add hint for autoreconf Wed Oct 26 2005 07:51:56 zimmerma -- r885 * trunk/ecm-impl.h: modified patch for old gcc versions Wed Oct 26 2005 07:01:04 zimmerma -- r884 * trunk/TODO: modified added item Mon Oct 24 2005 17:13:15 zimmerma -- r883 * trunk/ecm.c: modified changed default B2 for ecm Sat Oct 22 2005 15:45:43 zimmerma -- r882 * trunk/AUTHORS: modified added pointer to gforge Thu Sep 29 2005 19:26:35 zimmerma -- r881 * trunk/runecm2.c: modified fixed potential buffer overrun Thu Sep 29 2005 12:08:52 zimmerma -- r880 * trunk/runecm2.c: added contribution from Torbjo"rn Wed Sep 28 2005 
13:30:51 kruppa -- r879 * trunk/ecm.c: modified Added warning about prac() bug (calling add3() with identical points) Sat Sep 10 2005 19:56:55 kruppa -- r878 * trunk/README, trunk/main.c: modified Slight cleanup of shell command code, replaced "-prp*" section of README by shellcmd section. Thu Sep 08 2005 19:51:02 kruppa -- r877 * trunk/README, trunk/candi.c, trunk/main.c, trunk/TODO: modified Made -one work better when used with -resume Tue Sep 06 2005 14:02:20 zimmerma -- r876 * trunk/TODO: modified added item Mon Sep 05 2005 12:09:53 zimmerma -- r875 * trunk/Makefile.am: modified added missing entries for "make dist" Thu Sep 01 2005 15:17:44 dnewman -- r874 * trunk/stage2.c: modified Make memory_use take into account sp_F. Thu Sep 01 2005 13:15:42 dnewman -- r873 * trunk/stage2.c, trunk/ecm-impl.h, trunk/ecm_ntt.c, trunk/tune.c: modified Precompute transform of F for use in ntt_PrerevertDivision. Wed Aug 24 2005 13:55:52 zimmerma -- r872 * trunk/check.mpl: modified added add3/duplicate code in Montgomery's coordinates Sat Aug 20 2005 18:41:19 kruppa -- r871 * trunk/TODO: modified Marked -stage1time done, removed shell commands entry (also done) Sat Aug 20 2005 18:37:59 kruppa -- r870 * trunk/Fgw.c, trunk/ecm.c: modified Assume ecm stage 1 always available in GWNUM library, pass error codes from gw_stage_1() back correctly Sat Aug 20 2005 18:16:24 kruppa -- r869 * trunk/test.ecm: modified Some more tests for base 2 numbers, for testing the GWNUM stage 1 Fri Aug 19 2005 21:00:35 kruppa -- r868 * trunk/Makefile.am: modified Fixed filenames in EXTRA_DIST Fri Aug 19 2005 14:23:03 dnewman -- r867 * (MANY FILES) : modified Comprehensive header cleanup; in particular, headers now satisfy their dependencies on other headers. Moved the -n / -nn renicing code from main.c to macros NICE10 / NICE20 in ecm-ecm.h. Added the configure option --enable-memory-debug to conditionally compile memory.c. Fixed some printf format/argument mismatches in tune.c and memory.c. 
Added some info on NTT and tune to README and documented -no-ntt in ecm.xml. Wed Aug 17 2005 22:47:58 dnewman -- r866 * trunk/mpzspv.c, trunk/TODO, trunk/configure.in, trunk/mpzspm.c, trunk/sp.h, trunk/spv.c, trunk/tune.c: modified Changed mpzspm_t to use spm's instead of __spm_struct's. Added code to use memmove in spv_set. Removed all bovine activity in configure.in. Minor update to TODO. Wed Aug 17 2005 16:11:28 dnewman -- r865 * trunk/schoen_strass.c, trunk/spv.c, trunk/ecm-gmp.h, trunk/tune.c: modified Changed RNG in tune.c to avoid using get_random_ui() as on some platforms (MinGW) it's too slow to be called many times. Fixed declarations of __gmpn_add_nc and __gmpn_mod_34lsub1 which were causing segfaults under Cygwin. Wed Aug 10 2005 12:22:34 dnewman -- r864 * trunk/spm.c, trunk/sp.c, trunk/sp.h, trunk/tune.c: modified Fixed potential problem with spm_init's generation of primitive roots. Tue Aug 09 2005 16:31:34 dnewman -- r863 * trunk/tune.c: modified Changed GRANULARITY to 250ms for more precision. Added a '-v' cmdline option that prints every function evaluation to stderr; also added a TUNE_SLOW define to give possibly more consistent results. Wed Aug 03 2005 20:45:24 dnewman -- r862 * trunk/nbdigits.c: deleted * trunk/auxi.c: modified Rewrote nb_digits() to remove dependency on string.h and FREE(). Deleted an empty file. Wed Aug 03 2005 20:13:48 dnewman -- r861 * trunk/redc.asm: added * trunk/redc.s: deleted * trunk/acinclude.m4, trunk/configure.in, trunk/Makefile.am: modified Imported some more routines from GMP's acinclude.m4 to fix problem with --enable-asm-redc under windows. Renamed redc.s back to redc.asm as it now goes through m4 before the assembler. Fixed a minor issue with architecture detection in configure.in. 
Wed Aug 03 2005 09:14:19 dnewman -- r860 * trunk/ecm-params.h.athlon, trunk/ecm-params.h.powerpc7450: added * trunk/ecm-params.h.athlonxp, trunk/ecm-params.h.power4: deleted * trunk/configure.in: modified Some tweaks to ecm-params detection. Wed Aug 03 2005 08:42:27 dnewman -- r859 * trunk/config.sub, trunk/configfsf.guess, trunk/config.guess, trunk/configfsf.sub: added Added GMP's finer-grained CPU detection (useful for selecting the right ecm-params.h) Tue Aug 02 2005 18:47:24 kruppa -- r858 * trunk/ecm.1: added Adding man page ecm.1 to CVS (xsltproc/docbook not available everywhere) Tue Aug 02 2005 17:46:09 dnewman -- r857 * trunk/pm1.c, trunk/factor.c, trunk/pp1.c, trunk/bestd.c, trunk/TODO, trunk/ecm.c, trunk/main.c, trunk/configure.in, trunk/stage2.c, trunk/Makefile.am, trunk/ecm-impl.h, trunk/ecm.h, trunk/tune.c: modified Removed the configure option --enable-ntt. Now ntt code is used by default but can be disabled with the command-line option -no-ntt. Lots of changes to function prototypes to accommodate this. Tue Aug 02 2005 16:58:46 kruppa -- r856 * trunk/test.pp1, trunk/test.ecm, trunk/test.pm1: modified Changed syntax of function to sh (Bourne shell) instead of bash Tue Aug 02 2005 16:30:21 dnewman -- r855 * trunk/getprime2.c, trunk/random2.c: deleted * trunk/test.pp1, trunk/random.c, trunk/test.ecm, trunk/Makefile.am, trunk/test.pm1: modified Changed test script shell to /bin/sh as MinGW doesn't have bash. Makefile.am now replaces the getprime2.c and random2.c hacks. Fixed CryptGenRandom() in random.c (maybe) and cleaned up the #includes a bit. Tue Aug 02 2005 15:14:30 kruppa -- r854 * trunk/bestd.c, trunk/ks-multiply.c, trunk/tune.c, trunk/trial.c: modified Fixed compiler warnings with gcc -Wall -W Tue Aug 02 2005 10:06:02 kruppa -- r853 * trunk/ecm.xml: modified Removed -prp* section, added shell commands section. Fixed exit code tables. Mon Aug 01 2005 22:52:06 dnewman -- r852 * trunk/main.c, trunk/trial.c: modified Cleanups to trial.c. 
Bugfix to probab_prime_p. Mon Aug 01 2005 22:00:03 kruppa -- r851 * trunk/smartprp.c: deleted * trunk/candi.c, trunk/main.c, trunk/configure.in, trunk/Makefile.am, trunk/ecm-ecm.h: modified Removed -prp* options and smartprp.c, added -prpcmd option Mon Aug 01 2005 20:13:11 kruppa -- r850 * trunk/pm1.c, trunk/factor.c, trunk/pp1.c, trunk/random.c, trunk/auxlib.c, trunk/main.c, trunk/ecm.c, trunk/stage2.c, trunk/ecm2.c, trunk/ecm-impl.h, trunk/memory.c, trunk/ecm.h, trunk /ks-multiply.c, trunk/tune.c: modified Added -stage1time option. All time-keeping variables are of type long now. Mon Aug 01 2005 16:45:56 kruppa -- r849 * trunk/mul_fft.c: deleted Not needed for GMP-ECM (part of GMP) Sun Jul 31 2005 18:10:36 kruppa -- r848 * trunk/mpmod.c, trunk/factor.c, trunk/bestd.c, trunk/auxlib.c, trunk/TODO, trunk/main.c, trunk/ecm.c, trunk/listz.c, trunk/stage2.c, trunk/ecm2.c: modified Allocate more memory to mpz_t's in stage 2 to avoid reallocs. More allocation locations tagged for mem leak/realloc debugging. Fixed segfault in stage 2 (if factor found in roots of F) Sun Jul 31 2005 18:00:32 kruppa -- r847 * trunk/memory.c: modified Prints peak memory allocation Sat Jul 30 2005 15:24:49 kruppa -- r846 * trunk/mpmod.c, trunk/median.c, trunk/TODO, trunk/listz.c, trunk/stage2.c, trunk/ecm2.c, trunk/ecm-impl.h, trunk/memory.c, trunk/ks-multiply.c, trunk/ecm-ecm.h, trunk/polyeval.c: modified Some changes to avoid unnecessary reallocs. 
memory.c can print location of mpz_init() that led to mem leak/realloc (if tagged) Sat Jul 30 2005 15:08:14 kruppa -- r845 * trunk/configure.in: modified Looks for DocBook stylesheets in several directories Sat Jul 30 2005 15:07:08 kruppa -- r844 * trunk/README.lib: modified Added maxmem entry Fri Jul 29 2005 13:50:56 kruppa -- r843 * trunk/random.c: modified Removed leftover debug output Thu Jul 28 2005 23:17:58 kruppa -- r842 * trunk/pm1.c, trunk/factor.c, trunk/random.c, trunk/pp1.c, trunk/ecm.c, trunk/main.c, trunk/ecm.h: modified Seed RNG only once per program invocation. Use GetRandCrypt() under Windows, but untested yet: Wine lacks required dlls. Thu Jul 28 2005 20:55:16 kruppa -- r841 * trunk/pm1.c, trunk/factor.c, trunk/pp1.c, trunk/bestd.c, trunk/ecm.xml, trunk/TODO, trunk/auxlib.c, trunk/ecm.c, trunk/main.c, trunk/stage2.c, trunk/ecm2.c, trunk/ecm-impl.h, trunk/rho.c, trunk/ecm.h: modified Added -maxmem option. Memory estimate not perfectly accurate yet. Sun Jul 24 2005 21:40:14 kruppa -- r840 * trunk/pm1.c, trunk/test.pp1, trunk/pp1.c, trunk/bestd.c, trunk/test.ecm, trunk/ecm.c, trunk/stage2.c, trunk/test.pm1: modified Fixed bug that occurred when B2 < B2min (did a stage 2, but shouldn't) Sun Jul 24 2005 19:51:36 kruppa -- r839 * trunk/Fgw.c, trunk/ecm.c, trunk/main.c: modified Cleanups in Fgw.c Sat Jul 23 2005 23:00:51 kruppa -- r838 * trunk/ecm.xml: modified Added chapter for exit status values Sat Jul 23 2005 21:51:24 kruppa -- r837 * trunk/mpmod.c, trunk/schoen_strass.c, trunk/configure.in, trunk/Makefile.am: modified Correctly aligns GWDATA segment when GWNUM library is used. Fixed compilation of tune when using GWNUM. Fri Jul 22 2005 21:14:45 kruppa -- r836 * trunk/Fgw.c, trunk/Makefile.am: modified Speedup for mpz_t <-> gwnum conversion. An elusive bug remains.
Thu Jul 21 2005 13:22:12 dnewman -- r835 * trunk/tune2.c: deleted * trunk/ecm-params.h.alpha-ev6, trunk/mpmod.c, trunk/ecm- params.h.default, trunk/TODO, trunk/ecm-params.h.athlonxp, trunk/mul_lo.c, trunk/sp.h, trunk/Makefile.am, trunk/ecm- params.h.athlon64, trunk/ecm_ntt.c, trunk/mpzspv.c, trunk/ntt_gfp.c, trunk/configure.in, trunk/ecm-impl.h, trunk/ecm- params.h.power4, trunk/ecm-params.h.alpha-ev5, trunk/tune.c: modified Added mpn_mul_n tuning to tune.c and erased tune2.c. Moved all the '#ifdef TUNE' blocks to sp.h and ecm-impl.h. Updated ecm-params.*. Fixed minor bug in configure.in. Updated TODO. Wed Jul 20 2005 23:37:17 dnewman -- r834 * trunk/test.pp1, trunk/mpzspv.c, trunk/test.ecm, trunk/auxlib.c, trunk/TODO, trunk/configure.in, trunk/mpzspm.c, trunk/spv.c, trunk/Makefile.am, trunk/test.pm1, trunk/tune.c: modified Complete overhaul of cputime () in auxlib.c, including changes to configure.in. This fixes the mingw issue of cputime () giving calendar time instead of process time. Tested on cygwin, mingw, athlon, athlon 64, alpha. Fixed tune.c to use elltime (). Some portability fixes in spv.c, mpzspv.c. Changed /bin/bash to /bin/sh in the test scripts. Shortened an unnecessarily long expression in mpzspm.c. Updated TODO. Tue Jul 19 2005 16:13:28 dnewman -- r833 * trunk/TODO, trunk/TODO.sp, trunk/stage2.c: modified Adjusted the expected memory calculation to take into account -treefile and NTT memory. Removed some items from TODO, TODO.sp Tue Jul 19 2005 13:49:33 dnewman -- r832 * trunk/tune-ecm_ntt.c, trunk/tune-mpmod.c, trunk/tune-mpzspv.c, trunk/tune-ntt_gfp.c: deleted * trunk/configure.in, trunk/Makefile.am, trunk/tune.c: modified Removed the dirty hack used to compile tune separately from the main code. Fixed a segfault bug in tune.c. 
Now configure links ecm- params.h.xxx to ecm-params.h (but see the TODO in configure.in) Fri Jul 15 2005 14:59:11 dnewman -- r831 * trunk/ecm-params.h.power4: added * trunk/mpzspv.c, trunk/mpzspm.c, trunk/spv.c: modified Corrected header for malloc() to fix compilation problem on OS X. Added ecm-params.h for the power4 line of cpus. Wed Jul 13 2005 20:53:53 dnewman -- r830 * trunk/ecm-params.h.alpha-ev6, trunk/ecm-params.h.default, trunk /tune-mpmod.c, trunk/ecm-params.h.athlonxp, trunk/ecm- params.h.athlon64, trunk/tune-ecm_ntt.c, trunk/tune-mpzspv.c, trunk /tune-ntt_gfp.c, trunk/ecm-params.h.alpha-ev5: added * trunk/mpmod.c, trunk/sp.h, trunk/Makefile.am, trunk/ecm_ntt.c, trunk/test.pm1, trunk/test.pp1, trunk/mpzspv.c, trunk/ntt_gfp.c, trunk/test.ecm, trunk/ecm-impl.h, trunk/tune.c: modified Added computation of NTT thresholds to tune.c by means of tune-*.c. Added ecm-params target to Makefile.am and ecm-params.h for some architectures. Fixed minor problem in test.* that was causing script errors on alphas. Wed Jul 13 2005 10:35:28 dnewman -- r829 * trunk/ntt_gfp.c, trunk/configure.in, trunk/sp.h, trunk/tune.c: modified Rewrote tune.c to use a function pointer framework for computing thresholds. Removed a redundant line from configure.in. Commented out unused functions in ntt_gfp.c. Tue Jul 12 2005 17:19:19 kruppa -- r828 * trunk/getprime.c, trunk/mpmod.c, trunk/factor.c, trunk/schoen_strass.c, trunk/main.c, trunk/stage2.c, trunk/ecm2.c, trunk/eval.c, trunk/test.pm1, trunk/test.pp1, trunk/pm1.c, trunk/pp1.c, trunk/test.ecm, trunk/ecm.c, trunk/lucas.c, trunk/ecm- impl.h, trunk/ecm.h, trunk/ecm-ecm.h, trunk/tune.c: modified Return code of ecm reflects primality of factor (if any) and cofactor. Renamed MOD_* macros to ECM_MOD_* and moved to ecm.h Mon Jul 11 2005 22:19:32 kruppa -- r827 * trunk/TODO: modified Removed entries for Montgomery roots, moving param selection out of stage 2. 
Added entries for fixing cputime and RNG seeding under Windows Mon Jul 11 2005 09:48:56 zimmerma -- r826 * trunk/mpmod.c: modified fixed bug in mpmod_init (use of mpz_sizeinbase instead of mpz_size) Sat Jul 09 2005 22:05:42 kruppa -- r825 * trunk/ecm.c, trunk/ecm2.c, trunk/ecm-impl.h: modified Added code for generating roots of F,G for ECM in Montgomery coordinates if S==1. Does not work yet and will probably never be fast. Disabled by default Thu Jul 07 2005 13:07:58 kruppa -- r824 * trunk/mpmod.c, trunk/pm1.c, trunk/ecm-impl.h: modified Fixes bug introduced with negative i0 Wed Jul 06 2005 15:29:12 kruppa -- r823 * trunk/pp1.c, trunk/stage2.c, trunk/ecm-impl.h: modified Fixes bug in P+1 introduced in last update Wed Jul 06 2005 07:34:04 dnewman -- r822 * trunk/mpzspv.c, trunk/TODO.sp: modified Rewrote mpzspv_to_mpzv to use a constant amount of memory, with a small resulting speedup. Updated TODO.sp. Wed Jul 06 2005 06:37:19 dnewman -- r821 * trunk/acinclude.m4: added * trunk/configure.in, trunk/Makefile.am: modified Some improvements to configure.in. Now --enable-redc verifies that the cpu really is a 32-bit x86 (with the help of a macro in acinclude.m4), and the manpage is only compiled if xsltproc and docbook.xsl are present. Tue Jul 05 2005 22:00:03 dnewman -- r820 * trunk/Fgw.c, trunk/TODO, trunk/configure.in, trunk/Makefile.am, trunk/ecm-impl.h: modified Added options --enable-asm-redc and --enable-ntt - now the gwnum, redc.s and ntt code compiles conditionally. Removed corresponding items from TODO, added one more.
Tue Jul 05 2005 21:05:43 dnewman -- r819 * trunk/redc.s: added * trunk/redc.asm: deleted Renamed redc.asm to redc.s Mon Jul 04 2005 22:24:06 dnewman -- r818 * trunk/stage2.c: modified Changed how sp_num is displayed when using -v Mon Jul 04 2005 21:13:00 kruppa -- r817 * trunk/mpmod.c, trunk/pm1.c, trunk/schoen_strass.c, trunk/resume.c, trunk/bestd.c, trunk/auxlib.c, trunk/stage2.c, trunk/rho.c, trunk/ecm_ntt.c, trunk/ecm-gmp.h, trunk/ecm-ecm.h, trunk/polyeval.c: modified Some cleanups to avoid compiler warnings Mon Jul 04 2005 20:20:52 dnewman -- r816 * trunk/configure.in: modified Removed AC_FUNC_MALLOC and AC_FUNC_REALLOC from configure.in, see http://lists.gnu.org/archive/html/bug-autoconf/2002-10/msg00075.html Mon Jul 04 2005 19:49:38 dnewman -- r815 * trunk/mpzspv.c, trunk/sp.h, trunk/ecm_ntt.c: modified Added missing config.h include to sp.h. Commented memory usage in many of the ntt functions. Reduced memory usage of mpzspv_normalise, resulting in a speedup. Mon Jul 04 2005 19:43:14 kruppa -- r814 * trunk/ecm2.c: modified Replaced variable length array in multiplyW2n with an mpz_t Mon Jul 04 2005 18:45:57 dnewman -- r813 * trunk/TODO: modified Added estimated memory item to TODO. Mon Jul 04 2005 06:39:03 dnewman -- r812 * (MANY FILES) : modified Replaced snprintf with malloc + sprintf throughout. Fixed incorrect prototype for ceil_log2(). Removed prototype for (static) usage(). Changed #include "gmp.h" to #include throughout. Sun Jul 03 2005 21:33:49 kruppa -- r811 * trunk/pm1.c, trunk/pp1.c, trunk/bestd.c, trunk/ecm.c, trunk/main.c, trunk/listz.c, trunk/stage2.c, trunk/ecm2.c, trunk/ecm-impl.h, trunk/ecm.h: modified Moved selection of dF, k, d1, d2 out of stage 2 so correct parameters can be printed immediately. Print expected number of curves before stage 1. Made dF and k unsigned long throughout. 
Sun Jul 03 2005 21:12:34 dnewman -- r810 * trunk/mpmod.c, trunk/schoen_strass.c, trunk/mpzspv.c, trunk/random.c, trunk/main.c, trunk/listz.c, trunk/configure.in, trunk/stage2.c, trunk/sp.h, trunk/spv.c, trunk/ecm-impl.h, trunk/ecm_ntt.c, trunk/ks-multiply.c, trunk/ecm-ecm.h, trunk/polyeval.c: modified Preliminary changes to configure script, now configure.in generates config.h. Added some autoconf checks. Renamed some #defines for standardisation purposes. Commented out some unused functions in spv.c. Sat Jul 02 2005 19:47:25 kruppa -- r809 * trunk/TODO: modified Some updates Sat Jul 02 2005 19:35:30 kruppa -- r808 * trunk/pp1.c, trunk/bestd.c, trunk/ecm2.c: modified Code to init roots of G can deal with negative i0 now Sat Jul 02 2005 16:18:28 dnewman -- r807 * trunk/sp.c, trunk/sp.h: modified Added 64-bit primality test for 64-bit machines. Defined UDItype in sp.h for longlong.h (fixes compilation on Athlon 64). Sat Jul 02 2005 15:26:32 kruppa -- r806 * trunk/pm1.c, trunk/pp1.c, trunk/stage2.c, trunk/ecm2.c: modified Cleanup. Removed redundant variable "s", using "i0" instead Sat Jul 02 2005 10:21:56 kruppa -- r805 * trunk/Fgw.c: modified Added dummy function to avoid "empty file" warning. To be fixed, use conditional compilation instead Sat Jul 02 2005 10:09:01 kruppa -- r804 * trunk/stage2.c: modified Removed stray "%" (caused segfault) Sat Jul 02 2005 09:40:34 kruppa -- r803 * trunk/mpzspv.c: modified Added for valloc() etc. Fixes crash on Sparc v9 Fri Jul 01 2005 20:08:28 kruppa -- r802 * trunk/stage2.c: modified Avoid floating point division by 0. when printing expected nr. of curves Thu Jun 30 2005 22:41:30 dnewman -- r801 * trunk/tune2.c, trunk/schoen_strass.c, trunk/TODO, trunk/configure.in, trunk/stage2.c, trunk/mul_lo.c, trunk/sp.h, trunk/ecm-impl.h: modified Added item to TODO. Fixed compilation for when HAVE_NTT not defined. Added AC_C_INLINE to configure.in and changed INLINE to inline everywhere. 
Wed Jun 15 2005 12:28:17 zimmerma -- r800 * trunk/TODO: modified added new item Mon Jun 13 2005 12:31:58 dnewman -- r799 * trunk/mpzspv.c, trunk/ecm_ntt.c: modified Minor fixes to header inclusion. Thu Jun 09 2005 14:35:21 dnewman -- r798 * trunk/ecm_ntt.c: modified Fixed potential segfault Thu Jun 09 2005 14:00:38 dnewman -- r797 * trunk/mpzspv.c, trunk/ntt_gfp.c, trunk/Fgw.c, trunk/stage2.c, trunk/spv.c, trunk/ecm_ntt.c: modified Removed declaration-within-code ISO-C unorthodoxy Thu Jun 09 2005 13:17:57 zimmerma -- r796 * trunk/stage2.c: modified // -> /* ... */ Thu Jun 09 2005 13:13:49 zimmerma -- r795 * trunk/sp.h: modified removed C++-style comments Thu Jun 09 2005 13:12:34 zimmerma -- r794 * trunk/Makefile.am: modified added longlong.h in noinst_HEADERS Thu Jun 09 2005 12:05:05 dnewman -- r793 * trunk/stage2.c, trunk/ecm-impl.h, trunk/ecm_ntt.c: modified Added -treefile support to ntt_PolyFromRoots_Tree and ntt_polyevalT Tue Jun 07 2005 19:34:46 kruppa -- r792 * trunk/rho.c: modified Minor cleanups (no change in functionality) Tue Jun 07 2005 19:32:43 kruppa -- r791 * trunk/mpmod.c, trunk/Fgw.c, trunk/ecm.c, trunk/ecm-impl.h: modified Interface to Woltman's GWNUM stage 1 for ECM Wed May 18 2005 11:56:07 kruppa -- r790 * trunk/mpzspv.c: modified #ifdef'd an malloc_usable_size() Sun Apr 24 2005 13:40:50 dnewman -- r789 * trunk/mpzspv.c, trunk/configure.in: modified Check for malloc_usable_size in autoconf as not all libc's have it. Sat Apr 23 2005 04:57:32 zimmerma -- r788 * trunk/getprime.c: modified improved getprime main loop Mon Apr 11 2005 16:12:23 dnewman -- r787 * trunk/ntt_gfp.c: modified Changed large length DIT's to use a recursive algorithm, rather than scramble + DIF + scramble. 
Sun Apr 10 2005 14:50:51 kruppa -- r786 * trunk/TODO: modified Added sliding window multiplication for ECM entry Thu Apr 07 2005 16:07:06 dnewman -- r785 * trunk/mpzspv.c: added * trunk/spm.c, trunk/ntt_gfp.c, trunk/stage2.c, trunk/sp.h, trunk/spv.c: modified Changed mpzspp to mpzspv and mpzp to mpzv. Added mpzspv_verify and lots of mpzspv assertions. Minor speedup by using valloc rather than malloc for sp coeff alignment. Speedup for ntt_PolyFromRoots. Memory reduction and speedup for ntt_polyevalT, also fixed a memory leak. mpzspv_to_mpzv no longer clobbers the input. Minor cosmetic changes. Fixed (probably) and documented the upper bound on sp_num in mpzspm_init. Check for some malloc errors. Thu Apr 07 2005 15:48:30 dnewman -- r784 * trunk/mpzspp.c: deleted * trunk/TODO.sp, trunk/mpzspm.c, trunk/Makefile.am, trunk/ecm-impl.h, trunk/ecm_ntt.c: modified Renamed mpzspp.c to mpzspv.c Tue Apr 05 2005 17:43:10 kruppa -- r783 * trunk/main.c: modified Added -faccmd option, compiled in only if WANT_FACCMD is defined Sat Apr 02 2005 00:40:57 dnewman -- r782 * trunk/stage2.c, trunk/sp.h, trunk/ecm-impl.h, trunk/mpzspp.c, trunk/ecm_ntt.c, trunk/polyeval.c: modified Added a preliminary version of ntt_polyevalT (without treefile support) and a couple of helper routines in mpzspp.c. Minor change to alignment of sp_invF. Un-static'd TUpTree in polyeval.c so ntt_polyevalT can use it. Minor change to thresholds in sp.h. Fri Apr 01 2005 08:15:57 kruppa -- r781 * trunk/NEWS: modified Added news entries for 6.0.1 Thu Mar 31 2005 21:57:41 dnewman -- r780 * trunk/TODO.sp, trunk/stage2.c, trunk/sp.h, trunk/ecm-impl.h, trunk/mpzspp.c, trunk/ecm_ntt.c: modified Adjusted functions in ecm_ntt.c so mpzspm_init is now only called once (ever). Abandoned the "automatic transform" idea. Rewrote ntt_PolyInvert and saved a transform per level. Rewrote ntt_PrerevertDivision to use a cached transform of 1/F. Fixed potential bug in mpzspp_normalise. 
Updated TODO.sp Thu Mar 31 2005 21:50:35 kruppa -- r779 * trunk/INSTALL: modified Added detail to, removed typo from Win install instruction Thu Mar 31 2005 19:35:36 fousse -- r778 * trunk/INSTALL: modified Typo. Thu Mar 31 2005 19:22:16 kruppa -- r777 * trunk/INSTALL: modified Added install instructions for Windows/MinGW Wed Mar 30 2005 15:53:29 kruppa -- r776 * trunk/README: modified Updated Note on ECM extra smoothness Wed Mar 30 2005 15:34:47 kruppa -- r775 * trunk/rho.c: modified Remove GSL dilog_series code (is GPL, not LGPL). Changed EXTRA_SMOOTHNESS to 23.4 (Montgomery's value) Mon Mar 28 2005 18:07:54 kruppa -- r774 * trunk/TODO: modified Added shell command on event entry Mon Mar 28 2005 18:05:52 kruppa -- r773 * trunk/main.c: modified Fix segfault when parsing B2, work around MinGW scanf() bug Mon Mar 28 2005 18:04:57 kruppa -- r772 * trunk/stage2.c, trunk/rho.c: modified Free rhotable memory at end of stage 2 Wed Mar 23 2005 19:45:30 zimmerma -- r771 * trunk/ecm-impl.h: modified added missing macro Wed Mar 23 2005 19:36:35 zimmerma -- r770 * trunk/pp1.c, trunk/ecm.c, trunk/mul_lo.c: modified added comments and normalization when FULL_REDUCTION is not defined Wed Mar 23 2005 17:07:25 kruppa -- r769 * trunk/TODO: modified Added negative i0, composite d2, avoiding reallocs, getting bestD() out of stage 2. Added release targets for some entries. Tue Mar 22 2005 15:02:43 zimmerma -- r768 * trunk/bestd.c, trunk/TODO: modified added one TODO item added one dF value (600600) Mon Mar 14 2005 01:24:18 dnewman -- r767 * trunk/ntt_gfp.c, trunk/sp.h, trunk/spv.c, trunk/mpzspp.c, trunk/ecm_ntt.c: modified Started work on a framework for transform caching - now mpzspp's should automatically cache transforms and normalise. Added mpzspp_[to/from]_ntt functions to facilitate this and help simplify ntt_PrerevertDivision. Fixed an incorrect assertion in spv_mul. 
Sat Mar 12 2005 19:24:44 dnewman -- r766 * trunk/sp.h, trunk/mpzspp.c, trunk/ecm_ntt.c: modified mpzspp_t's are now passed by reference instead of by value. Tue Mar 08 2005 17:22:18 dnewman -- r765 * trunk/listz.c: modified Fixed a bug in list_neg Tue Mar 08 2005 13:50:34 zimmerma -- r764 * trunk/listz.c: modified fixed potential bug in list_neg Tue Mar 08 2005 13:35:27 dnewman -- r763 * trunk/ecm.xml: modified Corrected Dave Newman's email address from firstname.lastname@... to david.lastname@... Tue Mar 08 2005 01:21:37 dnewman -- r762 * trunk/sp.h, trunk/ecm_ntt.c: modified Unrolled recursion in ntt_PolyInvert and added a DEBUG block to verify it gives the right answer. Adjusted NTT_POLYINVERT_THRESHOLD in sp.h Mon Mar 07 2005 11:57:47 dnewman -- r761 * trunk/sp.h, trunk/ecm_ntt.c: modified Rewrote ntt_PolyInvert. This seems to stop the "Found input number N" errors. Adjusted POLYINVERT_NTT_THRESHOLD accordingly. Sun Mar 06 2005 19:21:41 kruppa -- r760 * trunk/ecm.xml: modified Small corrections, Sch"onhage now spelled with oe. 
Sun Mar 06 2005 13:29:07 zimmerma -- r759 * trunk/TODO, trunk/configure.in: modified changed version to 6.1 added item in TODO Sat Mar 05 2005 21:29:25 zimmerma -- r758 * trunk/resume.c, trunk/main.c, trunk/eval.c, trunk/ecm.h: modified patch for Apple ('\r' instead of '\n' for newline) Fri Mar 04 2005 20:46:07 kruppa -- r757 * trunk/README.lib, trunk/main.c, trunk/INSTALL: modified Small correction to --help output (missing abs bars in -base2 option) Fri Mar 04 2005 13:05:24 kruppa -- r756 * trunk/Fgw.c: modified Added copyright notice for GWNUM based code Fri Mar 04 2005 12:33:06 dnewman -- r755 * trunk/spm.c, trunk/ntt_gfp.c, trunk/sp.c, trunk/longlong.h, trunk/mpzspm.c, trunk/sp.h, trunk/spv.c, trunk/ecm_ntt.c, trunk/mpzspp.c: modified added licence headers Fri Mar 04 2005 12:07:47 dnewman -- r754 * trunk/ecm-impl.h: modified added ecm-specific ntt functions Fri Mar 04 2005 11:51:38 dnewman -- r753 * trunk/stage2.c: modified added HAVE_NTT option to enable sp code, changed dF to be always a power-of-two Fri Mar 04 2005 11:49:40 dnewman -- r752 * trunk/spv.c: added added spv.c Fri Mar 04 2005 11:49:29 dnewman -- r751 * trunk/spm.c: added added spm.c Fri Mar 04 2005 11:49:04 dnewman -- r750 * trunk/ntt_gfp.c: added added ntt_gfp.c Fri Mar 04 2005 11:48:47 dnewman -- r749 * trunk/mpzspp.c: added added mpzspp.c Fri Mar 04 2005 11:48:29 dnewman -- r748 * trunk/mpzspm.c: added added mpzspm.c Fri Mar 04 2005 11:48:08 dnewman -- r747 * trunk/longlong.h: added added longlong.h (copy from gmp-4.1.4) Fri Mar 04 2005 11:47:33 dnewman -- r746 * trunk/listz.c: modified removed static from list_mul and list_neg so the sp code can use them Fri Mar 04 2005 11:46:23 dnewman -- r745 * trunk/ecm_ntt.c: added added ecm_ntt.c Fri Mar 04 2005 11:46:02 dnewman -- r744 * trunk/TODO.sp: added added TODO.sp Fri Mar 04 2005 11:45:34 dnewman -- r743 * trunk/Makefile.am: modified added sp sources Fri Mar 04 2005 10:39:30 dnewman -- r742 * trunk/sp.c: added added sp.c Fri Mar 04 2005 
10:39:12 dnewman -- r741 * trunk/sp.h: added added sp.h Fri Mar 04 2005 09:11:07 zimmerma -- r740 * trunk/README: modified added note about GMP thresholds Wed Mar 02 2005 22:46:58 zimmerma -- r739 * trunk/tune2.c: modified added ECM_STDOUT, ECM_STDERR Wed Mar 02 2005 22:06:00 zimmerma -- r738 * trunk/schoen_strass.c, trunk/random.c, trunk/main.c, trunk/listz.c, trunk/stage2.c, trunk/ecm2.c, trunk/eval.c, trunk/ks-multiply.c, trunk/polyeval.c: modified small changes for _MSC_VER prints memory usage (with -v) prints argv[0] instead of "ecm" in case of error Wed Mar 02 2005 22:02:34 zimmerma -- r737 * trunk/TODO: modified added item (-maxmem) Wed Mar 02 2005 22:02:06 zimmerma -- r736 * trunk/README: modified added "10. Known problems" Wed Mar 02 2005 22:01:17 zimmerma -- r735 * trunk/AUTHORS: modified added beta-testers Wed Mar 02 2005 21:30:08 kruppa -- r734 * trunk/README: modified Mention default S for Fermat numbers (S=1 or 2). "he thinks" -> "it thinks". Wed Mar 02 2005 12:37:53 zimmerma -- r733 * trunk/main.c: modified put back "Run xxx out of yyy" in loop mode Wed Mar 02 2005 12:24:42 zimmerma -- r732 * trunk/TODO: modified added new item Tue Mar 01 2005 21:52:27 kruppa -- r731 * trunk/rho.c: modified Fixed array out-of-bounds access, reported by Jon Becker Tue Mar 01 2005 21:51:31 kruppa -- r730 * trunk/b1_ainc.c, trunk/pm1.c, trunk/pp1.c, trunk/ecm.c: modified Calculation of default B2 in ecm(), pm1() or pp1() would overwrite caller's B2min, B2 (mpz_t), using local copies now. Auto-increment returns rounded B1. 
Fixes bug reported by "Phil MjX" on mersenneforum.org Tue Mar 01 2005 10:19:55 zimmerma -- r729 * trunk/test.ecm: modified simplified test so that it takes less time (overall gain of about 3) Tue Mar 01 2005 08:30:01 zimmerma -- r728 * trunk/mpmod.c, trunk/ecm.c, trunk/tune.c: modified fix to link problem for "tune" Mon Feb 28 2005 16:07:28 kruppa -- r727 * trunk/stage2.c, trunk/configure.in, trunk/polyeval.c: modified Checks for snprintf(), if not available falls back to sprintf() Mon Feb 28 2005 13:11:10 kruppa -- r726 * trunk/Fgw.c: modified Removed original gwnum dword->fft conversion code Mon Feb 28 2005 11:40:55 zimmerma -- r724 * trunk/ChangeLog, trunk/README.dev: modified updated ChangeLog/README.dev 2005-02-28 paul * release version 6.0 * ecm.xml: updated -primetest 2005-02-27 alex * pm1.c, pp1.c, ecm.c: For Fermat numbers, S=1 (S=2 for P-1) is now default * Makefile.am, configure.in, ecm-impl.h, main.c, mpmod.c, schoen_strass.c, Fgw.c: More changes for linking gwnum 2005-02-24 alex * Fgw.c, configure.in: Changes for linking gwnum (incomplete) * main.c: Added option -h for getting help * ecm.xml: More cleanup 2005-02-23 paul * auxlib.c, ecm-impl.h, ecm.c, ecm2.c, ks-multiply.c, pm1.c, pp1.c, stage2.c, tune.c, tune2.c: hopefully fixed the wrap-around bug of cputime() * INSTALL, NEWS, README, configure.in: changed version to 6.0 removed "Known problems" in README (were obsolete) * random2.c, smartprp.c, trial.c, auxi.c, b1_ainc.c, candi.c, eval.c, getprime2.c, main.c: fixed copyright years/names 2005-02-23 alex * ecm.xml: Some inconsistent typesetting fixed * ecm.xml: added -base2, -timestamp, -savea, different loglevels 2005-02-23 paul * ChangeLog, Makefile.am, README, README.lib, auxi.c, configure.in, ecmfactor.c, factor.c, pp1.c, smartprp.c, tune2.c: gmp -> GMP fixed one memory leak added missing (L)GPL headers 2005-02-22 paul * Makefile.am, README: forgot to install ecm.h * AUTHORS, ChangeLog, INSTALL, Makefile.am, NEWS, README, README.lib, TODO, 
ecm-ecm.h, ecm-impl.h, ecm.xml, eval.c, mpmod.c, mul_lo.c, polyeval.c, tune2.c: changes after remarks from Torbjo"rn on rc4 * INSTALL, Makefile.am, auxi.c, candi.c, eval.c, main.c, memory.c: don't install tune*, ecmfactor fixed problems with -DMEMORY_DEBUG 2005-02-22 paul * INSTALL, Makefile.am, auxi.c, candi.c, eval.c, main.c, memory.c: don't install tune*, ecmfactor fixed problems with -DMEMORY_DEBUG 2005-02-22 alex * stage2.c: bestD() overwriting original B2 caused B2 to keep ever growing. Fixed * README, bestd.c: Corrected computation of effective B2 value in bestD(). It did not account for rounding up due to integer block size. This changes computed probabilities, table in README updated accordingly * main.c: Added option -timestamp * INSTALL: On SunOS, the compiler and flags must be specified at configure time, or testing for GMP lib/header version match fails * TODO: Removed -base2 entry, it's done 2005-02-22 laurent * Makefile.am: Distribute ecm.xml too. 2005-02-22 paul * factor.c, main.c: fixed memory leaks * listz.c: moved declaration outside a #ifdef 2005-02-22 alex * bestd.c, rho.c, stage2.c, README: bestD() computes new effective B2 value. ecmprob() now assumes group order divisible by 24 on average (README updated). * auxlib.c: outputf() was missing the va_end(). Not sure if it is really needed (i.e. to avoid memleaks), but it's cleaner to have it 2005-02-22 paul * TODO, auxlib.c: fixed problem under MinGW (thanks Japke Rosink) 2005-02-22 alex * main.c: Added #include , missing it causes segfault on Amd64 because sizeof(int) != sizeof(char*) 2005-02-22 paul * ecmprob.magma: added some results 2005-02-21 alex * stage2.c, main.c, pm1.c, pp1.c, ecm.c, ecm.h, factor.c, bestd.c, ecm-impl.h: Changed B2, B2min to mpz_t, can now be arbitrarily large so long as B2-B2min < ~1e24. 2005-02-19 paul * pm1.c, pp1.c: put back x0=... in normal verbose mode (P-1/P+1) 2005-02-19 alex * ecm2.c, main.c, mpmod.c, pm1.c, pp1.c, ecm-impl.h, ecm.c: Added option -base2. 
Added option -savea (appending to save files). Fixed bug in ecm2.c multiplyWn() debugging code * pp1.c, stage2.c, pm1.c, ecm.c, ecm-impl.h: Added another verbosity level: RESVERBOSE, enabled with "-v -v". Prints intermediate residues (after stage 1 etc), mere "-v" does not anymore 2005-02-18 paul * pp1.c: fixed problem with |S|=1 (was: pp1_mul did modify the exponent passed) 2005-02-18 alex * pp1.c: A bug for |S| > 1 fixed, |S| == 1 still mysteriously broken * stage2.c, bestd.c, ecm-impl.h, ecm.h, ecm2.c, main.c, pm1.c, pp1.c: Using mpz_t for i0, s and related vars. Eliminates most overflow conditions. Currently, P+1 with S=1 does not work correctly, to be fixed later 2005-02-17 alex * ChangeLog: Added treefile, double sieve, new ECM root generation that reduces extgcds * TODO: treefile is done, removed 2005-02-17 paul * ecm-impl.h, ecm.c, pp1.c, ChangeLog, TODO: changed default poly. choice in P+1 (now same as in ECM) 2005-02-17 alex * stage2.c: Fixed segfault: n could get mpz_clear'd without having been inited Fixed overflow condition in fin_diff_coeff() by using mpz_t's. Makes test.pp1 work with |S| > 1. Small comment changes. 
* pm1.c: Changed printf to outputf, #ifdef DEBUG to WANT_ASSERT and exit() to return with error status 2005-02-17 paul * ecmprob.magma, pm1.c, pp1.c, stage2.c, ecm-impl.h, ecm.h: finished Brent-Suyama's extension for P+1 2005-02-16 paul * ecm-impl.h, ecm2.c, pp1.c, stage2.c: Brent/Suyama for P+1 (cont'd) 2005-02-16 alex * ecm2.c: Replaced printf by outputf 2005-02-16 paul * stage2.c, ecm-impl.h, ecm2.c, pp1.c: preliminary code for Brent/Suyama's extension in P+1 2005-02-15 alex * ecm.c: Increased thresholds for high Brent-Suyama degrees * README: A small correction about mem saving with treefiles, and typo * ecm-impl.h, pp1.c, stage2.c, bestd.c: My latest attempt to get bestD() right: new conditions for i0 and i1, and fixed init of progressions if i0 != 0 (mod d2) 2005-02-14 alex * polyeval.c, stage2.c: Small cleanups of treefile code 2005-02-13 alex * README: Added description of -treefile, plus some small changes 2005-02-11 paul * random.c: commented the outputf calls in non-library mode 2005-02-11 alex * bestd.c, random.c, stage2.c, TODO: Small changes to use ECM_ERROR and outputf(), updated TODO 2005-02-11 paul * polyeval.c: removed unused variable 2005-02-11 alex * pp1.c, stage2.c, main.c, pm1.c, polyeval.c, ecm.h, factor.c, listz.c, ecm-impl.h, ecm.c: Storing product tree of F in files pretty much works now. Some cleanups tbd 2005-02-10 alex * test.ecm: Small comment change 2005-02-09 paul * pp1.c, stage2.c, README.lib, ecm.c, ecm.h, factor.c, main.c, pm1.c: updated README.lib now use the library function ecm_factor() in main.c too! 
2005-02-09 alex * schoen_strass.c: Changes for library, uses ASSERT and outputf() now 2005-02-08 paul * Makefile.am, README, auxlib.c, bestd.c, ecm-impl.h, ecm2.c, ecmfactor.c, main.c, mul_lo.c, trial.c, tune.c, tune2.c: further cleanup * README.lib: documentation for libecm * pm1.c, pp1.c, stage2.c, factor.c, listz.c, main.c, mpmod.c, ecm-impl.h, ecm.c, ecm.h, ecm2.c, auxlib.c: got rid of verbose parameter through outputf() interface * random.c, ecm.h, ecm2.c, ecmfactor.c, factor.c, main.c, pm1.c, pp1.c, auxi.c, ecm-ecm.h, ecm-impl.h, ecm.c: further work for library interface * mul_lo.c, stage2.c, main.c: fixed problems to please icc * test.pm1: added test for step 2 primes near B2min * Makefile.am, ecm-ecm.h, ecm-impl.h, ecm.c, ecm.h, factor.c, main.c, pm1.c, pp1.c, resume.c, stage2.c: preliminary new file factor.c for library interface 2005-02-07 alex * rho.c: Changed error testing/handling to ASSERT. 2005-02-07 paul * auxlib.c, bestd.c, ecm-impl.h, ecm2.c, getprime.c, ks-multiply.c, mpmod.c, rho.c, stage2.c, toomcook.c: got rid of xmalloc() [now allocation errors are signaled to caller] replaced abort, exit, stdout, stderr, printf, gmp_printf... * schoen_strass.c, configure.in: check for __gmpn_mod_34lsub1 in configure, and wrote a replacement for it when it does not exist * auxlib.c: use ECM_STDOUT/ECM_STDERR * auxi.c, ecm-ecm.h: removed gcd in auxi.c, ecm-ecm.h * ks-multiply.c, listz.c, lucas.c, median.c, mpmod.c, mul_lo.c, pm1.c, polyeval.c, pp1.c, rho.c, schoen_strass.c, stage2.c, tune2.c, TODO, auxlib.c, bestd.c, ecm-ecm.h, ecm-impl.h, ecm.c, ecm2.c, getprime.c: removed FILE arguments 2005-02-06 alex * ecm-ecm.h, ecm-impl.h, ecm.c, main.c, stage2.c, auxlib.c: Encapsulated output control in outputf() function, made stage2.c and ecm.c use that function * rho.c: For B1=1, computation would go into very long loop. Fixed * stage2.c: Removed experimental SAVE_TREE code, put back in for pow() etc.
2005-02-05 alex * rho.c, stage2.c: Got rid of finite() in stage2.c, made sure ecmprob() does not return negative values 2005-02-04 paul * TODO: put proposal from Karim Belabas * schoen_strass.c, stage2.c, toomcook.c, mul_lo.c, polyeval.c, rho.c, median.c, listz.c, ks-multiply.c, TODO, ecm-impl.h: more changes to catch errors and specify output/error streams * ecm-impl.h, stage2.c, test.pm1: changed one P-1 test (B2 was too near from 2^53, so that B2' overflowed) 2005-02-04 alex * bestd.c: Fixed bug in bestd() ...again (following PaulZ's suggestion) 2005-02-04 paul * bestd.c, mul_lo.c: added FIXME in bestd.c changed mul_lo.c to use table computed by tune2.c * tune2.c: fixed pb with return value * median.c, mpmod.c, pm1.c, polyeval.c, pp1.c, stage2.c, ecmfactor.c, getprime.c, listz.c, lucas.c, main.c, ecm2.c, ecm-impl.h, ecm.c, ecm.h: put back changes from Alex (rev 1.57 to 1.58 of pm1.c) that I removed by error added control for output/error streams changed exit(1) into error return values 2005-02-03 paul * TODO, configure.in: updated TODO removed useless message in configure 2005-02-03 alex * ecm-impl.h, ecm2.c, mpmod.c, pm1.c, pp1.c: Replaced UNUSED by ATTRIBUTE_UNUSED, following GMP convention (see gmp-impl.h) * TODO: Montgomery's double sieve is pretty much implemented now * bestd.c: Fixed bug in bestd() which sometimes caused too small d values to be chosen, thus failing to test entire [B2min,B2] interval * auxlib.c: SunOS needs time.h for CLOCKS_PER_SEC 2005-02-03 paul * pm1.c, polyeval.c, pp1.c, random.c, rho.c, schoen_strass.c, stage2.c, toomcook.c, mpmod.c, mul_lo.c, ecm2.c, getprime.c, ks-multiply.c, listz.c, lucas.c, median.c, auxlib.c, bestd.c, ecm.c: changed header from GPL to LGPL added missing years if any * ecmfactor.c: improved readability 2005-02-03 laurent * median-aux.c: Removed unused file median-aux.c. * ecm.xml: ecm.xml validates again. 
2005-02-03 paul * Makefile.am: added missing headers * getprime.c: added main() to compile with -DMAIN * countsmooth.c: added missing #ifndef and url for primegen 2005-02-03 alex * schoen_strass.c: Made F_mod_*() static, hopefully to please icc 2005-02-03 paul * auxlib.c, ecm-ecm.h, ecm-impl.h, ecmfactor.c, getprime2.c, nbdigits.c, random.c, random2.c: new files for library * stage2.c, toomcook.c, trial.c, tune.c, tune2.c, smartprp.c, resume.c, rho.c, schoen_strass.c, mul_lo.c, pm1.c, polyeval.c, pp1.c, ks-multiply.c, listz.c, lucas.c, main.c, median.c, mpmod.c, ecm.h, ecm2.c, eval.c, getprime.c, bestd.c, candi.c, configure.in, countsmooth.c, ecm-gmp.h, ecm.c, Makefile.am, auxi.c, b1_ainc.c: complete rewrite to separate library/frontend * test.pp1, check.mpl, ecm.h, listz.c, stage2.c: modified one P+1 test * ecm-gmp.h, pm1.c, tune.c, tune2.c: fixed warnings (with -W -Wall -pedantic -Wmissing-prototypes) 2005-02-02 alex * ecm2.c, pm1.c, stage2.c, ecm.h: Cleanup of ecm/pm1_rootsF code. Small speedup in pm1_rootsF init. 
2005-02-02 paul * Makefile.am, ecm.h, mul_lo.c, tune.c, tune2.c: added new file tune2.c for tuning mpn_mul_lo_n() * ecm-gmp.h: added missing MPN_OVERLAP_P * ecm.xml: changed email for Jim * AUTHORS: added item for Jim * AUTHORS: mungled email addresses * README, configure.in, ecm-gmp.h, ecm.h, ks-multiply.c, mpmod.c, mul_lo.c, pm1.c, tune.c: added --enable-assert to configure got rid of WANT_GMP_IMPL * README, ecm.xml: added -prp options in ecm.xml cleaned README * main.c, pm1.c, stage2.c, eval.c: more cleanup * README, bestd.c, countsmooth.c, ecm.c, ecm.h, eval.c, getprime.c, main.c, mpmod.c, pm1.c, pp1.c, trial.c: more cleanup (fixed icc warnings) * Makefile.am, README, TODO, configure.in, ecm.xml, mpmod.c: updated README added target tune in Makefile 2005-02-01 alex * pp1.c, stage2.c, ecm.h: Printing of mul count in rootsF/G for P+1 put back in * ecm2.c: Better selection of number of parallel progressions in ecm_rootsF(), avoids unreasonably long init times 2005-02-01 paul * main.c, pm1.c, pp1.c, test.ecm, README, candi.c, ecm.c, ecm.h: made -go work for P+1 and ECM 2005-02-01 alex * pm1.c: Merge errors. :( Fixed * ecm2.c, pm1.c: Put counting of muls/extgcds when computing roots of F/G back in. 
Only printed with "-v -v" (or more -v) 2005-02-01 paul * ChangeLog, Makefile.am, README.dev, TODO, configure.in, ecm.xml, main.c: updated TODO, ChangeLog, ecm.xml * AUTHORS: obscured my email 2005-02-01 alex * AUTHORS: Updated (and obscured) my email address 2005-02-01 paul * configure.in: avoid AC_CHECK_LIB 2005-02-01 alex * INSTALL: Minor corrections 2005-02-01 paul * configure.in: check mpn_mul_fft *after* LDFLAGS/LIBS are defined * TODO, ecm.c, ecm.h, main.c, pm1.c, pp1.c, stage2.c, ChangeLog, README: removed -ticdelay option (was unsupported) * INSTALL, Makefile.am, ecm.xml: added man page (ecm.1) in distribution added install instruction in INSTALL * auxi.c: added missing years * ecm.xml: added missing * INSTALL, ecm.xml: modified INSTALL for new configure/make 2005-01-31 paul * bestd.c, ecm.h, ks-multiply.c, listz.c, median.c, resume.c, toomcook.c: code cleanup (removed muls count, unused code, ...) * AUTHORS, README, ecm.xml, ks-multiply.c, listz.c: improved documentation 2005-01-31 alex * resume.c: Fixed possible unterminated string 2005-01-31 paul * Makefile.am, README.dev, configure.in: simplified LDFLAGS/LIBS * README.dev, ecm.1, ecm.xml: source documentation is now ecm.xml (docbook) * main.c: updated champion's digits 2005-01-28 paul * test.ecm: removed last line 2005-01-28 laurent * configure.in: Configure.in needs to set LDFLAGS. 2005-01-28 paul * ks-multiply.c, listz.c, ecm.h: fixed bug (MPN_COPY requires size > 0) * ks-multiply.c, test.ecm, test.pm1, test.pp1: check non-zero size in MPN_COPY 2005-01-28 laurent * ecm.h, main.c, resume.c: VERSION is defined in configure.in now. 
2005-01-28 paul * pp1.c, schoen_strass.c, stage2.c, Makefile.am, README, TODO, bestd.c, configure.in, ecm.c, ecm.h, ecm2.c, eval.c, listz.c, lucas.c, main.c, mul_lo.c, ntl.c, pm1.c, polyeval.c, polyz.c: removed counting of multiplications removed use of NTL updated TODO 2005-01-27 paul * AUTHORS, ks-multiply.c: filled AUTHORS file * main.c: typo * README.dev: need aclocal too * Makefile: removed -> switch to autotools * AUTHORS, NEWS, README.dev: new files for autotools * ecm2.c, ks-multiply.c, listz.c, stage2.c: fixed a few memory leaks 2005-01-27 alex * pm1.c: Fixed mem leak (pointed out by PaulZ) 2005-01-26 paul * INSTALL, README, b1_ainc.c, main.c: updated README * ChangeLog, mpmod.c: modified ChangeLog added special base2mod for Fermat numbers 2005-01-26 laurent * Makefile.am, configure.in: Rules cleaning. * Makefile.am, configure.in: More autotools voodoo, borrowed from MPFR. 2005-01-26 paul * listz.c, mpmod.c, pm1.c, schoen_strass.c, INSTALL: further cleaning of "unused" variables * bestd.c, ecm.c, ecm.h, listz.c, main.c, pm1.c, pp1.c: removed unused code and variables 2005-01-25 paul * bestd.c, ecm.h, ks-multiply.c, listz.c: fixed a problem in bestD changed version to 5.2.0 implemented wrap-around trick in division 2005-01-17 laurent * Makefile.am: Add missing source file. * bestd.c: No need to bother with values.h, float.h is enough and seems to work just as well. 2005-01-15 laurent * auxi.c: Correct call to getrusage. 2005-01-14 alex * ecm.h, pm1.c, pp1.c, stage2.c, ecm.c: Expected time to find a factor printed at end of stage 2 2005-01-07 paul * redc.asm: added copyright header * mpmod.c, redc.asm, Makefile: added assembly support for redc 2005-01-05 paul * mpmod.c: renamed ecm functions with mpz_ or mpn_ prefix 2005-01-04 paul * mpmod.c: improved mpz_mod_n 2004-10-24 alex * rho.c: Added missing prototypes * ecm.h, rho.c, stage2.c, Makefile: Dickman's rho function for computing ECM's probability of success. 
Is printed for various factor sizes with -v parameter. 2004-10-12 alex * ecm-gmp.h: Replaced #if ... /#else #if ... by /#elif ... * schoen_strass.c: Fixed checksum debugging code for NOPAD case 2004-10-06 paul * mul_fft.c: this is the mul_fft.c code from GMP (gmp-cvs-20040917 patched) * main.c: changed default k to 2 2004-09-29 paul * ks-multiply.c, listz.c, stage2.c: added wrapmul in ks-multiply.c (not used so far) removed check for number of multiplies in stage2.c (not possible with KS) 2004-09-28 alex * stage2.c: Print final residue (product of polyeval output) if verbosity >= 3, i.e. "-v -v" * schoen_strass.c, mpmod.c, Fgw.c, Makefile, ecm.h: Interface to use George Woltman's gwnum library for Fermat numbers 2004-09-23 alex * mpmod.c: Stupid bug: = instead of == in comparison. :( 2004-09-21 paul * mpmod.c: fixed misusage of mpn_mul_fft() 2004-09-21 alex * ecm.h, mpmod.c, schoen_strass.c: Use of GMP FFT in mpmod.c self-contained now, and used only for exponent >=32768. Use of GMP FFT added to schoen_strass.c, but currently disabled - misses a factor of F15. * README: Small changes to Note on Fermat numbers section 2004-09-21 paul * mpmod.c: with HAVE_FFT defined, mpres_mul() directly calls mpn_mul_fft() * README: removed one sentence 2004-09-13 alex * README: Some changes to Brent-Suyama section. Added note on factoring Fermat numbers. 2004-09-13 paul * main.c: disable Line=... messages by default * ks-multiply.c: fixed bug when m=0 * Makefile: added CFLAGS to LD (needed on Sparc) 2004-09-11 alex * schoen_strass.c, listz.c: PolyInvert() now uses a middle product for Fermat numbers * schoen_strass.c, stage2.c: Moved global var Fermat from stage2.c to schoen_strass.c 2004-09-10 alex * polyeval.c: Oops - accidentally deleted a list_mod() statement. Fixed. * polyeval.c, bestd.c: polyeval_tellegen() still used a short product instead of Schoenhage-Strassen for Fermat numbers! Fixed, approx. doubles speed for POLYEVAL and mul count matches theory now. 
* median.c: Additional parameter check for Fermat case of KMulGen() * ecm.h: KS_MULTIPLY was accidentally #undef'd, now #define'd again * median.c: TMulGen would call TMulKS() even if KS_MULTIPLY was undef'd. Fixed. 2004-09-09 alex * bestd.c, ecm.h, pp1.c, stage2.c: Double sieve for P+1 (does not use multiple progressions, only skips) bestD() takes d2 into account. 2004-09-09 laurent * configure.in: Verify that GMP was compiled with FFT enabled. * Makefile.am, configure.in: Actually test for GMP presence in configure. Support for test targets in Makefile.am. 2004-09-09 paul * median.c, test.pm1, ecm.h, ks-multiply.c, listz.c, main.c, bestd.c: use middle product in PolyInvert changed bestD() for new KS routines now TMulKS takes an additional parameter "rev" 2004-09-09 alex * mpmod.c, pm1.c, stage2.c, test.ecm, bestd.c, ecm.h, ecm2.c: Double sieve for generating roots of ECM and P-1, see Montgomery "Speeding", section 9. P+1 tbd. bestD() does not know about larger block length yet, tbd. 2004-09-09 paul * ks-multiply.c: malloc -> xmalloc * ks-multiply.c: fixed malloc bug * trial.c, auxi.c: fixed warning 2004-09-08 alex * listz.c: added #else to avoid warning about unreachable code by Sun CC. 
2004-09-08 paul * polyeval.c, ecm2.c, ks-multiply.c, median.c, ecm.h, ecm-gmp.h: added TMulKS() [and macro FFT_WRAP, and KS_TMUL_THRESHOLD] moved ASSERT() to ecm.h fixed typo in ecm2.c * ecm.h, listz.c, median.c, polyeval.c: added list_swap fixed inefficiency in polyeval_tellegen (list_mul_high call did not reduce coefficients mod n) 2004-09-07 paul * listz.c, median-aux.c, median.c, polyeval.c, ecm.h, ks-multiply.c: cleaned up polyeval_tellegen and median.c: - converted comments in english - use functions of listz.c when possible - translate mpz_mul_ui (.., .., 2) into mpz_mul_2exp - other tiny optimizations * TODO: sorted items * pm1.c, pp1.c, main.c: default for -ticdelay is now -1 * ecm.h: added comments about #define's 2004-09-06 paul * mpmod.c: oops, modulus->bits can be negative * mpmod.c: modulus->bits is always positive * ecm.h, mpmod.c, pm1.c: now isbase2() is called with same threshold BASE2_THRESHOLD (in ecm.h) base2mod() changed to perform no division any more 2004-09-03 alex * stage2.c, listz.c: Where possible, polynomials F, G and invF are deallocated before calling polyeval(), polyeval_tellegen() or poly_gcd(). * ecm.h: POLYEVAL got set whenever POLYEVALTELLEGEN was set, overriding POLYGCD. Changed so that POLYGCD overrides both POLYEVAL and POLYEVALTELLEGEN. 2004-09-03 paul * stage2.c: experimental code to save/restore the product tree 2004-09-03 alex * schoen_strass.c, listz.c: PrerevertDivision() uses a non-zeropadded transforms for Fermat numbers if poly degree is not too large. Removed Matrix Fourier Algorithm (no faster) and testing code in schoen_strass.c 2004-09-03 paul * ks-multiply.c: added comment 2004-09-01 laurent * Makefile.am, configure.in: Autoconf'ing the project. 2004-09-01 alex * ecm.h, listz.c, schoen_strass.c: Added option for transform without zero padding to Sch"onhage-Strassen. 
2004-08-31 paul * ks-multiply.c: thresholds now takes into account number size too 2004-08-31 alex * TODO: Added entry for idea of choosing roots of F and G from Montgomery "Speeding", 9.1.3 2004-08-31 paul * Makefile, ks-multiply.c, listz.c: improved PrerevertDivision when KS_MULTIPLY defined 2004-08-30 alex * Makefile, auxi.c, ecm.h, listz.c, schoen_strass.c, toomcook.c: Sch"onhage-Strassen code now can multiply monic polynomials directly. This saves adds when building polys from their roots, unfortunately the speed gain is minimal. mpz_divby3_1op() moved to auxi.c 2004-08-30 paul * ks-multiply.c: Kronecker-Scho"nhage's code, contributed by David Newman * Makefile, bestd.c, ecm-gmp.h, ecm.h, listz.c, mpmod.c, polyeval.c, stage2.c: integrated David Newman's Kronecker-Scho"nhage's code * schoen_strass.c: commented out mpz_divby3_1op (already in toomcook.c) * TODO: added one todo line (KS mult) 2004-08-26 alex * bestd.c, ecm.h, ecm2.c, stage2.c: bestd_po2() examines both B2min and B2 instead of just their difference, and has better d values for very small polynomial degrees. * resume.c: Resuming now always reduces the x-coordinate (mod N). * schoen_strass.c: Multiplication of transformed coefficients inside the MFA transform. Should save uncached memory accessed but isn't much faster in practice. * ecm.h, ecm2.c, stage2.c: Correct handling of adding identical points in addWnm() by doubling. * Makefile: Commented out Sun cc specific options * schoen_strass.c: Routines for Sch"onhage-Strassen multiplication of polynomials 2004-07-28 alex * pm1.c, pp1.c, resume.c, stage2.c, Makefile, auxi.c, bestd.c, ecm-gmp.h, ecm.h, ecm2.c, listz.c, main.c, median.c: Schoenhage-Strassen for multiplying polynomials modulo Fermat numbers. Computation of roots for F and G for ECM does several progressions in parallel, to reduce number of extgcds and, for F, reduce the number of roots computed. * eval.c: Sun cc doesn't like leading underscores in variable names. 
_B and _N renamed to B and N. 2004-06-24 laurent * Makefile, README, ecm.1: Fix typo in documentation and clarify the `nobase2' option. 2004-04-06 paul * bestd.c, listz.c: added Weimerskirch/Paar trick for Karatsuba (K=3) 2004-02-13 laurent * median.c: Deleted some unused lines. 2004-01-21 laurent * ecm.1: Added ecm.1 man page; needs update. 2004-01-20 laurent * ecm.h, main.c: Added usage function and changed usage output stream to stdout. 2004-01-20 jim * main.c: Fixed for MinGW builds (no res/resource.h) 2004-01-20 laurent * main.c: Fixed "-n" and "-nn" parameters for unix. 2004-01-16 jim * README, TODO, candi.c, ecm.c, ecm.h, main.c, pm1.c, pp1.c: Added -go handling. The code to use this has NOT been added to ecm_stage1() or pp1_stage1(). It has been placed into pm1_stage1(). Also, the full syntax processing has been added to main.c. Candi.c has a new structure to handle this (mpgocandi_t). can be of any valid expression form, and may contain N letter(s) as a placeholder for the current processing candidate number. * stage2.c, pp1.c, pm1.c: Modifications to allow screen percentage ticks to run through a global function. That function will only update at the proper delay, or possibly NOT show anything at all * main.c: Modifications to allow screen percentage ticks to run through a global function. That function will only update at the proper delay, or possibly NOT show anything at all The showscreenticks() and showscreenticks_change_stage() functions are in this source file. * ecm.h, ecm.c: Modifications to allow screen percentage ticks to run through a global function. That function will only update at the proper delay, or possibly NOT show anything at all * TODO: Added information about -ticdelay n (note ticdelay -1 turns off the percentage done stuff). Any other n value simply is the number of ms between percentage done stderr updates. 
2004-01-12 jim * smartprp.c: Bug in the hex escape sequence parser in the -prp cmd * README: Added explanation of the escape %xH[H] to the -prp cmd * smartprp.c: Added the escape %xH[H] to the -prp cmd cmd parsing. 2004-01-12 paul * smartprp.c: new file (added for Jim) 2004-01-12 jim * ecm.h: Updated beta version and added new declarations for the smart prp function * main.c: Use the new "smart" prp function and parse the new -prp* command args * candi.c: Use the new "smart" prp function * Makefile: Updated with new smartprp.c (and updated DIST to be all .c files) * README: Added new section about external spawned prp app, and added to the command line section * ChangeLog: Added info about changes since 5.1-beta (Hex expressions and VC porting in 5.1.1-beta) (External PRP program spawning in 5.1.2-beta) 2004-01-12 paul * ecm.h, eval.c, main.c, pm1.c, pp1.c, candi.c: number of primality loops is now controlled by PROBAB_PRIME_TESTS (ecm.h) 2004-01-09 laurent * main.c: Fixed "warning: C++ style comments are not allowed in ISO C90" 2004-01-09 jim * auxi.c, resume.c, trial.c, main.c: Porting needed for VC to build * median.c: Port change needed to compile under VC * ecm-gmp.h: Porting needed for VC to build This latest change taken from the latest (4.1.2) GMP's gmp-impl.h file. * eval.c, candi.c, bestd.c: Porting needed for VC to build * ecm.h: Bumped up beta version number. This version has hex number handling in expressions, and is ported to VC 2004-01-06 jim * eval.c: Added handling of Hex numbers to the expression parser. 
* median.c: Modified to build under MinGW (and VC) * bestd.c: Modified to compile under MinGW 2003-12-12 paul * polyeval.c: changed copyright line * Makefile, bestd.c, ecm.h, listz.c, median.c, polyeval.c, stage2.c: several improvements in polyeval_tellegen (now default) 2003-12-11 paul * ecm.h, median.c, polyeval.c, bestd.c: added option -DCHECK_MULS to check number of muls * stage2.c: added warning when estimated and computed muls differ * polyeval.c: a factor of 2 was missing in muls_tuptree (case l=m) * median.c: now muls_tgen calls directly muls_gen * listz.c: replaced calls to toomcook4/toomcook4_low/toomcook4_high by calls to LIST_MULT_N/list_mul_low/list_mul_high * ecm.h: type of multiplication (kara, toom3, toom4) is now defined in ecm.h * bestd.c: replaced muls_toom4 by muls_gen 2003-12-10 laurent * polyeval.c: Multiplication accounting is added in polyeval_tellegen. 2003-12-09 laurent * stage2.c: Timing added when using polyeval_tellegen. 2003-12-09 paul * mpmod.c: changed temp1 -> temp2 in mpres_out_str 2003-12-09 laurent * ecm.h, median-aux.c, median.c, polyeval.c: Polyeval_tellegen now uses transposed Toom Cook. 2003-12-05 alex * countsmooth.c: Reports values of lucky Brent-Suyama matches. Barely tested, beware! 2003-12-05 laurent * median.c: Used better ad hoc divisions by 2 and 3. 2003-12-04 laurent * median.c: Fixed bugs in transposed Toom Cook multiplication with weird argument sizes. 2003-12-03 alex * Makefile, countsmooth.c: computes roots at 1 (mod 6). -pm1, -blocks and -ecm flags. Makefile target. 2003-12-03 laurent * Makefile, median.c: Transposed Toom-Cook3 should work now. 2003-12-02 alex * countsmooth.c: Can use getprime() now. * countsmooth.c: Tool to count smooth values. Supports Brent-Suyama. 2003-12-01 alex * TODO: Added Colin Percival's generalized DWT, moved rootsF [j*d, i] -> [j*d+1, i] to done. 2003-12-01 laurent * ecm.h, median.c, polyeval.c: Fixed muls_tkara. 
2003-11-27 paul * TODO: added new entry 2003-11-27 laurent * Makefile, bestd.c, ecm.h, median.c, polyeval.c, stage2.c: Added a new multipoint evaluation function in stage2 (polyeval_tellegen) with associated functions. #define POLYEVALTELLEGEN if you want to use it. 2003-11-26 paul * COPYING.LIB, Makefile: added COPYING.LIB (for ecm-gmp.h and memory.c) 2003-11-24 paul * test.ecm: put in sh syntax * stage2.c: compute one more term of 1/F (needed for TupTree) 2003-11-19 paul * INSTALL: new timings for ppc 2003-11-18 paul * main.c: updated minimum sizes for potential champions * INSTALL: updated timings for ppc 2003-11-17 paul * INSTALL: added timings for ppc 2003-11-07 paul * ecm.h: new function mpres_realloc * ecm.c, ecm2.c: defined local procedures as 'static' * mpmod.c: mpz_mod_n now always assumes ALLOC(r) >= nn * stage2.c: check alloc. size of f (used as mpres_t) * test.ecm: added new tests for ecm-5.0.1 bug 2003-11-05 paul * ecm.h: changed some prototypes, added some others * toomcook.c: redirect directly to karatsuba in toom4 * test.pp1: added newline before ok * test.pm1: added newline before ok message * test.ecm: ok message made similar to other tests * stage2.c: now compute estimated number of muls for stage2 (and prints corresponding percentage) * polyeval.c: modified calls to RecursiveDivision * pm1.c: removed unused variable * mpmod.c: incorporated changes to fix ecm-5.0.1 bug (overflow in input of mpz_mod_n) * listz.c: new routines to compute short products (low and high) * check.mpl: new routines to determine numbers of muls of short products * bestd.c: completely rewritten (now determine the exact number of muls and not an estimation) * TODO: removed several done items 2003-10-31 paul * test.ecm: added test for bug in 5.0.1 2003-10-20 paul * Makefile, auxi.c, bestd.c, candi.c, eval.c: changes suggested by Laurent Fousse to enable compilation with gcc 3.3.2 * TODO: removed done item 2003-10-17 paul * ChangeLog, ecm.c, ecm.h, main.c, pm1.c, pp1.c, 
stage2.c, test.ecm, test.pp1: changed quiet mode (-q) to print all factors on same line (contributed by Laurent Fousse) 2003-10-16 paul * Makefile, bestd.c, ecm.h, ecm2.c, stage2.c, test.ecm, test.pm1, test.pp1: put in patch from Alex to solve "B2min too small" problem 2003-07-22 paul * TODO: added one item (-Q) * TODO: added one item * polyeval.c: fixed potential memory leak 2003-07-02 paul * bestdaux.c, candi.c, ecm2.c, eval.c, getprime.c, listz.c, main.c, ntl.c, pm1.c, stage2.c: added check for return value of malloc 2003-06-20 paul * Makefile, ntl.c: applied patches from Christian Cornelssen * stage2.c: moved check for overflow at the beginning * TODO: added suggestion from one user 2003-06-19 paul * TODO: added one item 2003-05-09 paul * main.c: smallest P+1 champion is now 37 digits 2003-04-22 alex * Makefile, mul_lo.c, stage2.c: Changes to make building with NTL work 2003-04-22 paul * INSTALL: fixed typo * test_sh.ecm, test_sh.pm1, test_sh.pp1: now replaced by test.* * test.ecm, test.pm1, test.pp1: converted from csh to sh * Makefile: now only GMP is linked statically 2003-04-16 jim * main.c: Changes for incremental saving (but commented out). * resume.c: Changes to incremental resuming (but it is commented out) * stage2.c, pp1.c: Stage 2 overflow fixes from 5.01 * pm1.c: Stage 2 overflow fixes from 5.01 Added some new logic for the incremental save/resume, but it is commented out. * ecm2.c: Stage 2 overflow fixes from 5.01 * ecm.h: Stage 2 overflow fixes (from 5.01). * ecm.c: Partial resume code commented out for now. 
* test_sh.ecm, test_sh.pm1, test_sh.pp1, test.ecm, test.pp1, test.pm1: Added stage2 overflow test * Makefile: Added ChangeLog to EXTRADIST section * ChangeLog: Version 5.01 additional file to ECM project * INSTALL: Added new changes which were in release 5.01 Added new information about MinGW32 timings (and compiling switches) * README: Updated to Pauls changes in 5.01 2003-04-11 paul * main.c: limit for P+1 champion is now 35 digits 2003-04-09 jim * auxi.c: Failed to set the "first time flag" and convert the double to unsigned to fix a roundoff bug. 2003-04-05 alex * mpmod.c: GMP prior to 4.1 does not have GMP_NUMB_BITS defined. Using __GMP_BITS_PER_MP_LIMB in that case. * resume.c: Bugfix: save file lines were not correct if both sigma and A value was given (a semikolon was missing). 2003-03-30 jim * README: Added section 6 "ECM-GMP Expression syntax" (and adjusted original sections 6-10 to 7-11) Added docs about b1_ainc.c candi.c trial.c eval.c and the test_sh.* files which are newly added to version 5.1 List trial div, expressions, and looping in section 2 (new major items since ecm4c). Added the -I f and -i n and -B2scale switches to section 3 (efficient use of ecm-gmp) Changed section 5 (memory usage) to list that in -b breadthfirst mode (the default for -inp), that the whole file is read into memory at one time, thus increasing memory footprint (but for good reasons). Added [-inp file] as an optional paramter (even though it is really an option), and list that the redirection of a file i.e. < file is now optional [ < file]. It is optional because of -inp file. Added -t n to "Options to control factorization method" Added -B2scale f to Options to control step 2 Added -cofdec and -ve n to Options to control output Added info about incremental saving during B1 stage, and also documented that it is currently NOT working Added options "-i n", "-I f", "-n", "-nn", "-one" and "-b -d" to Miscellaneous options section. 
Added information that the first known problem has been eliminated (at least I know it is gone in my MinGW32 builds). Added 2 new "known problems" which are caused by the new stderr output for the 1 line output, and for the stage1/stage2 "percentage" done screen output (which btw, the stage2 percentage done is not yet done ;) The "second" of these new known problems is not a "certain" problem. It is one that someone (Paul) needs to look into and see how the interaction of this new stderr output works with the client server, and with things like nohup & under unix (I myself don't have access to any *nix shell) * main.c, pm1.c, pp1.c, ecm.h, ecm.c: Added a B2 scaling factor (user supplied multipler for calculated B2 values) 2003-03-25 jim * ecm2.c: ecm.h was modified, but ecm2.c was missed and needed modified also. * resume.c: The incremental saving function has been neutered until it is working correctly in ecm/pm1/pp1 stage 1 functions. * main.c: Added -cofdef (forced cofactor in decimal even if an expression is "valid") Added a Cnnn "header" to the B1= .. line listing the length of the candidate (unless the candidate expression is explicitily listed) Fixed a couple of spots where the stderr 1 line output (in > redirection mode) was not erasing the prior line * auxi.c: Added high resulution timers to MinGW and MSVC builds. 2003-03-14 jim * main.c: B2min was broken due to handling of the -i n and -I f auto incrementation code. 2003-03-14 paul * main.c: smallest top-ten P-1 has now 39 digits * test.pm1: B1 was too small in new test * pp1.c, stage2.c, test.pm1, pm1.c: added check for overflow in stage2 + test * ecm.h: changed "unsigned int" into "unsigned long" for s parameter 2003-03-12 paul * Makefile: added -pedantic 2003-03-12 jim * main.c: On "usage" screen, had not renamed -a n to -i n, nor had -I f been added. Removed a C++ comment. 
* main.c: Removed C++ comment * resume.c, candi.c: Removed C++ comments * resume.c: Removed a couple C++ comments * pp1.c: Fixed Boo boo. * pm1.c: Fixed boo boo. * eval.c: Removed C++ comments * ecm.c: fixed boo boo. * b1_ainc.c, auxi.c: Removed some C++ comments * main.c: Changed default "shallow" mode factor finding to "deep" mode (continues to find more factors Removed the -deep command line switch Added command line switch -one (forces ECM back into shallow mode). -i file changed to -inp file. -ib file removed. -i defaults to width-first and cat file | ecm b1 defaults to depth-first searching. added a -d for depth-first processing (to complement the -b breadth-first mode). Removed -a n (constant B1 increment mode) Added -i n (constant B1 increment mode) Added -I f (Auto calculation (with scaling by f) B1 increment mode) Removed stage 1 precentage "setup" output. That is now output in the 3 stage1 functions. * README: Added new options to document, but there is very little docmentation about them yet. THIS IS STILL todo * TODO: Updated todo list with new items, and moved items that are done, or partly done * pp1.c: Finished stage 1 percentage counter * pm1.c: Finished stage1 percentage done * ecm.c: Completed percentage done in stage one, and removed 'setup' output for stage2 * stage2.c: Added "starting" code for percentage counter on stage 2 * ecm.h: Added declaration of B1 incrementation function * b1_ainc.c: Code to do auto-incrementation of B1. It works with a constant increment value, or it computes the "ideal" increment (based on current B1). It can also scale this calculated "ideal" * toomcook.c: Removed ABS define, since it is now in ecm.h * Makefile: Added new source file b1_ainc.c (does "auto" incrementation of B1 values) 2003-03-11 paul * main.c: changed P-1 champion limit * main.c, pp1.c: fixed problem with potential champions for -pp1, when it performs P-1 * ecm-gmp.h, ecm.h: moved ABS to ecm.h 2003-03-07 jim * TODO: Updated the % done item. 
* pp1.c, pm1.c: Added Stage 1 screen percentage updating. Added Stage 1 Auto incremental saving * main.c: Added -deep to trial factoring (actually added !(-deep) Added space after function calls to bring my code into existing coding specifications. * ecm.h: Added -deep command to trial factoring * trial.c: If we are not in -deep mode, then bail out after first factor is found * TODO: Added -qq for ultra quiet mode (i.e. for running under the client/server) * eval.c: Fixed bugs introduced removing C++'ism. The max val needed to be adjusted before the realloc, or we would simply realloc the same sized buffer (and then overwrite it) * main.c: Placed the fprintf(T:000) back where it needed to be. Remove any temp AutoSaved B1 file since it is no longer needed. * candi.c: Wrong function title in the validation check logic. * ecm.c: Added "plumbing" for incremental AutoSaving of B1 (every 15 minutes) !!NOT YET WORKING CORRECTLY!! Changed 1:000 percentage screen updates to once every 30 seconds. 2003-03-07 paul * Makefile, ecm.h: changed version to 5.1-beta 2003-03-07 jim * resume.c: Added const to char * in resume, and put an else so that sigma AND A could not both be written Created write_temp_resumefile() and kill_temp_resume_file() functions. * ecm.h: Added defines for write_temp_resumefile and kill_temp_resume_file (found in resume.c) Added spaces to my function declarations to more conform to the ecm standard. * eval.c: Added a strnicmp() function (in VC and MinGW, we use the native version) * TODO: Changed incremental saving and Percentage done to be partially done 2003-03-07 paul * ecm-gmp.h, ecm.c, eval.c, main.c, trial.c: fixed a few C++-specific idioms 2003-03-07 jim * ecm.c: Stage 1 ECM code for 1:000 to 1:100 done and working fine. * trial.c: The "testing code" for the T:000 to T:100 was not quite in the right place 2003-03-06 jim * main.c: Added some code to do T:000 to T:100 (and 1: 2:) "percentage done" output to stderr (i.e. 
dummy lights) Changed Factors= to factors= on the stderr line in Breadthfirst mode, now show "loop count" stderr line on first loop. in Breadthfirst mode, show the current line processing/total lines in file on the output line. * eval.c: Added C++ style sinle line comment to expression parser Now lines starting with '#' is a comment, and from where ever a // is found, to the end of a line is a comment. * TODO: Added information about 80 dots and how to do that with the existing stderr output of the lines/loops * trial.c: Added code to do T:000 to T:100 "percentage done" output to stderr (i.e. dummy lights) * ecm.h: Updated "internal" version to current interim build 2003-03-05 jim * test_sh.pp1: P+1 Test script file (for borne shell) * test_sh.pm1: P-1 Test script file (for borne shell) * test_sh.ecm: ECM Test script file (for borne shell) * dummy: Bye bye dummy * dummy: Test for PauZ (and help for me) 2003-03-05 paul * candi.c, eval.c, trial.c: files from Jim 2003-03-04 jim * main.c: LOT of changes. New switches: -a n (auto increment B1 after each loop) -i file (input file, not from stdin) -ib file (breadth-first looping) -b (breadth-first looping for stdin) -deep (continued factoring after a factor is found) -n (nice mode, only fully implemented in Win32) -nn (VERY nice mode, i.e. idle) -t n (trial factoring, up to n) -ve n (verbose expression printing, for expressions < n chars) The read_number() now calls the expression parser. read_number is only used to skip blank (or commented) lines. The expression parser does the rest. Most changes took place in the "looping" code, since now we loop either width first (the current 5.0 default), or "breadth-first", and since during looping the program may handle found factors either "shallow" (the current default 5.0 mode), or -deep. * auxi.c: nb_bits now const. A MUCH improved rand generator for Win32 * TODO: Expression parse done. -nice done (not -kill). 
a "key" added so that todo's and done's can be listed * ecm-gmp.h: alloc.h did not work with MinGW. The change was taken from a GMP header * ecm.h: Created mpcandi_t struction (for warehousing info about the candidate) Use the mpcandi_t object instead of simple mpz_t where it is needed Added functions from candi.c, eval.c, trial.c (and main.c since read_number is needed by resume code) Made nb_bits be const * resume.c: Use mpcand_t for candidate numbers Output expressions in save/resume file (if expression exists) Patches for MinGW and MSVC for machine name * Makefile: Added candi.c eval.c and trial.c to Makefile 2003-03-04 paul * main.c: check mp_bits_per_limb = GMP_NUMB_BITS * toomcook.c: rewrote comparison in toomcook4() * test.ecm: updated for new bestD() * stage2.c: use new bestD() function * main.c: new semantics of -k * README: new semantics for -k option * main.c: now default k is 0 (lower bound) * ecm.h: updated wrt changes in bestd.c * check.mpl: new functions to compute number of multiplies in step 2 * bestd.c: new code using data generated by bestdaux.c * TODO: added several new items * Makefile, bestdaux.c: added bestdaux.c, auxiliary file to determine optimal parameters for bestd.c 2003-03-03 paul * INSTALL: updated timings for Athlon and EV6 * README: added comments about probability wrt Table 1 * tune.c: print MUL_KARATSUBA_THRESHOLD and DIV_DC_THRESHOLD * main.c: error when factor found is 1 * TODO: added 3 items * mul_lo.c: put ecm.h after gmp-impl.h * ecm-gmp.h, ecm.h: now all thresholds are in ecm.h (should go after gmp-impl.h or ecm-gmp.h) * tune.c: fixed typo found by Christian Cornelssen * mpmod.c: changes from Christian Cornelssen to change thresholds * README: added info on "tune" * Makefile: use LDFLAGS for tune added tune in clean target * INSTALL: added info on Darwin 2003-02-28 paul * test.ecm: added tests for bug found by Jim * ecm.c: fixed bug found by Jim Fougeron 2003-02-24 paul * TODO: added 1 item * INSTALL, Makefile: get rid of 
-LNTL/... in Makefile added instructions for LDFLAGS/-D__freebsd in INSTALL 2003-02-24 alex * pm1.c: Added comment to why we use Dickson(4), Dickson(6), x^12,.. 2003-02-24 paul * TODO: updated 2003-02-23 alex * README: small changes, notably maximum possible B1 for ECM and P+1. 2003-02-23 paul * ecm.c, main.c: now prints the *exact* number of digits for large numbers * TODO, listz.c: updated TODO changed list_gcd to use 'p' only at the end * README: stage -> step * Makefile, ecm.h: version is now 5.0 * INSTALL: minor changes * memory.c: added tests_memory_status * mpmod.c: changed threshold for isbase2 * resume.c: added space 2003-02-23 alex * toomcook.c: Made indentation more Gnuish * main.c: Revered order of opening save and resume file 2003-02-23 paul * ecm.h: removed __gmp_default_free * listz.c: style editing * getprime.c: forgot to reinitialize offset * ecm.c: style editing static val -> non static * TODO: added 2 items * Makefile: CFLAGS not needed in link phase 2003-02-22 alex * listz.c, lucas.c, main.c, mpmod.c, mul_lo.c, ntl.c, pm1.c, polyeval.c, polyz.c, pp1.c, resume.c, stage2.c, test.ecm, test.pm1, test.pp1, toomcook.c, tune.c, Makefile, TODO, auxi.c, bestd.c, ecm.c, ecm.h, ecm2.c, getprime.c: Changed copyright notice * auxi.c: Small cleanup of get_random_ui() 2003-02-21 alex * main.c: Initialising comment etc. with empty string to avoid comment fields containig garbage being printed. * pp1.c: Using n^2-1 instead of n-1 for inclusion in stage 1. Unconditionally setting g to 1 afterwards to avoid including n^2-1 again later. 2003-02-20 alex * README: Small changes. Comments on default values of k and Brent-Suyama updated. 
* ecm.h, mpmod.c: mpres_clear, mpres_set and mpres_swap are now macros * resume.c: addef fflush() after writing save file lines to avoid partial line in case of abort * main.c: Added test for existing save file 2003-02-20 paul * test.pm1: rm -> /bin/rm * README, ecm-gmp.h: put back #include by default (unless __freebsd is defined) * ecm.h: defined macro FREE * pm1.c, pp1.c: don't print x0 when resume * memory.c: exported tests_free * main.c: __gmp_free_func -> FREE * ecm2.c: use variable for 2S+2 * ecm.c: use fprintf for error * ecm-gmp.h: use alloca when __sun is defined * auxi.c: use macro FREE * TODO: added some items * README: added note about different -save and -resume names added note about alloca.h problems * Makefile: added -static * Makefile: more changes from Granlund * Makefile, ecm.h: changed version to 5.0-beta-pl3 improved Makefile clean entry 2003-02-19 alex * auxi.c: Made changes suggested by Torbjorn * ecm2.c: Fixed bug freeing unallocated vars in ecm_rootsG_init(). (thanks again Torbjorn) * auxi.c, ecm.h, main.c: Better seed for RNG 2003-02-19 paul * Makefile, auxi.c, ecm-gmp.h: patches from Granlund for FreeBSD * Makefile, ecm-gmp.h, ecm.h, mpmod.c, test.ecm: put extract from gmp-impl.h in separate file (copyright is different) 2003-02-19 alex * ecm.c, ecm.h, ecm2.c, mpmod.c, pm1.c, pp1.c, stage2.c: P-1 selects reasonable degree for Brent-Suyama if none given by user *_roots[FG] return number of multiplies used 2003-02-19 paul * stage2.c, test.ecm, README, TODO, main.c: added -c option improved documentation * Makefile, ecm.h: changed version to ecm-5.0-beta-pl2 * Makefile, ecm.c, ecm.h, ecm2.c, mpmod.c, pp1.c, stage2.c: fixed warnings with -W * test.pm1: forced remove 2003-02-18 paul * bestd.c, ecm2.c, lucas.c, main.c, memory.c, resume.c, stage2.c: changed stream back to stdout for factors found * README, TODO: note on B2 > 100*B1 added items in TODO 2003-02-18 alex * ecm.c: The temp variable "t" was passed around, but never actually used 
anywhere. Removed. * ecm.h, ecm2.c, mpmod.c, pm1.c, stage2.c, test.pm1: ECM and P-1 print a message if a factor is found during the computation of the roots of F or G and verbosity is >= 2 Eliminated "comparison between signed and unsigned" warnings when compiling with -W Added a test for saving/resuming to test.pm1 2003-02-16 alex * ecm.c: Added default values for Brent-Suyama's extension for ECM. * TODO: Added a suggestion from Jay Berg, and one from me 2003-02-16 paul * INSTALL, Makefile, README, TODO, bestd.c, ecm.c, ecm.h, main.c, mpmod.c, pm1.c, pp1.c, stage2.c, test.pm1, test.pp1: fixed several issues found by Jay Berg 2003-02-16 alex * main.c: Fixed generation of random sigmas (new sigma for each input number) 2003-02-15 alex * pm1.c: cascade_mul_ui replaced by cascade_mul_d to avoid nasty bug: the power of a small prime is accumulated and *then* passed to cascade_mul_ui, so that overflow would occur for B1 >= 2^32. * resume.c: Make read_resumefile_line less ugly. Also reads Prime95 v22 ECM residues now. The users name and machine name are written when saving to file. 
2003-02-14 paul * README, TODO: remarks from Jay Berg * INSTALL, README: added advertizing for ECMNET * TODO, pm1.c: fixed problem with mul_casc (powerpc630-ibm-aix5.1.0.0) * Makefile, README: added c155 in distrib * Makefile, README, ecm.h: version is back to 5.0-beta added timings in README * Makefile: missing tab * Makefile: get rid of recursive make * mpmod.c, mul_lo.c: __GMP_BITS_PER_MP_LIMB -> GMP_NUMB_BITS use inline only with gcc * INSTALL: moved comments about CC/CFLAGS * c155: test number for ecm efficiency * INSTALL: added comment about CC/CFLAGS * resume.c: changed char c to int c in freadstrn (otherwise c != EOF always true on irix64) * mpmod.c: put back reduction in mpres_add/sub * Makefile: transfer LD in recursive make * mpmod.c: fixed two problems (missing include alloca.h, add_nc not always defined) * pm1.c: include gmp-mparam.h only when WANT_GMP_IMPL * ecm.h, listz.c, polyeval.c: now list_mul_z also reduces mod modulus * pm1.c: fixed typo * mpmod.c: now mpz_mod_n takes both source and destination, to avoid copies * main.c: added -primetest option * listz.c: added function to check size of residues (DEBUG) * ecm.c: used swap in add3 to avoid copies * TODO: removed done item (-primetest) 2003-02-13 paul * Makefile, ecm.h: changed version * mul_lo.c, pp1.c, stage2.c, mpmod.c: minor editing * main.c: updated on-line help * listz.c: added missing space * cputime.h: now in auxi.c * TODO: removed 2 done items * README: updated with -resume, new files, etc * ecm.h, main.c, mpmod.c, pm1.c, pp1.c, ecm.c: implemented -nobase2 option * mpmod.c, mul_lo.c: got rid of gmp-impl.h (if WANT_GMP_IMPL not defined) * Makefile: removed -pedantic now -DPOLYEVAL is no longer needed (it is the default) * ecm.h: define MUL_KARATSUBA_THRESHOLD (if not already) define POLYEVAL (if not POLYGCD) * main.c: replaced #ifdef POLYEVAL by #ifndef POLYGCD (now POLYEVAL is the default) 2003-02-13 alex * main.c: Small change to "Usage" text: added remark that -resume can read from 
stdin. 2003-02-12 paul * main.c: updated on-line help 2003-02-12 alex * resume.c: I'll try to stop violating my own specs. METHOD= values changed from PM1 and PP1 to P-1 and P+1. 2003-02-12 paul * mpmod.c: improved mpz_mod_n (gain of about 10%) * test.ecm: added -k for "extra" factor * TODO: removed done items * Makefile: removed mul_hi from tune target * mpmod.c: new REDC at mpn-level, using fast mpn_mul_lo * main.c: changed default number of blocks for POLYEVAL * ecm.h: added prototype for mul_lo * README: added "how to get the best of GMP-ECM" * mul_lo.c: low-half multiplication * tune.c: to tune mpmod algorithms * Makefile: added mul_lo and tune * stage2.c: removed number of muls without -v * Makefile: replaced CXX by LD when appropriate 2003-02-12 alex * resume.c: Routines for saving/resuming residues 2003-02-12 paul * stage2.c: fixed another memory leak * stage2.c: fixed memory leak * pm1.c: added default POWM_THRESHOLD 2003-02-11 alex * test.ecm, test.pm1, test.pp1: Modified test suites to work with new command line parameters * Makefile, ecm.c, ecm.h, ecm2.c, main.c, pm1.c, pp1.c, stage2.c, test.ecm: Added -resume option. Sigma, the A parameter and starting point are now specified by the command line options -sigma, -A and -x0. 2003-02-08 paul * INSTALL: added comment about editing Makefile 2003-02-06 alex * toomcook.c: Changed copyright. 
2003-02-06 paul * mpmod.c: added mpn-version of REDC * Makefile: ecm5 -> ecm * test.pm1: put missing | * Makefile: removed -static 2003-02-05 paul * TODO: added one item * mpmod.c: added cast to mp_limb_t * Makefile: added GMP= and NTL= in recursive make call * Makefile: updated VERSION * INSTALL, README, ecm.h: version is now 5.0-beta updated INSTALL and README * main.c: use ECM_VERSION for -save * test.ecm, test.pm1: removed too long tests * TODO: added item for long term * toomcook.c: added spaces * test.ecm, test.pm1, test.pp1: added license * stage2.c: removed #ifdef INVF (now INVF always used) added B2min added total count of muls * pp1.c: added B2min * polyz.c: removed/commented unused code * polyeval.c: added count of muls * pm1.c: updated copyright line added B2min * ntl.c: updated copyright line * mpmod.c: fixed efficiency problem in mpz_mod_n * memory.c: updated document origin * main.c: added champion treatment allow rational seed added B2min * lucas.c, listz.c, getprime.c: updated copyright line * ecm2.c: updated copyright line removed unused code * ecm.h: added licence modified prototypes to include B2min * ecm.c: updated copyright line added B2min in args to ecm() * bestd.c: updated copyright line * auxi.c: updated copyright line added cputime (from cputime.h, now removed) * TODO: removed items done * README: added explanation on - how to use P-1, P+1, ECM - table of optimal B1, B2 - memory usage - option -save * Makefile: added licence moved polyz.c in EXTRAFILES * INSTALL: updated (NTL not needed any more) 2003-02-04 paul * polyeval.c: implement algorithm POLYEVAL * stage2.c: adapted for polyeval * polyz.c: commented poly_gcd when POLYEVAL * mpmod.c: cosmetic changes * listz.c: several changes for polyeval, in particular modified PolyFromRoots to complete the whole product tree * ecm.h: several changes for polyeval * auxi.c: added ceil_log2 * Makefile: adapted to allow both compilation with NTL (POLYGCD=1) and without (default) * test.ecm: added -k 
option to one test with g1 > B2 * TODO: added one item * main.c: added -power in on-line help * main.c: added option -power 2003-01-30 alex * ecm.c, ecm.h, ecm2.c, main.c, mpmod.c, pm1.c, pp1.c, stage2.c: Command line option -dickson to control whether Dickson polys are used or not. Factors found in ecm_rootsG are handled properly. Added checksum to save file lines. 2003-01-29 alex * main.c, pm1.c, pp1.c: P-1 and P+1 are again exponentiating by N-1 and the default seed for P-1 is random again, which I had disabled during test runs. 2003-01-27 alex * mpmod.c, pm1.c, pp1.c, stage2.c, ecm.c, ecm.h, ecm2.c, main.c: Choosing the modulo reduction algorithm inside the different factoring algos now, residue and modulus passed to pm1(), pp1() and ecm() are mpz_t again. Initialisation for different mod algos are separate functions now to allow specifically choosing one. New command line parameters added for doing so. 2003-01-27 paul * mpmod.c: added explicit casts for 64-bit machines * main.c: char -> int for return value of getchar() 2003-01-13 alex * ecm.h, mpmod.c: Another attempt to check in the MODMULN code. 2003-01-12 alex * pp1.c: Fixed typo and a small memory leak in rootsG_init/clear. 2003-01-10 paul * Makefile: added mpmod.{c,o} 2003-01-03 alex * mpmod.c: Basic functionality for modular arithmetic. Plain mpz_mod, base-2 and REDC are implemented. MULMODN is to follow. * listz.c, lucas.c, main.c, memory.c, pm1.c, pp1.c, stage2.c, ecm.c, ecm.h, ecm2.c: Changes for using mpmod arithmetic. Moved computation of roots of F and G into ecm2.c, pp1.c and pm1.c, respectively. 2002-12-20 paul * stage2.c: comments should be in standard C format /* ... */ and not in C++ format // ... 2002-12-19 alex * auxi.c, ecm.h, stage2.c: Basic functions for Dickman's polynomials. Not enabled yet. 
2002-12-17 paul * test.ecm: test file for ecm (from ecm4c) * check.mpl: added useful routines for ecm * stage2.c: adapted for ecm stage 2 * main.c: removed temporary try with MPM * pp1.c: added computation of multiplies added routine to check if factor found by P-1 or P+1 now always uses PRAC * polyz.c: inhibit memory check functions during NTL call * pm1.c: adapted to generic stage 2 * main.c: added memory check functions (with -DDEBUG) fixed some memory leaks * lucas.c: started to adapt to generic modular multiplication * listz.c: put mpz_mulmod macro in ecm.h * ecm.h: modified for ecm stage 2 * ecm.c: adapted for stage 2 * TODO: removed 2 items * ecm2.c: functions for ecm stage 2 * Makefile: added ecm2.c and memory.c * memory.c: file to check memory allocation 2002-12-06 paul * pm1.c, pp1.c, ecm.c: added fflush() after "Using sigma/seed=..." * TODO: added format proposal for save/restore * TODO: changed one item * TODO: added 4 items 2002-12-05 alex * ecm.c, ecm.h, main.c, pm1.c, pp1.c, stage2.c: Support for save files half finished. Factors are returned in a new variable, f. p is only for passing residues around, and stage2() must not change p. New parameter: B1done, which tells factoring functions up to which bound stage 1 has been completed before. Save files can be created, but not read in yet. 
2002-12-03 paul * Makefile: aux.c -> auxi.c 2002-11-30 paul * aux.c, auxi.c: renamed aux.c to auxi.c (problems under Windows) 2002-11-29 paul * pp1.c: get rid of count_leading_zeros * Makefile, lucas.c: added file lucas.c * stage2.c: changed order of operands in pp1_mul * pp1.c: now uses Lucas sequences when PRAC is defined * main.c: defined B1cost for ECM * ecm.h: changed order of arguments of pp1_mul_ui for consistency added prototype for pp1_mul_prac * ecm.c: cosmetic changes * TODO: added several items 2002-11-28 paul * INSTALL, Makefile, README: added INSTALL file * pp1.c, stage2.c: use pp1_mul instead of pp1_mul_ui to avoid depending on longlong.h * Makefile: put /usr/local/gmp and /usr/local/ntl as default directories for GMP and NTL * toomcook.c: changed to "gmp.h" for consistency * ecm.h, main.c, pm1.c, pp1.c: now use random seed for all methods when sigma=0 use getpid() in addition of time() for random seed 2002-11-27 paul * pp1.c: fixed bug in pp1_mul_ui 2002-11-26 alex * ecm.c, main.c, pm1.c, pp1.c: Cleaned up handling of default values and method-specific screen output. 2002-11-26 paul * test.pp1: test file for Williams P+1 * pp1.c: Williams P+1 method * stage2.c: adapted for P+1 * ecm.h: added prototypes for P+1 added 'method' argument to stage2 functions * pm1.c: passed method=PM1_METHOD to stage2() call * main.c: added P+1 * ecm.c: added method in stage2() call * README: todo in a separate file updated * TODO: TODO file :-) * Makefile: added pp1.c 2002-11-25 paul * ecm.h, listz.c, stage2.c: added INVF trick (precomputation of 1/F to speed up divisions by F) * check.mpl: added auxiliary functions to compute numbers of multiplies of karatsuba, toomcook3, toomcook4 * README: removed one item done in TODO, added one more 2002-11-24 alex * toomcook.c: Proof for temp space now reflects the reduced memory requirements of toomcook3(). No change to code itself. 
2002-11-19 paul * ecm.h, listz.c: karatsuba() returns int again 2002-11-19 alex * ecm.c, ecm.h, main.c: Added ECM stage 1. In ecm.h, return type of karatsuba changed to void again, to match karatsuba in listz.c. * listz.c: Toom-Cook 4 is default again. buildG now prints timing info to stdout instead of stderr. 2002-11-15 paul * ecm.h: return type changed to int * toomcook.c: optimized karatsuba/toomcook3/toomcook4 thresholds for minimum of scalar multiplies * test.pm1: added one test 2002-11-14 paul * main.c: replaced __GNU_MP_VERSION etc by gmp_version (better for dynamic library) * Makefile: removed -static 2002-11-07 alex * ecm.h, toomcook.c: Bug: forgot to change definition of T in toomcook.c, so the temp space required still was 2*len+4*log_3(len). Oddly, it did not crash. Fixed (the "too much space" problem, not "the did not crash" problem). 2002-11-06 alex * listz.c, toomcook.c: toomcook[34]() use <= 2*len+2*log_3(len) temp space now 2002-11-05 paul * check.mpl: included Williams P+1 stage 1 code and test inputs * ecm.h, main.c, pm1.c, stage2.c: replaced INVS macros by if-statements (use the x+1/x trick whenever Pollard P-1 is performed) * main.c: changed default choice of B2 so that stage 2 takes about half of stage 1 (assuming Toom-Cook 3 is used) * test.pm1: added 3 more tests changed -e 6 into -e 12 (if "invs" trick not used) * stage2.c: added INVS macro to enable "invs" trick * pm1.c: added trick from Peter for Cunningham numbers * main.c: changed default k to 7 set default S to 1 for ecm, 2 for Pollard (without INVS) * listz.c: only style changes 2002-11-04 alex * ecm.h, listz.c, toomcook.c: Added toomcook4(). 2002-11-01 alex * test.pm1: Added a factor that was missed by old polygcd code to test cases 2002-10-29 alex * ecm.h, stage2.c, test.pm1: rootsG uses batch inversion for large Suyamas powers. Tried to make indentation coherent. 
2002-10-28 alex * stage2.c: Fixed bad merge, T was allocated twice 2002-10-27 alex * ecm.c, ecm.h, listz.c, main.c, pm1.c, stage2.c: Added Suyamas powers for stage 2. 2002-10-25 alex * toomcook.c: Added GPL header. Cleaned up comments a little. No change to code itself. 2002-10-25 paul * check.mpl: added function to compute Toom-Cook 3 auxiliary space * toomcook.c: edited according to GNU coding style and added analysis of temp. space needed * stage2.c: modified memory space for T * main.c: put back sigma=17 as initial value for P-1 * listz.c: incorporated toomcook3 code from Alex * ecm.h: added prototype for toomcook3 * Makefile: added toomcook.c * test.pm1: added one test * stage2.c: fixed bug in rootsG (G[0] was not set) * test.pm1: test file for Pollard P-1 * poly.c: replaced by listz.c (lists) and polyz.c (polynomials) * polyz.c: routines for polynomials of integers (mpz_t) * listz.c: routines for arrays of mpz_t's * ntl.c: NTL interface (first version) * stage2.c, pm1.c: added verbose flag * main.c: adapted for NTL interface * getprime.c: fixed problems with signed/unsigned integers * ecm.h: modified for NTL interface * ecm.c: added verbose flag * check.mpl: added routine for Pollard P-1 stage 1 * bestd.c: added missing stdlib.h * aux.c: added missing string.h * README: gcd stuff is ok now (using NTL) * Makefile: adapted for NTL interface 2002-10-24 paul * toomcook.c: Toom-Cook 3-way code from Alexander Kruppa 2002-09-13 paul * COPYING: New file. * COPYING: first version * Makefile, README, aux.c, bestd.c, check.mpl, cputime.h, ecm.c, ecm.h, getprime.c, main.c, pm1.c, poly.c, stage2.c: New file. * Makefile, README, aux.c, bestd.c, check.mpl, cputime.h, ecm.c, ecm.h, getprime.c, main.c, pm1.c, poly.c, stage2.c: first version ecm-6.4.4/INSTALL0000644023561000001540000003660012106744313010252 00000000000000Installation Instructions ************************* Copyright (C) 1994-1996, 1999-2002, 2004-2011 Free Software Foundation, Inc. 
Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without warranty of any kind. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. Some packages provide this `INSTALL' file but do not implement all of the features documented below. The lack of an optional feature in a given package is not necessarily a bug. More recommendations for GNU packages can be found in *note Makefile Conventions: (standards)Makefile Conventions. The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. 
The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package, generally using the just-built uninstalled binaries. 4. Type `make install' to install the programs and any data files and documentation. When installing into a prefix owned by root, it is recommended that the package be configured and built as a regular user, and only the `make install' phase executed with root privileges. 5. Optionally, type `make installcheck' to repeat any self-tests, but this time using the binaries in their final installed location. This target does not install anything. Running this target as a regular user, particularly if the prior `make install' required root privileges, verifies that the installation completed correctly. 6. You can remove the program binaries and object files from the source code directory by typing `make clean'. To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. 7. Often, you can also type `make uninstall' to remove the installed files again. In practice, not all packages have tested that uninstallation works correctly, even though it is required by the GNU Coding Standards. 8. 
Some packages, particularly those that use Automake, provide `make distcheck', which can be used by developers to test that all other targets like `make install' and `make uninstall' work correctly. This target is generally not run by end users. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. This is known as a "VPATH" build. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. On MacOS X 10.5 and later systems, you can create libraries and executables that work on multiple system types--known as "fat" or "universal" binaries--by specifying multiple `-arch' options to the compiler but only a single `-arch' option to the preprocessor. 
Like this: ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \ CPP="gcc -E" CXXCPP="g++ -E" This is not guaranteed to produce working output in all cases, you may have to build one architecture at a time and combine the results using the `lipo' tool if you have problems. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX', where PREFIX must be an absolute file name. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. In general, the default for these options is expressed in terms of `${prefix}', so that specifying just `--prefix' will affect all of the other directory specifications that were not explicitly provided. The most portable way to affect installation locations is to pass the correct locations to `configure'; however, many packages provide one or both of the following shortcuts of passing variable assignments to the `make install' command line to change installation locations without having to reconfigure or recompile. The first method involves providing an override variable for each affected directory. 
For example, `make install prefix=/alternate/directory' will choose an alternate location for all directory configuration variables that were expressed in terms of `${prefix}'. Any directories that were specified during `configure', but not in terms of `${prefix}', must each be overridden at install time for the entire installation to be relocated. The approach of makefile variable overrides for each directory variable is required by the GNU Coding Standards, and ideally causes no recompilation. However, some platforms have known limitations with the semantics of shared libraries that end up requiring recompilation when using this method, particularly noticeable in packages that use GNU Libtool. The second method involves providing the `DESTDIR' variable. For example, `make install DESTDIR=/alternate/directory' will prepend `/alternate/directory' before all installation names. The approach of `DESTDIR' overrides is not required by the GNU Coding Standards, and does not work on platforms that have drive letters. On the other hand, it does better at avoiding recompilation issues, and works well even when some directory options were not specified in terms of `${prefix}' at `configure' time. Optional Features ================= If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. 
For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Some packages offer the ability to configure how verbose the execution of `make' will be. For these packages, running `./configure --enable-silent-rules' sets the default to minimal output, which can be overridden with `make V=1'; while running `./configure --disable-silent-rules' sets the default to verbose, which can be overridden with `make V=0'. Particular systems ================== On HP-UX, the default C compiler is not ANSI C compatible. If GNU CC is not installed, it is recommended to use the following options in order to use an ANSI C compiler: ./configure CC="cc -Ae -D_XOPEN_SOURCE=500" and if that doesn't work, install pre-built binaries of GCC for HP-UX. HP-UX `make' updates targets which have the same time stamps as their prerequisites, which makes it generally unusable when shipped generated files such as `configure' are involved. Use GNU `make' instead. On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot parse its `<wchar.h>' header file. The option `-nodtk' can be used as a workaround. If GNU CC is not installed, it is therefore recommended to try ./configure CC="cc" and if that doesn't work, try ./configure CC="cc -nodtk" On Solaris, don't put `/usr/ucb' early in your `PATH'. This directory contains several dysfunctional programs; working variants of these programs are available in `/usr/bin'. So, if you need `/usr/ucb' in your `PATH', put it _after_ `/usr/bin'. On Haiku, software installed for all users goes in `/boot/common', not `/usr/local'. 
It is recommended to use the following options: ./configure --prefix=/boot/common Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. 
In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf bug. Until the bug is fixed you can use this workaround: CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of all of the options to `configure', and exit. `--help=short' `--help=recursive' Print a summary of the options unique to this package's `configure', and exit. The `short' variant lists options used only in the top level, while the `recursive' variant lists options also present in any nested packages. `--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `--prefix=DIR' Use DIR as the installation prefix. *note Installation Names:: for more details, including other options available for fine-tuning the installation locations. `--no-create' `-n' Run the configure checks, but stop before creating any output files. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. 
ecm-6.4.4/ecm.h0000644023561000001540000001333712106741273010142 00000000000000/* ecm.h - public interface for libecm. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Paul Zimmermann, Alexander Kruppa, David Cleaver, Cyril Bouvier. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ /* NOTE(review): the header names after both #include directives below are missing in this copy; FILE, mpz_t and gmp_randstate_t are used further down, so presumably stdio.h and gmp.h -- restore from upstream. */ #ifndef _ECM_H #define _ECM_H 1 #include /* for FILE */ #include #ifdef __cplusplus extern "C" { #endif /* Parameter block accepted by ecm_factor(), ecm_init() and ecm_clear(), which are declared later in this header. */ typedef struct { int method; /* factorization method, default is ecm */ mpz_t x; /* starting point (if non zero) */ mpz_t sigma; /* contains sigma or A (ecm only) */ int sigma_is_A; /* if 1, 'sigma' contains A (Montgomery form), if 0, 'sigma' contains sigma (Montgomery form), if -1, 'sigma' contains A, and the input curve is in Weierstrass form y^2 = x^3 + A*x + B, with y in 'go'. */ mpz_t go; /* initial group order to preload (if NULL: do nothing), or y for Weierstrass form if sigma_is_A = -1. 
*/ double B1done; /* step 1 was already done up to B1done */ mpz_t B2min; /* lower bound for stage 2 (default is B1) */ mpz_t B2; /* step 2 bound (chosen automatically if < 0.0) */ unsigned long k;/* number of blocks in stage 2 */ int S; /* degree of the Brent-Suyama's extension for stage 2 */ int repr; /* representation for modular arithmetic: ECM_MOD_MPZ=mpz, ECM_MOD_MODMULN=modmuln (Montgomery's quadratic multiplication), ECM_MOD_REDC=redc (Montgomery's subquadratic multiplication), ECM_MOD_GWNUM=Woltman's gwnum routines (tbd), > 16 : special base-2 representation ECM_MOD_DEFAULT: automatic choice */ int nobase2step2; /* disable special base-2 code in ecm stage 2 only */ int verbose; /* verbosity level: 0 no output, 1 normal output, 2 diagnostic output */ FILE *os; /* output stream (for verbose messages) */ FILE *es; /* error stream (for error messages) */ char *chkfilename; /* Filename to write stage 1 checkpoints to */ char *TreeFilename; /* Base filename for storing product tree of F */ double maxmem; /* Maximal amount of memory to use in stage 2, in bytes. 0. means no limit (optimise only for speed) */ double stage1time; /* Time to add for estimating expected time to find fac.*/ gmp_randstate_t rng; /* State of random number generator */ int use_ntt; /* set to 1 to use ntt poly code in stage 2 */ int (*stop_asap) (void); /* Pointer to function, if it returns 0, continue normally, otherwise exit asap. 
May be NULL */ int batch; /* Batch mode */ double batch_B1; /* B1 is the limit used to calculate s for batch mode */ mpz_t batch_s; /* s is the product of primes up to B1 for batch mode */ double gw_k; /* use for gwnum stage 1 if input has form k*b^n+c */ unsigned long gw_b; /* use for gwnum stage 1 if input has form k*b^n+c */ unsigned long gw_n; /* use for gwnum stage 1 if input has form k*b^n+c */ signed long gw_c; /* use for gwnum stage 1 if input has form k*b^n+c */ } __ecm_param_struct; /* ecm_params is a one-element array type, so an ecm_params object passed to a function is passed as a pointer to the underlying struct. */ typedef __ecm_param_struct ecm_params[1]; #define ECM_MOD_NOBASE2 -1 #define ECM_MOD_DEFAULT 0 #define ECM_MOD_MPZ 1 #define ECM_MOD_BASE2 2 #define ECM_MOD_MODMULN 3 #define ECM_MOD_REDC 4 /* values <= -16 or >= 16 have a special meaning */ int ecm_factor (mpz_t, mpz_t, double, ecm_params); void ecm_init (ecm_params); void ecm_clear (ecm_params); /* the following interface is not supported */ int ecm (mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, double *, double, mpz_t, mpz_t, double, unsigned long, const int, int, int, int, int, int, FILE*, FILE*, char*, char *, double, double, gmp_randstate_t, int (*)(void), int, mpz_t, double, unsigned long, unsigned long, signed long); int pp1 (mpz_t, mpz_t, mpz_t, mpz_t, double *, double, mpz_t, mpz_t, double, unsigned long, const int, int, int, int, FILE*, FILE*, char*, char *, double, gmp_randstate_t, int (*)(void)); int pm1 (mpz_t, mpz_t, mpz_t, mpz_t, double *, double, mpz_t, mpz_t, double, unsigned long, const int, int, int, int, FILE*, FILE*, char *, char*, double, gmp_randstate_t, int (*)(void)); /* different methods implemented */ #define ECM_ECM 0 #define ECM_PM1 1 #define ECM_PP1 2 /* return value of ecm, pm1, pp1 */ #define ECM_FACTOR_FOUND_STEP1 1 /* should be positive */ #define ECM_FACTOR_FOUND_STEP2 2 /* should be positive */ #define ECM_NO_FACTOR_FOUND 0 /* should be zero */ #define ECM_ERROR -1 /* 
should be negative: ECM_ERROR_P(x) tests (x) < 0 */ #define ECM_FACTOR_FOUND_P(x) ((x) > 0) #define ECM_ERROR_P(x) ((x) < 0) #define ECM_DEFAULT_B1_DONE 1.0 #define 
ECM_IS_DEFAULT_B1_DONE(x) (x <= 1.0) /* stage 2 bound */ #define ECM_DEFAULT_B2 -1 #define ECM_IS_DEFAULT_B2(x) (mpz_sgn (x) < 0) #define ECM_DEFAULT_K 0 /* default number of blocks in stage 2. 0 = automatic choice */ #define ECM_DEFAULT_S 0 /* polynomial is chosen automatically */ /* Apple uses '\r' for newlines */ #define IS_NEWLINE(c) (((c) == '\n') || ((c) == '\r')) #ifdef __cplusplus } #endif #endif /* _ECM_H */ ecm-6.4.4/config.h.in0000644023561000001540000001716312113354031011237 00000000000000/* config.h.in. Generated from configure.in by autoheader. */ /* How to specify hot-spot attribute, if available */ #undef ATTRIBUTE_HOT /* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP systems. This function is required for `alloca.c' support on those systems. */ #undef CRAY_STACKSEG_END /* Define to 1 if using `alloca.c'. */ #undef C_ALLOCA /* Define to 1 if you have the `access' function. */ #undef HAVE_ACCESS /* Define to 1 if you have `alloca', as a function or macro. */ #undef HAVE_ALLOCA /* Define to 1 if you have and it should be used (not on Ultrix). */ #undef HAVE_ALLOCA_H /* Define to 1 if you have the `ctime' function. */ #undef HAVE_CTIME /* Define to 1 if you have the header file. */ #undef HAVE_CTYPE_H /* Define to 1 if you have the header file. */ #undef HAVE_DLFCN_H /* Define to 1 if you have the `fcntl' function. */ #undef HAVE_FCNTL /* Define to 1 if you have the header file. */ #undef HAVE_FCNTL_H /* Define to 1 if you have the `fileno' function. */ #undef HAVE_FILENO /* Define to 1 if you have the `gethostname' function. */ #undef HAVE_GETHOSTNAME /* Define to 1 if you have the `getrusage' function. */ #undef HAVE_GETRUSAGE /* Define to 1 if you have the `gettimeofday' function. */ #undef HAVE_GETTIMEOFDAY /* Define to 1 if you have the header file. */ #undef HAVE_GMP_H /* Define to 1 if gwnum.a or gwnum.lib exist */ #undef HAVE_GWNUM /* Define to 1 if you have the header file. 
*/
#undef HAVE_INTTYPES_H

/* Define to 1 if you have the <io.h> header file. */
#undef HAVE_IO_H

/* Define to 1 if you have the `isascii' function. */
#undef HAVE_ISASCII

/* Define to 1 if you have the `isdigit' function. */
#undef HAVE_ISDIGIT

/* Define to 1 if you have the `isspace' function. */
#undef HAVE_ISSPACE

/* Define to 1 if you have the `isxdigit' function. */
#undef HAVE_ISXDIGIT

/* Define to 1 if you have the `gsl' library (-lgsl). */
#undef HAVE_LIBGSL

/* Define to 1 if you have the `gslcblas' library (-lgslcblas). */
#undef HAVE_LIBGSLCBLAS

/* Define to 1 if you have the `m' library (-lm). */
#undef HAVE_LIBM

/* Define to 1 if you have the <limits.h> header file. */
#undef HAVE_LIMITS_H

/* Define to 1 if the system has the type `long long int'. */
#undef HAVE_LONG_LONG_INT

/* Define to 1 if you have the <malloc.h> header file. */
#undef HAVE_MALLOC_H

/* Define to 1 if you have the `malloc_usable_size' function. */
#undef HAVE_MALLOC_USABLE_SIZE

/* Define to 1 if you have the <math.h> header file. */
#undef HAVE_MATH_H

/* Define to 1 if you have the `memmove' function. */
#undef HAVE_MEMMOVE

/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H

/* Define to 1 if you have the `memset' function. */
#undef HAVE_MEMSET

/* Define to 1 if GMP is MPIR */
#undef HAVE_MPIR

/* Define to 1 if you have the `nice' function. */
#undef HAVE_NICE

/* Define to 1 if you have the `setpriority' function. */
#undef HAVE_SETPRIORITY

/* Define to 1 if you have the `signal' function. */
#undef HAVE_SIGNAL

/* Define to 1 if you have the <signal.h> header file. */
#undef HAVE_SIGNAL_H

/* Define to 1 to enable SSE2 instructions in NTT code */
#undef HAVE_SSE2

/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H

/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H

/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR

/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H

/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H

/* Define to 1 if you have the `strlen' function. */
#undef HAVE_STRLEN

/* Define to 1 if you have the `strncasecmp' function. */
#undef HAVE_STRNCASECMP

/* Define to 1 if you have the `strstr' function. */
#undef HAVE_STRSTR

/* Define to 1 if you have the <sys/resource.h> header file. */
#undef HAVE_SYS_RESOURCE_H

/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H

/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H

/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H

/* Define to 1 if you have the `time' function. */
#undef HAVE_TIME

/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H

/* Define to 1 if you have the `unlink' function. */
#undef HAVE_UNLINK

/* Define to 1 if you have the <windows.h> header file. */
#undef HAVE_WINDOWS_H

/* Define to 1 if you have the `__gmpn_add_nc' function. */
#undef HAVE___GMPN_ADD_NC

/* Define to 1 if you have the `__gmpn_mod_34lsub1' function. */
#undef HAVE___GMPN_MOD_34LSUB1

/* Define to 1 if you have the `__gmpn_mullo_n' function. */
#undef HAVE___GMPN_MULLO_N

/* Define to 1 if you have the `__gmpn_preinv_mod_1' function. */
#undef HAVE___GMPN_PREINV_MOD_1

/* Define to 1 if you have the `__gmpn_redc_1' function. */
#undef HAVE___GMPN_REDC_1

/* Define to 1 if you have the `__gmpn_redc_2' function. */
#undef HAVE___GMPN_REDC_2

/* Define to 1 if you have the `__gmpn_redc_n' function. */
#undef HAVE___GMPN_REDC_N

/* Define to the sub-directory in which libtool stores uninstalled libraries.
   */
#undef LT_OBJDIR

/* Define to 1 if you want memory debugging */
#undef MEMORY_DEBUG

/* Define to 1 if you want Svoboda mulredc */
#undef MULREDC_SVOBODA

/* Define to 1 if your C compiler doesn't accept -c and -o together. */
#undef NO_MINUS_C_MINUS_O

/* Name of package */
#undef PACKAGE

/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT

/* Define to the full name of this package. */
#undef PACKAGE_NAME

/* Define to the full name and version of this package. */
#undef PACKAGE_STRING

/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME

/* Define to the home page for this package. */
#undef PACKAGE_URL

/* Define to the version of this package. */
#undef PACKAGE_VERSION

/* If using the C implementation of alloca, define if you know the
   direction of stack growth for your system; otherwise it will be
   automatically deduced at runtime.
	STACK_DIRECTION > 0 => grows toward higher addresses
	STACK_DIRECTION < 0 => grows toward lower addresses
	STACK_DIRECTION = 0 => direction of growth unknown */
#undef STACK_DIRECTION

/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS

/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME

/* Define to 1 to use asm redc */
#undef USE_ASM_REDC

/* Version number of package */
#undef VERSION

/* Define to 1 if you want assertions enabled */
#undef WANT_ASSERT

/* Define to 1 if you want shell command execution */
#undef WANT_SHELLCMD

/* Define to 1 if x86_64 mulredc*() functions should be called with Windows
   ABI */
#undef WINDOWS64_ABI

/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
   #define below would cause a syntax error. */
#undef _UINT64_T

/* Define to empty if `const' does not conform to ANSI C. */
#undef const

/* Define to `__inline__' or `__inline' if that's what the C compiler
   calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
#undef inline
#endif

/* Define to the type of a signed integer type of width exactly 64 bits if
   such a type exists and the standard includes do not define it. */
#undef int64_t

/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t

/* Define to the type of an unsigned integer type of width exactly 64 bits if
   such a type exists and the standard includes do not define it.
*/ #undef uint64_t ecm-6.4.4/Makefile.am0000644023561000001540000001306312113421072011243 00000000000000## Process this file with automake to produce Makefile.in ACLOCAL_AMFLAGS = -I m4 # to not install libecm.la, we could write noinst_LTLIBRARIES instead of # lib_LTLIBRARIES below, however then libecm.a is not installed either # (see http://www.gnu.org/software/automake/manual/html_node/Libtool-Convenience-Libraries.html) lib_LTLIBRARIES = libecm.la EXTRA_PROGRAMS = rho batch # If we want assembly mulredc code, recurse into the right subdirectory # and set up variables to include the mulredc library from that subdir if ENABLE_ASM_REDC SUBDIRS = $(ASMPATH) MULREDCINCPATH = -I$(srcdir)/$(ASMPATH) MULREDCLIBRARY = $(builddir)/$(ASMPATH)/libmulredc.la # Add a tuning and testing program for the mulredc code EXTRA_PROGRAMS += bench_mulredc test_mulredc CLEANFILES = bench_mulredc test_mulredc bench_mulredc_CPPFLAGS = $(MULREDCINCPATH) bench_mulredc_LDADD = $(MULREDCLIBRARY) $(GMPLIB) test_mulredc_CPPFLAGS = $(MULREDCINCPATH) test_mulredc_LDADD = $(MULREDCLIBRARY) $(GMPLIB) else # Add a tuning program for the mulredc code EXTRA_PROGRAMS += bench_mulredc CLEANFILES = bench_mulredc bench_mulredc_LDADD = $(GMPLIB) endif libecm_la_SOURCES = ecm.c ecm2.c pm1.c pp1.c getprime.c listz.c lucas.c \ stage2.c toomcook.c mpmod.c mul_lo.c polyeval.c median.c \ schoen_strass.c ks-multiply.c rho.c bestd.c auxlib.c \ random.c factor.c sp.c spv.c spm.c mpzspm.c mpzspv.c \ ntt_gfp.c ecm_ntt.c pm1fs2.c mul_fft.c sets_long.c \ auxarith.c batch.c ellparam_batch.c # Link the asm redc code (if we use it) into libecm.la libecm_la_CPPFLAGS = $(MULREDCINCPATH) libecm_la_CFLAGS = $(OPENMP_CFLAGS) libecm_la_LDFLAGS = '-version-info 0:0:0' libecm_la_LIBADD = $(MULREDCLIBRARY) bin_PROGRAMS = ecm noinst_PROGRAMS = tune ecmfactor bench_mulredc # Most binaries want to link libecm.la, and the ones which don't will # have their own _LDADD which overrides the default LDADD here LDADD = libecm.la $(GMPLIB) 
ecm_CPPFLAGS = -DOUTSIDE_LIBECM ecm_CFLAGS = $(OPENMP_CFLAGS) ecm_SOURCES = auxi.c b1_ainc.c candi.c eval.c random.c main.c \ resume.c getprime.c champions.h tune_SOURCES = mpmod.c tune.c mul_lo.c listz.c auxlib.c ks-multiply.c \ toomcook.c schoen_strass.c polyeval.c median.c ecm_ntt.c \ ntt_gfp.c mpzspv.c mpzspm.c sp.c spv.c spm.c random.c \ mul_fft.c auxarith.c tune_CPPFLAGS = -DTUNE $(MULREDCINCPATH) tune_LDADD = $(MULREDCLIBRARY) $(GMPLIB) ecmfactor_CFLAGS = $(OPENMP_CFLAGS) rho_SOURCES = rho.c rho_CPPFLAGS = -DTESTDRIVE rho_LDADD = -lprimegen $(GMPLIB) $(GSL_LD_FLAGS) batch_SOURCES = batch.c batch_LDADD = $(GMPLIB) if MEMORY_DEBUG libecm_la_SOURCES += memory.c tune_SOURCES += memory.c endif if WITH_GWNUM gwdata.ld : echo "SECTIONS { .data : { . = ALIGN(0x20); *(_GWDATA) } }" >gwdata.ld libecm_la_SOURCES += Fgw.c # Use ecm_DEPENDENCIES += gwdata.ld instead? Is that possible? ecm_DEPENDENCIES = gwdata.ld ecm_LDFLAGS = $(AM_LDFLAGS) -Wl,gwdata.ld Fgwtest : Fgw.c gwdata.ld $(CC) $(CFLAGS) $(CPPFLAGS) -g -DTESTDRIVE -Wl,gwdata.ld -o Fgwtest Fgw.c libecm.a $(LIBS) endif include_HEADERS = ecm.h noinst_HEADERS = ecm-impl.h ecm-gmp.h ecm-ecm.h sp.h longlong.h ecm-params.h \ mpmod.h EXTRA_DIST = test.pm1 test.pp1 test.ecm README.lib INSTALL-ecm ecm.xml \ ecm-params.h.alpha-ev5 ecm-params.h.athlon64 \ ecm-params.h.default ecm-params.h.alpha-ev6 \ ecm-params.h.athlon ecm-params.h.powerpc7450 \ ecm-params.h.pentium3 ecm-params.h.pentium4 \ ecm-params.h.pentium-m ecm-params.h.powerpc970 \ ecm-params.h.mips64el ecm-params.h.armv5tel \ ecm-params.h.sparc64 ecm-params.h.ia64 \ ecm-params.h.hppa2.0 ecm-params.h.alpha-ev56 \ ecm-params.h.core2 ecm-params.h.corei5 \ mul_fft-params.h.athlon64 mul_fft-params.h.pentium3 \ mul_fft-params.h.default mul_fft-params.h.pentium4 DIST_SUBDIRS = athlon pentium4 x86_64 powerpc64 build.vc10 DISTCLEANFILES = config.m4 ecm-params: tune$(EXEEXT) @echo Optimising parameters for your system, please be patient. 
test -z "ecm-params.h" || rm -f ecm-params.h ./tune > ecm-params.h check: ecm$(EXEEXT) $(srcdir)/test.pp1 ./ecm$(EXEEXT) echo "" $(srcdir)/test.pm1 ./ecm$(EXEEXT) echo "" $(srcdir)/test.ecm ./ecm$(EXEEXT) longcheck: ecm$(EXEEXT) $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT)" $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT) -no-ntt" $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT) -modmuln" $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT) -redc" $(srcdir)/test.pp1 "$(VALGRIND) ./ecm$(EXEEXT) -mpzmod" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT)" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT) -no-ntt" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT) -modmuln" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT) -redc" $(srcdir)/test.pm1 "$(VALGRIND) ./ecm$(EXEEXT) -mpzmod" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT)" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT) -no-ntt" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT) -modmuln" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT) -redc" $(srcdir)/test.ecm "$(VALGRIND) ./ecm$(EXEEXT) -mpzmod" ## to be sure ecm.1 is considered as source ## (cf section "Man pages" in the automake manual) dist_man_MANS = ecm.1 ## If ./configure found xsltproc and docbook.xsl, add a rule for building ## the manpage. If they were not found, this rule is omitted and make will ## never try to rebuild the man page, which would result in an error anyway. if MAKE_MANPAGE ecm.1: $(srcdir)/ecm.xml xsltproc -o ecm.1 $(XSLDIR)/manpages/docbook.xsl $(srcdir)/ecm.xml endif ecm-6.4.4/x86_64/0000755023561000001540000000000012113421641010224 500000000000000ecm-6.4.4/x86_64/autogen.py0000755023561000001540000002135212106741272012175 00000000000000#!/usr/bin/python import re import sys def offaddr(addr, offset): if offset == 0: return "("+addr+")" else: return str(offset)+"("+addr+")" # Generate asm for addmul1_k # src and dst are pointers (stored in regs) + offsets # multiplier is in a register # rax, rbx, rcx, rdx are free for use. 
def addmul1_k(src, off_src, dst, off_dst, mult, k):
    """Emit x86-64 assembly text for dst[0,k[ += mult * src[0,k[.

    src and dst are register names used as base pointers, off_src and
    off_dst are byte offsets from them, mult is the operand handed to
    mulq, and k is the number of 8-byte words.  The emitted code uses
    %rax, %rbx, %rcx and %rdx as scratch.

    Returns a pair (code, carry_reg): the generated text and the name of
    the register named in the closing "carry limb is in" comment.
    """
    # NOTE(review): the string literals are copied exactly as they appear in
    # this view of the file, where whitespace looks collapsed to single
    # spaces; `template` in particular presumably holds one instruction per
    # line upstream -- confirm before regenerating any .asm file from it.
    src0 = offaddr(src, off_src)
    pieces = [
        "### addmul1: src[0] is " + src0 + "\n",
        "### dst[0] is " + offaddr(dst, off_dst) + "\n",
        "### mult is " + mult + "\n",
        "### k is " + str(k) + "\n",
        "### kills %rax, %rbx, %rcx, %rdx\n",
        "### dst[0,k[ += mult*src[0,k[ plus carry put in rcx or rbx\n",
        # First product: low word goes to %rbx, high word to %rcx.
        " movq " + src0 + ", %rax\n",
        " mulq " + mult + "\n",
        " movq %rax, %rbx\n",
        " movq %rdx, %rcx\n",
    ]

    template = """ movq __xii__, %rax mulq __mult__ addq __cylo__, __zi__ adcq %rax, __cyhi__ movq %rdx, __cylo__ adcq $0, __cylo__ """

    # The two carry registers trade roles after every word.
    lo, hi = "%rbx", "%rcx"
    for step in range(k - 1):
        substitutions = (
            ('__cylo__', lo),
            ('__cyhi__', hi),
            ('__xii__', offaddr(src, off_src + (step + 1) * 8)),
            ('__zi__', offaddr(dst, off_dst + step * 8)),
            ('__mult__', mult),
        )
        chunk = template
        for placeholder, value in substitutions:
            chunk = re.sub(placeholder, value, chunk)
        pieces.append(chunk)
        lo, hi = hi, lo

    # Fold the pending low word into the last destination word.
    pieces.append(" addq " + lo + ", " + offaddr(dst, off_dst + 8 * (k - 1)) + "\n")
    pieces.append(" adcq $0, " + hi + "\n")
    pieces.append("### carry limb is in " + hi + "\n")
    return "".join(pieces), hi


######## TODO: improve this code!!!!
# NOTE(review): throughout this span the string literals are reproduced
# exactly as they appear in this view of the file, where whitespace looks
# collapsed to single spaces.  The triple-quoted templates presumably hold one
# assembly line per source line upstream -- confirm before regenerating .asm.
def mul1_k(src, off_src, dst, off_dst, mult, k):
    """Emit assembly text for dst[0,k[ = mult * src[0,k[.

    Same arguments as addmul1_k, but the destination words are written with
    movq instead of being added to.  Only the generated text is returned;
    the register named in the closing "carry limb" comment is not handed
    back to the caller.
    """
    init = "### mul1: src[0] is " + offaddr(src, off_src) + "\n"
    init = init + "### dst[0] is " + offaddr(dst, off_dst) + "\n"
    init = init + "### mult is " + mult + "\n"
    init = init + "### k is " + str(k) + "\n"
    init = init + "### kills %rax, %rbx, %rcx, %rdx\n"
    init = init + "### dst[0,k[ = mult*src[0,k[ plus carry put in rcx or rbx\n"
    # First product: low word to %rbx, high word to %rcx.
    init = init + " movq " + offaddr(src, off_src) + ", %rax\n"
    init = init + " mulq " + mult + "\n"
    init = init + " movq %rax, %rbx\n"
    init = init + " movq %rdx, %rcx\n"

    # Per-word template; the __name__ placeholders are substituted below.
    block = """ movq __xii__, %rax mulq __mult__ movq __cylo__, __zi__ addq %rax, __cyhi__ movq %rdx, __cylo__ adcq $0, __cylo__ """

    code = init

    cylo = "%rbx"
    cyhi = "%rcx"
    for i in range(0,k-1):
        blocki = re.sub('__cylo__', cylo, block)
        blocki = re.sub('__cyhi__', cyhi, blocki)
        blocki = re.sub('__xii__', offaddr(src, off_src+(i+1)*8), blocki)
        blocki = re.sub('__zi__', offaddr(dst, off_dst+i*8), blocki)
        blocki = re.sub('__mult__', mult, blocki)
        code = code + blocki
        # The two carry registers trade roles after every word.
        tmp = cylo
        cylo = cyhi
        cyhi = tmp

    final = " movq " + cylo + ", " + offaddr(dst, off_dst+8*(k-1)) + "\n"
    final = final + "### carry limb is in " + cyhi + "\n"
    code = code + final
    return code

def mulredc_k_rolled(k):
    """Return the assembly text of mulredc<k>, with the outer loop rolled.

    The emitted function reserves 8*(2*k+1) bytes of stack as a zeroed
    temporary, then runs a loop labelled "Loop" k times with %rbp as the
    counter.  Each pass computes u into %r9, emits two addmul1_k sequences
    (u times the words at %r10, then the word at (%rsi) times the words at
    %r11) and advances %rsi and %rsp by 8.  Afterwards k words are copied
    from the stack to (%rdi) and the word at 8*k(%rsp) is loaded into %rax.
    """
    header = """# mp_limb_t mulredc__k(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc__k TYPE(GSYM_PREFIX`'mulredc__k,`function') GSYM_PREFIX`'mulredc__k: """
    # Specialise the function name for this k.
    init = re.sub("__k", str(k), header)

    init = init + """ movq %rdx, %r11 movq %rcx, %r10 pushq %rbx pushq %rbp """
    init = init + " subq $" + str(8*(2*k+1)) + ", %rsp\n"
    init = init + """# %r8 : inv_m # %r10 : m # %r11 : y # %rsi : x # %rdi : z # %rsp : tmp # Free registers # %rax, %rbx, %rcx, %rdx, %r9 ### set tmp[0..2k+1[ to 0 """
    for i in range(0,2*k+1):
        init = init + " movq $0, " + offaddr("%rsp", 8*i) + "\n"

    code = init

    middle_code = "###########################################\n"
    middle_code = middle_code + " movq $" + str(k) + ", %rbp\n"
    middle_code = middle_code + """ .align 64 Loop: ## compute u and store in %r9 movq (%rsi), %rax mulq (%r11) addq (%rsp), %rax mulq %r8 movq %rax, %r9 """
    # tmp += u * m, then propagate the returned carry register into tmp[k..k+1]
    codeaddmul, carry = addmul1_k("%r10", 0, "%rsp", 0, "%r9", k)
    middle_code = middle_code + codeaddmul
    middle_code = middle_code + " addq " + carry + ", " + offaddr("%rsp", 8*k) + "\n"
    middle_code = middle_code + " adcq $0, " + offaddr("%rsp", 8*(k+1)) + "\n"
    # tmp += x[i] * y, same carry handling
    middle_code = middle_code + " movq (%rsi), %r9\n"
    codeaddmul, carry = addmul1_k("%r11", 0, "%rsp", 0, "%r9", k)
    middle_code = middle_code + codeaddmul
    middle_code = middle_code + " addq " + carry + ", " + offaddr("%rsp", 8*k) + "\n"
    middle_code = middle_code + " adcq $0, " + offaddr("%rsp", 8*(k+1)) + "\n\n"
    middle_code = middle_code + """ addq $8, %rsi addq $8, %rsp decq %rbp jnz Loop """
    code = code + middle_code

    final = "###########################################\n"
    final = final + "### Copy result in z\n"
    for i in range(0,k):
        final = final + " movq " + offaddr("%rsp", 8*i) + ", %rax\n"
        final = final + " movq %rax, " + offaddr("%rdi", 8*i) + "\n"
    final = final + " movq " + offaddr("%rsp", 8*k) + ", %rax # carry\n"
    # The loop already advanced %rsp by 8*k; 8*(k+1) more releases the
    # 8*(2*k+1) bytes reserved above.
    final = final + " addq $" + str(8*(k+1)) + ", %rsp\n"
    final = final + " popq %rbp\n"
    final = final + " popq %rbx\n"
    final = final + " ret\n"

    code = code + final

    return code

def mulredc_k(k):
    """Return the assembly text of mulredc<k>, fully unrolled over k steps.

    Unlike mulredc_k_rolled, %rsp stays fixed: step i addresses the
    temporary at byte offset 8*i, the result is copied from stack offsets
    8*(k+i) and the word at 16*k(%rsp) is loaded into %rax.  Only %rbx is
    pushed.
    """
    header = """# mp_limb_t mulredc__k(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc__k TYPE(GSYM_PREFIX`'mulredc__k,`function') GSYM_PREFIX`'mulredc__k: """
    # Specialise the function name for this k.
    init = re.sub("__k", str(k), header)

    init = init + """ movq %rdx, %r11 movq %rcx, %r10 pushq %rbx """
    init = init + " subq $" + str(8*(2*k+1)) + ", %rsp\n"
    init = init + """# %r8 : inv_m # %r10 : m # %r11 : y # %rsi : x # %rdi : z # %rsp : tmp # Free registers # %rax, %rbx, %rcx, %rdx, %r9 ### set tmp[0..2k+1[ to 0 """
    for i in range(0,2*k+1):
        init = init + " movq $0, " + offaddr("%rsp", 8*i) + "\n"

    code = init

    for i in range(0,k):
        blocki = "###########################################\n"
        blocki = blocki + "### Step " + str(i) + "\n"
        blocki = blocki + "### Compute u and store in %r9\n"
        blocki = blocki + " movq " + offaddr("%rsi", 8*i) + ", %rax\n"
        blocki = blocki + " mulq (%r11)\n"
        blocki = blocki + " addq " + offaddr("%rsp", 8*i) + ", %rax\n"
        blocki = blocki + " mulq %r8\n"
        blocki = blocki + " movq %rax, %r9\n"
        blocki = blocki + "### tmp[i,i+k] += x[i]*y + u*m\n"
        # u * m first, then x[i] * y; each carry goes into tmp[k+i..k+i+1]
        codeaddmul, carry = addmul1_k("%r10", 0, "%rsp", 8*i, "%r9", k)
        blocki = blocki + codeaddmul
        blocki = blocki + " addq " + carry + ", " + offaddr("%rsp", 8*(k+i)) + "\n"
        blocki = blocki + " adcq $0, " + offaddr("%rsp", 8*(k+i+1)) + "\n"
        blocki = blocki + " movq " + offaddr("%rsi", 8*i) + ", %r9\n"
        codeaddmul, carry = addmul1_k("%r11", 0, "%rsp", 8*i, "%r9", k)
        blocki = blocki + codeaddmul
        blocki = blocki + " addq " + carry + ", " + offaddr("%rsp", 8*(k+i)) + "\n"
        blocki = blocki + " adcq $0, " + offaddr("%rsp", 8*(k+i+1)) + "\n"
        code = code + blocki

    final = "###########################################\n"
    final = final + "### Copy result in z\n"
    for i in range(0,k):
        final = final + " movq " + offaddr("%rsp", 8*(k+i)) + ", %rax\n"
        final = final + " movq %rax, " + offaddr("%rdi", 8*i) + "\n"
    final = final + " movq " + offaddr("%rsp", 16*k) + ", %rax # carry\n"
    final = final + " addq $" + str(8*(2*k+1)) + ", %rsp\n"
    final = final + " popq %rbx\n"
    final = final + " ret\n"

    code = code + final

    return code


##print addmul1_k("%rsi", 0, "%dsi", 0, "%r9", 3)

# Driver: the single command-line argument is the operand size k in words.
k = int(sys.argv[1])
if k == 1:
    print """# # mp_limb_t mulredc1(mp_limb_t * z, const mp_limb_t x, const mp_limb_t y, # const mp_limb_t m, mp_limb_t inv_m) # # Compute z := x*y mod m, in Montgomery representation, where x, y < m # and m is n limb wide.
inv_m is the less significant limb of the # inverse of m modulo 2^(n*GMP_LIMB_BITS) # # The result might be unreduced (larger than m) but becomes reduced # after subtracting m. The calling function should take care of that. # # We use a temporary space for unreduced product on the stack. # Therefore, this can not be used for large integers (anyway, the # algorithm is quadratic). # # WARNING: z is only n limbs but since it might be unreduced, there # could be a carry that does not fit in z. This carry is returned. include(`config.m4') TEXT GLOBL GSYM_PREFIX`'mulredc1 TYPE(GSYM_PREFIX`'mulredc1,`function') GSYM_PREFIX`'mulredc1: # %r8 : inv_m # %rcx : m # %rdx : y # %rsi : x # %rdi : z movq %rdx, %rax mulq %rsi movq %rdx, %r10 movq %rax, %r9 # store xy in [r9:r10] mulq %r8 # compute u mulq %rcx # compute u*m addq %r9, %rax # rax is 0, now (carry is important) adcq %r10, %rdx movq %rdx, (%rdi) adcq $0, %rax ret """ else: print mulredc_k_rolled(k) ecm-6.4.4/x86_64/mulredc3.asm0000644023561000001540000002451312113421640012370 00000000000000# mp_limb_t mulredc3(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. 
# That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc3 TYPE(GSYM_PREFIX`'mulredc`'3,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x arraz define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. 
Same as passed in' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc3: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $32, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 8(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 16(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 24(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2. Don't fetch new data from y[j+1]. movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 8(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 16(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 24(%TP) # Store CY in tmp[j+1] cmpq $3, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq %rax, 16(%ZP) movl %CYl, %eax # use carry as return value addq $32, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc.h0000644023561000001540000001240312106741272011757 00000000000000#ifndef __ASM_REDC_H__ #define __ASM_REDC_H__ #include "config.h" #include /* Signals that we have assembly code for 1xN mul/redc */ #define HAVE_NATIVE_MULREDC1_N /* Signals that we have assembly code for variable size redc */ #define HAVE_ASM_REDC3 /* Call the mulredc*() function with MS Windows parameter passing if WINDOWS64_ABI is defined. 
This is useful for testing the functions with Microsoft ABI under Linux */
#ifdef WINDOWS64_ABI
#define MULREDC_ABI __attribute__((ms_abi))
#else
#define MULREDC_ABI
#endif

/* MULREDC_ABI is appended to every prototype below. It expands to the
   ms_abi function attribute when WINDOWS64_ABI is defined and to nothing
   otherwise, so all assembly entry points share one calling convention. */

/* Assembly REDC for operands of variable size (the routine advertised by
   HAVE_ASM_REDC3).
   NOTE(review): the roles of the four arguments are not visible in this
   header -- confirm them against the assembly source before relying on
   any particular order. */
extern void ecm_redc3(mp_limb_t *, const mp_limb_t *, mp_size_t, mp_limb_t)
  MULREDC_ABI;

/* WARNING: the size-1 version doesn't take pointers in input */
/* Only the first argument of mulredc1 is a pointer; the three operands that
   are passed as const pointers in the larger sizes are single limbs passed
   by value here. */
extern mp_limb_t mulredc1(mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t,
                          mp_limb_t) MULREDC_ABI;

/* mulredcN for N = 2 ... 20: combined multiplication and REDC on N-limb
   operands.  The header comment of the mulredc14 assembly source names the
   arguments (z, x, y, m, inv_m): z receives the N-limb result, x and y are
   the two factors, m is the modulus and inv_m the per-limb inverse used to
   compute the REDC multiplier.  The returned limb is the final carry word.
   NOTE(review): the other sizes are presumably generated from the same
   template and follow the same contract -- confirm. */
extern mp_limb_t mulredc2(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                          const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc3(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                          const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc4(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                          const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc5(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                          const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc6(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                          const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc7(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                          const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc8(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                          const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc9(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                          const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc10(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc11(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc12(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc13(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc14(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc15(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc16(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc17(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc18(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc19(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc20(mp_limb_t *, const mp_limb_t *, const mp_limb_t *,
                           const mp_limb_t *, mp_limb_t) MULREDC_ABI;

/* mulredc1_N for N = 2 ... 20: the 1xN mul/redc routines advertised by
   HAVE_NATIVE_MULREDC1_N.  They differ from mulredcN in the second argument,
   which is a single limb passed by value instead of a pointer.
   NOTE(review): presumably a one-limb factor multiplied by an N-limb factor
   modulo an N-limb modulus -- confirm against the assembly source. */
extern mp_limb_t mulredc1_2(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                            const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_3(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                            const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_4(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                            const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_5(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                            const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_6(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                            const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_7(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                            const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_8(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                            const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_9(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                            const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_10(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                             const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_11(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
                             const mp_limb_t *, mp_limb_t) MULREDC_ABI;
extern mp_limb_t mulredc1_12(mp_limb_t *, const mp_limb_t, const mp_limb_t *,
const mp_limb_t *, mp_limb_t) MULREDC_ABI; extern mp_limb_t mulredc1_13(mp_limb_t *, const mp_limb_t, const mp_limb_t *, const mp_limb_t *, mp_limb_t) MULREDC_ABI; extern mp_limb_t mulredc1_14(mp_limb_t *, const mp_limb_t, const mp_limb_t *, const mp_limb_t *, mp_limb_t) MULREDC_ABI; extern mp_limb_t mulredc1_15(mp_limb_t *, const mp_limb_t, const mp_limb_t *, const mp_limb_t *, mp_limb_t) MULREDC_ABI; extern mp_limb_t mulredc1_16(mp_limb_t *, const mp_limb_t, const mp_limb_t *, const mp_limb_t *, mp_limb_t) MULREDC_ABI; extern mp_limb_t mulredc1_17(mp_limb_t *, const mp_limb_t, const mp_limb_t *, const mp_limb_t *, mp_limb_t) MULREDC_ABI; extern mp_limb_t mulredc1_18(mp_limb_t *, const mp_limb_t, const mp_limb_t *, const mp_limb_t *, mp_limb_t) MULREDC_ABI; extern mp_limb_t mulredc1_19(mp_limb_t *, const mp_limb_t, const mp_limb_t *, const mp_limb_t *, mp_limb_t) MULREDC_ABI; extern mp_limb_t mulredc1_20(mp_limb_t *, const mp_limb_t, const mp_limb_t *, const mp_limb_t *, mp_limb_t) MULREDC_ABI; #endif ecm-6.4.4/x86_64/mulredc14.asm0000644023561000001540000010647212113421640012457 00000000000000# mp_limb_t mulredc14(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. 
# That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc14 TYPE(GSYM_PREFIX`'mulredc`'14,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x arraz define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. 
Same as passed in' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc14: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $120, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 80(%TP) `#' Store T0 in tmp[11-1] movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 88(%TP) `#' Store T0 in tmp[12-1] movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 104(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 96(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 104(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 112(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at 
entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 88(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 80(%TP) `#' Store T0 in tmp[11-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 104(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 96(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 88(%TP) `#' Store T0 in tmp[12-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13. Don't fetch new data from y[j+1]. 
movq 112(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 104(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 96(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 104(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 112(%TP) # Store CY in tmp[j+1] cmpq $14, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movq 96(%TP), %rax movq 104(%TP), %rdx movq %rax, 96(%ZP) movq %rdx, 104(%ZP) movl %CYl, %eax # use carry as return value addq $120, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc5.asm0000644023561000001540000003555712113421640012404 00000000000000# mp_limb_t mulredc5(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. 
by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc5 TYPE(GSYM_PREFIX`'mulredc`'5,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # low 32 bits of the loop counter register I define(`ZP', `rdi')dnl # register that holds z. 
Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm, loaded from the stack' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc5: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $48, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 24(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 32(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 40(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] # init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # Add high word with carry to T1 movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4. Don't fetch new data from y[j+1]. 
movq 40(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 24(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 32(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 40(%TP) # Store CY in tmp[j+1] cmpq $5, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq %rax, 32(%ZP) movl %CYl, %eax # use carry as return value addq $48, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc1_8.asm0000644023561000001540000002514312113421641012616 00000000000000# mp_limb_t mulredc1_8(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 0x28(%rsp) on entry # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_8 TYPE(GSYM_PREFIX`'mulredc1_`'8,`function') # Implements multiplication and REDC for one input number of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_8: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multipy u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = 
x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 48(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 56(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1_13.asm0000644023561000001540000004073212113421641012673 00000000000000# mp_limb_t mulredc1_13(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) at entry # (72(%rsp) after the four pushes below) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_13 TYPE(GSYM_PREFIX`'mulredc1_`'13,`function') # Implements multiplication and REDC for one input number of 13 words # and a multiplier of one word. Stores 13 words to z and returns the # final carry in %rax. ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_13: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X 
= x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 80(ZP) # Store T0 in z[11-1] movq 96(YP), %rax # Fetch y[j+1] = y[12] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 12. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 96(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 88(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 96(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1_11.asm0000644023561000001540000003416612113421641012675 00000000000000# mp_limb_t mulredc1_11(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) at entry # (72(%rsp) after the four pushes below) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_11 TYPE(GSYM_PREFIX`'mulredc1_`'11,`function') # Implements multiplication and REDC for one input number of 11 words # and a multiplier of one word. Stores 11 words to z and returns the # final carry in %rax. ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_11: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X 
= x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 72(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 80(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/generate_all0000755023561000001540000000026212106741272012523 00000000000000#!/bin/sh for i in 1 2; do ./autogen.py $i > mulredc$i.asm done for i in 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do m4 -DLENGTH=$i mulredc.m4 > mulredc$i.asm done ecm-6.4.4/x86_64/mulredc1_14.asm0000644023561000001540000004321712113421641012675 00000000000000# mp_limb_t mulredc1_14(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_14 TYPE(GSYM_PREFIX`'mulredc1_`'14,`function') # Implements multiplication and REDC for one input numbers of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_14: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multipy u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X 
= x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 80(ZP) # Store T0 in z[11-1] movq 96(YP), %rax # Fetch y[j+1] = y[12] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 12 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 96(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 88(ZP) # Store T0 in z[12-1] movq 104(YP), %rax # Fetch y[j+1] = y[13] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 13. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 104(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 96(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 104(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc6.asm0000644023561000001540000004220112113421640012365 00000000000000# mp_limb_t mulredc6(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Fixed-length variant of the routine sketched in the pseudo-code below, # unrolled for 6-word operands; the final carry word is returned in %rax. # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 0x28(%rsp) at entry # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc6 TYPE(GSYM_PREFIX`'mulredc`'6,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ...
len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # low 32 bits of the loop counter register I define(`ZP', `rdi')dnl # register that holds z. Same as passed in on Linux, copied from %rcx on Windows ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ...
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc6: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $56, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' =
T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 32(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 40(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 48(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0.
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP),
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5. Don't fetch new data from y[j+1]. movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 32(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 40(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 48(%TP) # Store CY in tmp[j+1] cmpq $6, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movl %CYl, %eax # use carry as return value addq $56, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc2.asm0000644023561000001540000002007112113421640012362 00000000000000# mp_limb_t mulredc2(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs
%rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc2 TYPE(GSYM_PREFIX`'mulredc`'2,`function') # Implements multiplication and REDC for two input numbers of LENGTH words (LENGTH = 2 in this file) ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl # register that holds the current u value define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # low 32 bits of the loop counter register define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc2: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 5th arg is on the stack: 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $24, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 = 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = 
T1 `#' Pass for j = 1. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 0(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 8(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 16(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] # init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # Add high word with carry to T1; carry out is consumed by the next adcq movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1. Don't fetch new data from y[j+1]. 
movq 16(%TP), %T1 adcq %CY, %T1 # T1 = tmp[j+1] + CY + carry flag mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 0(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 8(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 16(%TP) # Store CY in tmp[j+1] cmpq $2, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movl %CYl, %eax # use carry as return value addq $24, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc.m40000644023561000001540000003021212106741272012046 00000000000000`# mp_limb_t mulredc'LENGTH`(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y,' # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. 
divert(-1) # forloop(i, from, to, stmt): expands stmt once for each value of i in from..to, both ends inclusive define(`forloop', `pushdef(`$1', `$2')_forloop(`$1', `$2', `$3', `$4')popdef(`$1')') define(`_forloop', `ifelse(eval($1 <= `$3'), 1, `$4'`define(`$1', incr($1))_forloop(`$1', `$2', `$3', `$4')')') divert `include(`config.m4')' TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX``''mulredc`'LENGTH TYPE(GSYM_PREFIX``''mulredc``''LENGTH,``function'') # Implements multiplication and REDC for two input numbers of LENGTH words `ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI')' # tmp[0 ... len] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry `define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl # register that holds the current u value define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # low 32 bits of the loop counter register define(`ZP', `rdi')dnl # register that holds z. 
Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl' dnl Put overview of register allocation into .s file ``#'' `Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U' ``#'' ` `YP' = YP, `MP' = MP, `TP' = TP' # local variables: tmp[0 ... LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 define(`LOCALSPACE', `eval(8*(LENGTH + 1))')dnl define(`LOCALTMP', `(%rsp)')dnl GSYM_PREFIX``''mulredc`'LENGTH: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 `ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl' `ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 5th arg is on the stack: 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl' subq $LOCALSPACE, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea LOCALTMP, %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 `ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ') dnl' movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 = 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 `ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf')' dnl Cycle ring buffer. Only mappings of T0 and T1 to regs change, no MOVs! `define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl' ``#'' Now ``T0'' = T0, ``T1'' = T1 forloop(`UNROLL', 1, eval(LENGTH - 2), `dnl define(`J', `eval(8 * UNROLL)')dnl define(`J8', `eval(J + 8)')dnl define(`JM8', `eval(J - 8)')dnl ``#'' Pass for j = UNROLL ``#'' Register values at entry: ``#'' %rax = y[j], %XI = x[i], %U = u ``#'' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined ``#'' %CY = carry into T1 (is <= 1) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq J`'(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
`ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)')' mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, JM8`'(%TP) ``#'' Store low word sum in tmp[UNROLL-1] movq J8`'(%YP), %rax ``#'' Fetch y[j+1] = y[eval(UNROLL+1)] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 dnl Cycle ring buffer. Only mappings of T0 and T1 to regs change, no MOVs! `define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl' ``#'' Now ``T0'' = T0, ``T1'' = T1 ')dnl # end forloop ``#'' Pass for j = eval(LENGTH - 1). Don't fetch new data from y[j+1]. define(`J', `eval(8*LENGTH - 8)')dnl define(`J8', `eval(J + 8)')dnl define(`JM8', `eval(J - 8)')dnl movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq J`'(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, JM8`'(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, J`'(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, J8`'(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] # init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # Add high word with carry to T1; carry out is consumed by the next adcq movq 8(%YP), %rax # Fetch y[1] `ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf')' dnl Cycle ring buffer. Only mappings of T0 and T1 to regs change, no MOVs! `define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl' ``#'' Now ``T0'' = T0, ``T1'' = T1 forloop(`UNROLL', 1, eval(LENGTH - 2), `dnl define(`J', `eval(8 * UNROLL)')dnl define(`J8', `eval(J + 8)')dnl define(`JM8', `eval(J - 8)')dnl ``#'' Pass for j = UNROLL ``#'' Register values at entry: ``#'' %rax = y[j], %XI = x[i], %U = u ``#'' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in ``#'' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq J8`'(%TP), %T1 adcq %CY, %T1 # T1 = tmp[j+1] + CY + carry flag setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq J`'(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq J8`'(%YP), %rax ``#'' Fetch y[j+1] = y[eval(UNROLL+1)] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, JM8`'(%TP) 
``#'' Store T0 in tmp[UNROLL-1] dnl Cycle ring buffer. Only mappings of T0 and T1 to regs change, no MOVs! `define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl' ``#'' Now ``T0'' = T0, ``T1'' = T1 ')dnl # end forloop ``#'' Pass for j = eval(LENGTH - 1). Don't fetch new data from y[j+1]. define(`J', `eval(8*LENGTH - 8)')dnl define(`J8', `eval(J + 8)')dnl define(`JM8', `eval(J - 8)')dnl movq J8`'(%TP), %T1 adcq %CY, %T1 # T1 = tmp[j+1] + CY + carry flag mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq J`'(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, JM8`'(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, J`'(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, J8`'(%TP) # Store CY in tmp[j+1] cmpq $LENGTH, %I jb 1b # Copy result from tmp memory to z dnl ==== THIS LOOP WILL NOT WORK FOR LENGTH <= 1 ==== forloop(`UNROLL', 0, eval(LENGTH / 2 - 1), `dnl define(`J', `eval(2 * UNROLL * 8)')dnl define(`J8', `eval(J + 8)')dnl ifelse(J, `0', dnl ` movq (%TP), %rax', dnl ` movq J`'(%TP), %rax') movq J8`'(%TP), %rdx ifelse(J, `0', dnl ` movq %rax, (%ZP)', dnl ` movq %rax, J`'(%ZP)') movq %rdx, J8`'(%ZP) ')dnl ifelse(eval(LENGTH % 2), 1, `dnl define(`J', `eval(LENGTH * 8 - 8)')dnl movq J`'(%TP), %rax movq %rax, J`'(%ZP) ')dnl movl %CYl, %eax # use carry as return value addq $LOCALSPACE, %rsp `ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl' popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc20.asm0000644023561000001540000014200012113421641012440 00000000000000# mp_limb_t mulredc20(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: 
%r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc20 TYPE(GSYM_PREFIX`'mulredc`'20,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x arraz define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Same as passed in' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc20: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $168, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' 
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 80(%TP) `#' Store T0 in tmp[11-1] movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 88(%TP) `#' Store T0 in tmp[12-1] movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 104(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 96(%TP) `#' Store T0 in tmp[13-1] movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 112(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 104(%TP) `#' Store T0 in tmp[14-1] movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 120(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 112(%TP) `#' Store T0 in tmp[15-1] movq 128(%YP), %rax `#' Fetch y[j+1] = y[16] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 16 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 128(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 120(%TP) `#' Store T0 in tmp[16-1] movq 136(%YP), %rax `#' Fetch y[j+1] = y[17] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 17 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 136(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 128(%TP) `#' Store T0 in tmp[17-1] movq 144(%YP), %rax `#' Fetch y[j+1] = y[18] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 18 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 144(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 136(%TP) `#' Store T0 in tmp[18-1] movq 152(%YP), %rax `#' Fetch y[j+1] = y[19] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 19. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 152(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 144(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 152(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 160(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at 
entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 88(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 80(%TP) `#' Store T0 in tmp[11-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 104(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 96(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 88(%TP) `#' Store T0 in tmp[12-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 112(%TP), %T1 adcq %CY, %T1 # T1 = CY + 
tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 104(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 96(%TP) `#' Store T0 in tmp[13-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 120(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 112(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 104(%TP) `#' Store T0 in tmp[14-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 128(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 120(%MP) # m[j]*u addq %rax, %T0 # Add 
T0 and low word movq 128(%YP), %rax `#' Fetch y[j+1] = y[16] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 112(%TP) `#' Store T0 in tmp[15-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 16 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 136(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 128(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 136(%YP), %rax `#' Fetch y[j+1] = y[17] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 120(%TP) `#' Store T0 in tmp[16-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 17 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 144(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 136(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 144(%YP), %rax `#' Fetch y[j+1] = y[18] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 128(%TP) `#' Store T0 in tmp[17-1] define(`TT', defn(`T0'))dnl define(`TTl', 
defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 18 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 152(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 144(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 152(%YP), %rax `#' Fetch y[j+1] = y[19] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 136(%TP) `#' Store T0 in tmp[18-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 19. Don't fetch new data from y[j+1]. 
movq 160(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 152(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 144(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 152(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 160(%TP) # Store CY in tmp[j+1] cmpq $20, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movq 96(%TP), %rax movq 104(%TP), %rdx movq %rax, 96(%ZP) movq %rdx, 104(%ZP) movq 112(%TP), %rax movq 120(%TP), %rdx movq %rax, 112(%ZP) movq %rdx, 120(%ZP) movq 128(%TP), %rax movq 136(%TP), %rdx movq %rax, 128(%ZP) movq %rdx, 136(%ZP) movq 144(%TP), %rax movq 152(%TP), %rdx movq %rax, 144(%ZP) movq %rdx, 152(%ZP) movl %CYl, %eax # use carry as return value addq $168, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc17.asm0000644023561000001540000012423512113421640012457 00000000000000# mp_limb_t mulredc17(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 
file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc17 TYPE(GSYM_PREFIX`'mulredc`'17,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], U = u, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # low 32 bits of the register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in under the Linux ABI; copied from rcx in the prologue under the Windows ABI ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack in the prologue' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc17: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $144, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' 
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 80(%TP) `#' Store T0 in tmp[11-1] movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 88(%TP) `#' Store T0 in tmp[12-1] movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 104(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 96(%TP) `#' Store T0 in tmp[13-1] movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 112(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 104(%TP) `#' Store T0 in tmp[14-1] movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 120(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 112(%TP) `#' Store T0 in tmp[15-1] movq 128(%YP), %rax `#' Fetch y[j+1] = y[16] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 16. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 128(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 120(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 128(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 136(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at 
entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 88(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 80(%TP) `#' Store T0 in tmp[11-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 104(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 96(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 88(%TP) `#' Store T0 in tmp[12-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 112(%TP), %T1 adcq %CY, %T1 # T1 = CY + 
tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 104(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 96(%TP) `#' Store T0 in tmp[13-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 120(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 112(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 104(%TP) `#' Store T0 in tmp[14-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 128(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 120(%MP) # m[j]*u addq %rax, %T0 # Add 
T0 and low word movq 128(%YP), %rax `#' Fetch y[j+1] = y[16] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 112(%TP) `#' Store T0 in tmp[15-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 16. Don't fetch new data from y[j+1]. movq 136(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 128(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 120(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 128(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 136(%TP) # Store CY in tmp[j+1] cmpq $17, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movq 96(%TP), %rax movq 104(%TP), %rdx movq %rax, 96(%ZP) movq %rdx, 104(%ZP) movq 112(%TP), %rax movq 120(%TP), %rdx movq %rax, 112(%ZP) movq %rdx, 120(%ZP) movq 128(%TP), %rax movq %rax, 128(%ZP) movl %CYl, %eax # use carry as return value addq $144, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc1_4.asm0000644023561000001540000001364712113421641012620 00000000000000# mp_limb_t mulredc1_4(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, 
mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 0x28(%rsp) at entry, 72(%rsp) after the 4 pushes # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_4 TYPE(GSYM_PREFIX`'mulredc1_`'4,`function') # Implements multiplication and REDC for one input number of 4 words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_4: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq 
%rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = (x*y[0]*invm) % 2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store low word of sum in z[j-1] = z[0] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store low word of sum in z[j-1] = z[1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 16(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 24(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1_3.asm0000644023561000001540000001137012113421641012606 00000000000000# mp_limb_t mulredc1_3(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_3 TYPE(GSYM_PREFIX`'mulredc1_`'3,`function') # Implements multiplication and REDC for one input numbers of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_3: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = (x*y[0]*invm) % 2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = 
x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store low word of sum in z[j-1] = z[0] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 8(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 16(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1_20.asm0000644023561000001540000006131512113421641012671 00000000000000# mp_limb_t mulredc1_20(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_20 TYPE(GSYM_PREFIX`'mulredc1_`'20,`function') # Implements multiplication and REDC for one input numbers of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_20: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multipy u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X 
= x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 80(ZP) # Store T0 in z[11-1] movq 96(YP), %rax # Fetch y[j+1] = y[12] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 12 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 96(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 88(ZP) # Store T0 in z[12-1] movq 104(YP), %rax # Fetch y[j+1] = y[13] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 13 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 104(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 96(ZP) # Store T0 in z[13-1] movq 112(YP), %rax # Fetch y[j+1] = y[14] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 14 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 112(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 104(ZP) # Store T0 in z[14-1] movq 120(YP), %rax # Fetch y[j+1] = y[15] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 15 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 120(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 112(ZP) # Store T0 in z[15-1] movq 128(YP), %rax # Fetch y[j+1] = y[16] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 16 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 128(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 120(ZP) # Store T0 in z[16-1] movq 136(YP), %rax # Fetch y[j+1] = y[17] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 17 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 136(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 128(ZP) # Store T0 in z[17-1] movq 144(YP), %rax # Fetch y[j+1] = y[18] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 18 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 144(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 136(ZP) # Store T0 in z[18-1] movq 152(YP), %rax # Fetch y[j+1] = y[19] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 19. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 152(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 144(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 152(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1_9.asm0000644023561000001540000002742212113421641012621 00000000000000# mp_limb_t mulredc1_9(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Stores in z[0..8] the 9 words of (x*y + u*m) / 2^64, where # u = (x * y[0] * inv_m) mod 2^64, and returns the carry out of the top word # (0 or 1) in %rax. The discarded low word of the sum is zero when # inv_m = -1/m[0] mod 2^64 (assumed to be guaranteed by the caller). # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 0x28(%rsp) at entry, # i.e. 72(%rsp) after the four pushq in the prologue # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_9 TYPE(GSYM_PREFIX`'mulredc1_`'9,`function') # Implements multiplication and REDC for one input number of LENGTH (here 9) words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_9: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0; the 32-bit xor zero-extends, so all of %rcx is cleared and each later setc on %cl leaves a clean 0/1 value mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') # The m4 redefinitions below exchange the register names behind the two accumulator macros, # so the old high word becomes the new low word without a move instruction define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = 
x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 56(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 64(ZP) # Store T1 in z[j], the top word of the result setc CYb # %CY <= 1 (the two movq stores above leave the carry flag untouched) movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1_15.asm0000644023561000001540000004550412113421641012677 00000000000000# mp_limb_t mulredc1_15(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_15 TYPE(GSYM_PREFIX`'mulredc1_`'15,`function') # Implements multiplication and REDC for one input numbers of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_15: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multipy u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X 
= x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 80(ZP) # Store T0 in z[11-1] movq 96(YP), %rax # Fetch y[j+1] = y[12] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 12 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 96(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 88(ZP) # Store T0 in z[12-1] movq 104(YP), %rax # Fetch y[j+1] = y[13] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 13 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 104(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 96(ZP) # Store T0 in z[13-1] movq 112(YP), %rax # Fetch y[j+1] = y[14] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 14. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 112(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 104(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 112(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc18.asm0000644023561000001540000013067612113421640012466 00000000000000# mp_limb_t mulredc18(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc18 TYPE(GSYM_PREFIX`'mulredc`'18,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... 
len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14. # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # low 32 bits (r12d) of the loop counter register define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack at function entry' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ...
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc18: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $152, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' 
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 80(%TP) `#' Store T0 in tmp[11-1] movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 88(%TP) `#' Store T0 in tmp[12-1] movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 104(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 96(%TP) `#' Store T0 in tmp[13-1] movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 112(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 104(%TP) `#' Store T0 in tmp[14-1] movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 120(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 112(%TP) `#' Store T0 in tmp[15-1] movq 128(%YP), %rax `#' Fetch y[j+1] = y[16] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 16 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 128(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 120(%TP) `#' Store T0 in tmp[16-1] movq 136(%YP), %rax `#' Fetch y[j+1] = y[17] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 17. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 136(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 128(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 136(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 144(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at 
entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 88(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 80(%TP) `#' Store T0 in tmp[11-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 104(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 96(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 88(%TP) `#' Store T0 in tmp[12-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 112(%TP), %T1 adcq %CY, %T1 # T1 = CY + 
tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 104(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 96(%TP) `#' Store T0 in tmp[13-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 120(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 112(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 104(%TP) `#' Store T0 in tmp[14-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 128(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 120(%MP) # m[j]*u addq %rax, %T0 # Add 
T0 and low word movq 128(%YP), %rax `#' Fetch y[j+1] = y[16] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 112(%TP) `#' Store T0 in tmp[15-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 16 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 136(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 128(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 136(%YP), %rax `#' Fetch y[j+1] = y[17] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 120(%TP) `#' Store T0 in tmp[16-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 17. Don't fetch new data from y[j+1]. 
movq 144(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 136(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 128(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 136(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 144(%TP) # Store CY in tmp[j+1] cmpq $18, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movq 96(%TP), %rax movq 104(%TP), %rdx movq %rax, 96(%ZP) movq %rdx, 104(%ZP) movq 112(%TP), %rax movq 120(%TP), %rdx movq %rax, 112(%ZP) movq %rdx, 120(%ZP) movq 128(%TP), %rax movq 136(%TP), %rdx movq %rax, 128(%ZP) movq %rdx, 136(%ZP) movl %CYl, %eax # use carry as return value addq $152, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc1_18.asm0000644023561000001540000005454312113421641012705 00000000000000# mp_limb_t mulredc1_18(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT 
.align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_18 TYPE(GSYM_PREFIX`'mulredc1_`'18,`function') # Implements multiplication and REDC for one input numbers of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_18: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multipy u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] 
adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with 
carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 80(ZP) # Store T0 in z[11-1] movq 96(YP), %rax # Fetch y[j+1] = y[12] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 12 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 96(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 88(ZP) # Store T0 in z[12-1] movq 104(YP), %rax # Fetch y[j+1] = y[13] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 13 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 104(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 96(ZP) # Store T0 in z[13-1] movq 112(YP), %rax # Fetch y[j+1] = y[14] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 14 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 112(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 104(ZP) # Store T0 in z[14-1] movq 120(YP), %rax # Fetch y[j+1] = y[15] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 15 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 120(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 112(ZP) # Store T0 in z[15-1] movq 128(YP), %rax # Fetch y[j+1] = y[16] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 16 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 128(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 120(ZP) # Store T0 in z[16-1] movq 136(YP), %rax # Fetch y[j+1] = y[17] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 17. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 136(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 128(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 136(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc19.asm0000644023561000001540000013533712113421641012467 00000000000000# mp_limb_t mulredc19(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc19 TYPE(GSYM_PREFIX`'mulredc`'19,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... 
len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc19: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $160, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' 
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 80(%TP) `#' Store T0 in tmp[11-1] movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 88(%TP) `#' Store T0 in tmp[12-1] movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 104(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 96(%TP) `#' Store T0 in tmp[13-1] movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 112(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 104(%TP) `#' Store T0 in tmp[14-1] movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 120(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 112(%TP) `#' Store T0 in tmp[15-1] movq 128(%YP), %rax `#' Fetch y[j+1] = y[16] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 16 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 128(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 120(%TP) `#' Store T0 in tmp[16-1] movq 136(%YP), %rax `#' Fetch y[j+1] = y[17] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 17 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 136(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 128(%TP) `#' Store T0 in tmp[17-1] movq 144(%YP), %rax `#' Fetch y[j+1] = y[18] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 18. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 144(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 136(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 144(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 152(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at 
entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 88(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 80(%TP) `#' Store T0 in tmp[11-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 104(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 96(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 88(%TP) `#' Store T0 in tmp[12-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 112(%TP), %T1 adcq %CY, %T1 # T1 = CY + 
tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 104(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 96(%TP) `#' Store T0 in tmp[13-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 120(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 112(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 104(%TP) `#' Store T0 in tmp[14-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 128(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 120(%MP) # m[j]*u addq %rax, %T0 # Add 
T0 and low word movq 128(%YP), %rax `#' Fetch y[j+1] = y[16] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 112(%TP) `#' Store T0 in tmp[15-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 16 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 136(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 128(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 136(%YP), %rax `#' Fetch y[j+1] = y[17] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 120(%TP) `#' Store T0 in tmp[16-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 17 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 144(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 136(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 144(%YP), %rax `#' Fetch y[j+1] = y[18] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 128(%TP) `#' Store T0 in tmp[17-1] define(`TT', defn(`T0'))dnl define(`TTl', 
defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 18. Don't fetch new data from y[j+1]. movq 152(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 144(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 136(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 144(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 152(%TP) # Store CY in tmp[j+1] cmpq $19, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movq 96(%TP), %rax movq 104(%TP), %rdx movq %rax, 96(%ZP) movq %rdx, 104(%ZP) movq 112(%TP), %rax movq 120(%TP), %rdx movq %rax, 112(%ZP) movq %rdx, 120(%ZP) movq 128(%TP), %rax movq 136(%TP), %rdx movq %rax, 128(%ZP) movq %rdx, 136(%ZP) movq 144(%TP), %rax movq %rax, 144(%ZP) movl %CYl, %eax # use carry as return value addq $160, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc1_10.asm0000644023561000001540000003170512113421641012670 00000000000000# mp_limb_t mulredc1_10(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # 
Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_10 TYPE(GSYM_PREFIX`'mulredc1_`'10,`function') # Implements multiplication and REDC for one input numbers of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_10: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, 
T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multipy u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 64(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 72(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1_16.asm0000644023561000001540000004777112113421641012710 00000000000000# mp_limb_t mulredc1_16(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_16 TYPE(GSYM_PREFIX`'mulredc1_`'16,`function') # Implements multiplication and REDC for one input numbers of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_16: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X
= x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 80(ZP) # Store T0 in z[11-1] movq 96(YP), %rax # Fetch y[j+1] = y[12] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 12 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 96(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 88(ZP) # Store T0 in z[12-1] movq 104(YP), %rax # Fetch y[j+1] = y[13] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 13 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 104(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 96(ZP) # Store T0 in z[13-1] movq 112(YP), %rax # Fetch y[j+1] = y[14] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 14 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 112(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 104(ZP) # Store T0 in z[14-1] movq 120(YP), %rax # Fetch y[j+1] = y[15] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 15. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 120(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 112(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 120(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1.m40000644023561000001540000001320112106741272012126 00000000000000`# mp_limb_t mulredc1_'LENGTH`(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y,' # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored divert(-1)dnl # forloop(i, from, to, stmt)dnl define(`forloop', `pushdef(`$1', `$2')_forloop(`$1', `$2', `$3', `$4')popdef(`$1')')dnl define(`_forloop', `ifelse(eval($1 <= `$3'), 1, `$4'`define(`$1', incr($1))_forloop(`$1', `$2', `$3', `$4')')')dnl divert `include(`config.m4')' `ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl' TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX``''mulredc1_`'LENGTH TYPE(GSYM_PREFIX``''mulredc1_``''LENGTH,``function'') # Implements multiplication and REDC for one input numbers of LENGTH words # and a multiplier of one word `ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI')' # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry `define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z' dnl Put overview of register allocation into generated code ``#'' Register vars: ``T0'' = T0, ``T1'' = T1, ``CY'' = CY, ``X'' = X, ``U'' = U ``#'' ``YP'' = YP, ``MP'' = MP GSYM_PREFIX``''mulredc1_`'LENGTH: ######################################################################### # i = 0 pass ######################################################################### ``#'' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 `ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' )' xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 `ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ')' dnl Cycle ring buffer. Only mappings of T0 and T1 to regs change, no MOVs!
`define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl' ``#'' Now ``T0'' = T0, ``T1'' = T1 forloop(`UNROLL', 1, eval(LENGTH - 2), `dnl define(`J', `eval(8 * UNROLL)')dnl define(`J8', `eval(J + 8)')dnl define(`JM8', `eval(J - 8)')dnl `#' Pass for j = UNROLL `#' Register values at entry: `#' %rax = y[j], X = x, U = u `#' T0 = value to store in tmp[j], T1 undefined `#' CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq J`'(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! `ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`')' mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, JM8`'(ZP) `#' Store T0 in z[UNROLL-1] movq J8`'(YP), %rax `#' Fetch y[j+1] = y[eval(UNROLL+1)] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 dnl Cycle ring buffer. Only mappings of T0 and T1 to regs change, no MOVs! `define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl' ``#'' Now ``T0'' = T0, ``T1'' = T1 ')dnl # end forloop `#' Pass for j = eval(LENGTH - 1). Don't fetch new data from y[j+1]. 
define(`J', `eval(8*LENGTH - 8)')dnl define(`J8', `eval(J + 8)')dnl define(`JM8', `eval(J - 8)')dnl movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq J`'(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, JM8`'(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, J`'(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value `ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl' popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc9.asm0000644023561000001540000005766712113421640012416 00000000000000# mp_limb_t mulredc9(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. 
include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc9 TYPE(GSYM_PREFIX`'mulredc`'9,`function') # Implements multiplication and REDC for two input numbers of LENGTH words # (in this file LENGTH is 9: the 9-word result is written to z and the final carry is returned) ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. 
Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc9: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $80, %rsp # subtract size of local vars (tmp[0 ... 9], 10 words of 8 bytes) ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY <= 255 (i.e. only low byte may be != 0) # Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 1) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 1) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 1) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 1) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 1) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 1) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 1) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 56(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 64(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 72(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY <= 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8. Don't fetch new data from y[j+1]. 
movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 56(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 64(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 72(%TP) # Store CY in tmp[j+1] cmpq $9, %I jb 1b # loop back until all 9 words x[0 ... 8] have been processed # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq %rax, 64(%ZP) movl %CYl, %eax # use carry as return value addq $80, %rsp # release the tmp array ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc1_12.asm0000644023561000001540000003645012113421641012674 00000000000000# mp_limb_t mulredc1_12(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 0x28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_12 TYPE(GSYM_PREFIX`'mulredc1_`'12,`function') # Implements multiplication and REDC for one input number of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go 
into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_12: # Single pass with the one-word multiplier x: the 12 words of (x*y + u*m) / 2^64 # are written to z[0 ... 11] and the final carry is returned in %rax ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl 
define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 80(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 88(ZP) # Store T1 in z[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc13.asm0000644023561000001540000010203112113421640012441 00000000000000# mp_limb_t mulredc13(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc13 TYPE(GSYM_PREFIX`'mulredc`'13,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... 
len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in under the Linux ABI ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc13: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $112, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' 
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 80(%TP) `#' Store T0 in tmp[11-1] movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 88(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 96(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 104(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] # init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at 
entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 88(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 80(%TP) `#' Store T0 in tmp[11-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12. Don't fetch new data from y[j+1]. movq 104(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 88(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 96(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 104(%TP) # Store CY in tmp[j+1] cmpq $13, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movq 96(%TP), %rax movq %rax, 96(%ZP) movl %CYl, %eax # use carry as return value addq $112, %rsp ifdef(`WINDOWS64_ABI', ` 
popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc12.asm0000644023561000001540000007537612113421640012465 00000000000000# mp_limb_t mulredc12(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc12 TYPE(GSYM_PREFIX`'mulredc`'12,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. 
r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # low 32 bits of the loop counter register I define(`ZP', `rdi')dnl # register that holds z. Same as passed in (Linux only) ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc12: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $104, %rsp # subtract size of local vars (13 words of tmp) ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' 
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 80(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 88(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 96(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] # init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # Add high word with carry to T1 movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11. 
Don't fetch new data from y[j+1]. movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 80(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 88(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 96(%TP) # Store CY in tmp[j+1] cmpq $12, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movl %CYl, %eax # use carry as return value addq $104, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc1_19.asm0000644023561000001540000005703012113421641012700 00000000000000# mp_limb_t mulredc1_19(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_19 TYPE(GSYM_PREFIX`'mulredc1_`'19,`function') # Implements multiplication and REDC for one input number of LENGTH words # and 
a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_19: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multipy u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC 
code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 80(ZP) # Store T0 in z[11-1] movq 96(YP), %rax # Fetch y[j+1] = y[12] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 12 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 96(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 88(ZP) # Store T0 in z[12-1] movq 104(YP), %rax # Fetch y[j+1] = y[13] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 13 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 104(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 96(ZP) # Store T0 in z[13-1] movq 112(YP), %rax # Fetch y[j+1] = y[14] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 14 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 112(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 104(ZP) # Store T0 in z[14-1] movq 120(YP), %rax # Fetch y[j+1] = y[15] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 15 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 120(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 112(ZP) # Store T0 in z[15-1] movq 128(YP), %rax # Fetch y[j+1] = y[16] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 16 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 128(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 120(ZP) # Store T0 in z[16-1] movq 136(YP), %rax # Fetch y[j+1] = y[17] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 17 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 2) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 136(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 128(ZP) # Store T0 in z[17-1] movq 144(YP), %rax # Fetch y[j+1] = y[18] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 18. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 144(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 136(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 144(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/Makefile.in0000644023561000001540000004314312113353770012225 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = x86_64 DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = LTLIBRARIES = $(noinst_LTLIBRARIES) libmulredc_la_LIBADD = am__objects_1 = mulredc1.lo mulredc2.lo mulredc3.lo mulredc4.lo \ mulredc5.lo mulredc6.lo mulredc7.lo mulredc8.lo mulredc9.lo \ mulredc10.lo mulredc11.lo mulredc12.lo mulredc13.lo \ mulredc14.lo mulredc15.lo mulredc16.lo mulredc17.lo \ mulredc18.lo mulredc19.lo mulredc20.lo am__objects_2 = mulredc1_2.lo mulredc1_3.lo mulredc1_4.lo \ mulredc1_5.lo mulredc1_6.lo mulredc1_7.lo mulredc1_8.lo \ mulredc1_9.lo mulredc1_10.lo mulredc1_11.lo mulredc1_12.lo \ mulredc1_13.lo mulredc1_14.lo mulredc1_15.lo mulredc1_16.lo \ mulredc1_17.lo mulredc1_18.lo mulredc1_19.lo mulredc1_20.lo am_libmulredc_la_OBJECTS = $(am__objects_1) $(am__objects_2) libmulredc_la_OBJECTS = $(am_libmulredc_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) 
$(CCASFLAGS) LTCCASCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS) COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(libmulredc_la_SOURCES) DIST_SOURCES = $(libmulredc_la_SOURCES) HEADERS = $(noinst_HEADERS) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = LIBOBJS = @LIBOBJS@ # The asm code does not depend on any libraries except libc for abort() # if assertions are enabled LIBS = LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ 
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ MULREDC = mulredc1.asm mulredc2.asm mulredc3.asm mulredc4.asm mulredc5.asm mulredc6.asm \ mulredc7.asm mulredc8.asm mulredc9.asm mulredc10.asm mulredc11.asm \ mulredc12.asm mulredc13.asm mulredc14.asm mulredc15.asm mulredc16.asm \ mulredc17.asm mulredc18.asm 
mulredc19.asm mulredc20.asm MULREDC1 = mulredc1_2.asm mulredc1_3.asm mulredc1_4.asm mulredc1_5.asm \ mulredc1_6.asm mulredc1_7.asm mulredc1_8.asm mulredc1_9.asm mulredc1_10.asm \ mulredc1_11.asm mulredc1_12.asm mulredc1_13.asm mulredc1_14.asm \ mulredc1_15.asm mulredc1_16.asm mulredc1_17.asm mulredc1_18.asm \ mulredc1_19.asm mulredc1_20.asm EXTRA_DIST = autogen.py generate_all mulredc.m4 mulredc1.m4 noinst_LTLIBRARIES = libmulredc.la noinst_HEADERS = mulredc.h # This library definition also causes the mulredc[n].asm, mulredc1_[n].asm, # and redc.asm files to go in the distribution - no need for having # them in EXTRA_DIST # redc.asm is removed, is slower than GMP and not ported to Win64 ABI libmulredc_la_SOURCES = $(MULREDC) $(MULREDC1) # It's actually the .s files that depend on config.m4, but automake # knows them only as intermediate files, not as targets. Adding the # dependency to libmulredc.la should work so long as no stale .s # files exist. libmulredc_la_DEPENDENCIES = $(top_builddir)/config.m4 all: all-am .SUFFIXES: .SUFFIXES: .asm .lo .o .obj .s $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu x86_64/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu x86_64/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-noinstLTLIBRARIES: -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ test "$$dir" != "$$p" || dir=.; \ echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done libmulredc.la: $(libmulredc_la_OBJECTS) $(libmulredc_la_DEPENDENCIES) $(EXTRA_libmulredc_la_DEPENDENCIES) $(LINK) $(libmulredc_la_OBJECTS) $(libmulredc_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c .s.o: $(CCASCOMPILE) -c -o $@ $< .s.obj: $(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .s.lo: $(LTCCASCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; 
nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ clean-libtool clean-noinstLTLIBRARIES ctags distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags uninstall uninstall-am # There has to be a way of making this automatically mulredc2.asm: mulredc.m4 $(M4) -DLENGTH=2 $< > $@ mulredc3.asm: mulredc.m4 $(M4) -DLENGTH=3 $< > $@ mulredc4.asm: mulredc.m4 $(M4) -DLENGTH=4 $< > $@ mulredc5.asm: mulredc.m4 $(M4) -DLENGTH=5 $< > $@ mulredc6.asm: mulredc.m4 $(M4) -DLENGTH=6 $< > $@ mulredc7.asm: mulredc.m4 $(M4) -DLENGTH=7 $< > $@ mulredc8.asm: mulredc.m4 $(M4) -DLENGTH=8 $< > $@ mulredc9.asm: mulredc.m4 $(M4) 
-DLENGTH=9 $< > $@ mulredc10.asm: mulredc.m4 $(M4) -DLENGTH=10 $< > $@ mulredc11.asm: mulredc.m4 $(M4) -DLENGTH=11 $< > $@ mulredc12.asm: mulredc.m4 $(M4) -DLENGTH=12 $< > $@ mulredc13.asm: mulredc.m4 $(M4) -DLENGTH=13 $< > $@ mulredc14.asm: mulredc.m4 $(M4) -DLENGTH=14 $< > $@ mulredc15.asm: mulredc.m4 $(M4) -DLENGTH=15 $< > $@ mulredc16.asm: mulredc.m4 $(M4) -DLENGTH=16 $< > $@ mulredc17.asm: mulredc.m4 $(M4) -DLENGTH=17 $< > $@ mulredc18.asm: mulredc.m4 $(M4) -DLENGTH=18 $< > $@ mulredc19.asm: mulredc.m4 $(M4) -DLENGTH=19 $< > $@ mulredc20.asm: mulredc.m4 $(M4) -DLENGTH=20 $< > $@ mulredc1_2.asm: mulredc1.m4 $(M4) -DLENGTH=2 $< > $@ mulredc1_3.asm: mulredc1.m4 $(M4) -DLENGTH=3 $< > $@ mulredc1_4.asm: mulredc1.m4 $(M4) -DLENGTH=4 $< > $@ mulredc1_5.asm: mulredc1.m4 $(M4) -DLENGTH=5 $< > $@ mulredc1_6.asm: mulredc1.m4 $(M4) -DLENGTH=6 $< > $@ mulredc1_7.asm: mulredc1.m4 $(M4) -DLENGTH=7 $< > $@ mulredc1_8.asm: mulredc1.m4 $(M4) -DLENGTH=8 $< > $@ mulredc1_9.asm: mulredc1.m4 $(M4) -DLENGTH=9 $< > $@ mulredc1_10.asm: mulredc1.m4 $(M4) -DLENGTH=10 $< > $@ mulredc1_11.asm: mulredc1.m4 $(M4) -DLENGTH=11 $< > $@ mulredc1_12.asm: mulredc1.m4 $(M4) -DLENGTH=12 $< > $@ mulredc1_13.asm: mulredc1.m4 $(M4) -DLENGTH=13 $< > $@ mulredc1_14.asm: mulredc1.m4 $(M4) -DLENGTH=14 $< > $@ mulredc1_15.asm: mulredc1.m4 $(M4) -DLENGTH=15 $< > $@ mulredc1_16.asm: mulredc1.m4 $(M4) -DLENGTH=16 $< > $@ mulredc1_17.asm: mulredc1.m4 $(M4) -DLENGTH=17 $< > $@ mulredc1_18.asm: mulredc1.m4 $(M4) -DLENGTH=18 $< > $@ mulredc1_19.asm: mulredc1.m4 $(M4) -DLENGTH=19 $< > $@ mulredc1_20.asm: mulredc1.m4 $(M4) -DLENGTH=20 $< > $@ .asm.s: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.s # Nothing here needs the C preprocessor, and including this rule causes # "make" to build .S, then .s files which fails on case-insensitive # filesystems #.asm.S: # $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.S # Tell versions [3.59,3.63) of GNU make to not export all variables. 
# Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: ecm-6.4.4/x86_64/mulredc1_17.asm0000644023561000001540000005225612113421641012703 00000000000000# mp_limb_t mulredc1_17(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 0x28(%rsp) at entry (72(%rsp) after the four pushes below) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_17 TYPE(GSYM_PREFIX`'mulredc1_`'17,`function') # Implements multiplication and REDC for one input number of LENGTH words # and a multiplier of one word # Writes z[0..16] and returns the final carry (0 or 1) in %rax ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_17: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = (x*y[0]*invm)%2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X 
= x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 40(ZP) # Store T0 in z[6-1] movq 56(YP), %rax # Fetch y[j+1] = y[7] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 7 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 56(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 48(ZP) # Store T0 in z[7-1] movq 64(YP), %rax # Fetch y[j+1] = y[8] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 8 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 64(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 56(ZP) # Store T0 in z[8-1] movq 72(YP), %rax # Fetch y[j+1] = y[9] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 9 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 72(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 64(ZP) # Store T0 in z[9-1] movq 80(YP), %rax # Fetch y[j+1] = y[10] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 10 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 80(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 72(ZP) # Store T0 in z[10-1] movq 88(YP), %rax # Fetch y[j+1] = y[11] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 11 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 88(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 80(ZP) # Store T0 in z[11-1] movq 96(YP), %rax # Fetch y[j+1] = y[12] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 12 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 96(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 88(ZP) # Store T0 in z[12-1] movq 104(YP), %rax # Fetch y[j+1] = y[13] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 13 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 104(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 96(ZP) # Store T0 in z[13-1] movq 112(YP), %rax # Fetch y[j+1] = y[14] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 14 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 112(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 104(ZP) # Store T0 in z[14-1] movq 120(YP), %rax # Fetch y[j+1] = y[15] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 15 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T1 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 120(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 112(ZP) # Store T0 in z[15-1] movq 128(YP), %rax # Fetch y[j+1] = y[16] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 16. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x addq %rax, T0 # Add low word to T0 movq 128(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 120(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 128(ZP) # Store T1 in z[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc16.asm0000644023561000001540000011757412113421640012466 00000000000000# mp_limb_t mulredc16(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc16 TYPE(GSYM_PREFIX`'mulredc`'16,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... 
len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x arraz define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Same as passed in' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc16: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $136, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' 
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 80(%TP) `#' Store T0 in tmp[11-1] movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 88(%TP) `#' Store T0 in tmp[12-1] movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 104(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 96(%TP) `#' Store T0 in tmp[13-1] movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 112(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 104(%TP) `#' Store T0 in tmp[14-1] movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 120(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 112(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 120(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 128(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at 
entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 88(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 80(%TP) `#' Store T0 in tmp[11-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 104(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 96(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 88(%TP) `#' Store T0 in tmp[12-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 112(%TP), %T1 adcq %CY, %T1 # T1 = CY + 
tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 104(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 96(%TP) `#' Store T0 in tmp[13-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 120(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 112(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 120(%YP), %rax `#' Fetch y[j+1] = y[15] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 104(%TP) `#' Store T0 in tmp[14-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 15. Don't fetch new data from y[j+1]. 
movq 128(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 120(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 112(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 120(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 128(%TP) # Store CY in tmp[j+1] cmpq $16, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movq 96(%TP), %rax movq 104(%TP), %rdx movq %rax, 96(%ZP) movq %rdx, 104(%ZP) movq 112(%TP), %rax movq 120(%TP), %rdx movq %rax, 112(%ZP) movq %rdx, 120(%ZP) movl %CYl, %eax # use carry as return value addq $136, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc1.asm0000644023561000001540000000322612106741272012374 00000000000000# # mp_limb_t mulredc1(mp_limb_t * z, const mp_limb_t x, const mp_limb_t y, # const mp_limb_t m, mp_limb_t inv_m) # # Compute z := x*y mod m, in Montgomery representation, where x, y < m # and m is n limb wide. inv_m is the less significant limb of the # inverse of m modulo 2^(n*GMP_LIMB_BITS) # # The result might be unreduced (larger than m) but becomes reduced # after subtracting m. The calling function should take care of that. # # We use a temporary space for unreduced product on the stack. # Therefore, this can not be used for large integers (anyway, the # algorithm is quadratic). 
#
# WARNING: z is only n limbs but since it might be unreduced, there
#          could be a carry that does not fit in z. This carry is returned.

include(`config.m4')
	TEXT
	GLOBL GSYM_PREFIX`'mulredc1
	TYPE(GSYM_PREFIX`'mulredc1,`function')

ifdef(`WINDOWS64_ABI',
# stack: inv_m, %r9: m, %r8: y, %rdx: x, %rcx: *z
`define(`INV_M', `0x28(%rsp)')
define(`M', `%r9')
define(`Y', `%r8')
define(`X', `%rdx')
define(`Z', `%rcx')
define(`TMP2', `%r10')
define(`TMP1', `%r8')',
# %r8: inv_m, %rcx: m, %rdx: y, %rsi : x, %rdi : *z
`define(`INV_M', `%r8')
define(`M', `%rcx')
define(`Y', `%rdx')
define(`X', `%rsi')
define(`Z', `%rdi')
define(`TMP2', `%r10')
define(`TMP1', `%r9')')

# Single-limb case: x, y and m are used directly as register operands, only
# z is dereferenced. One limb is stored at z and the carry is left in %rax.
GSYM_PREFIX`'mulredc1:
	movq	Y, %rax
	mulq	X		# rdx:rax = x * y
	movq	%rdx, TMP2
	movq	%rax, TMP1	# store x*y: low word in TMP1, high word in TMP2
	mulq	INV_M		# compute u = low word of (low word of x*y) * inv_m
	mulq	M		# compute u*m as rdx:rax
	addq	TMP1, %rax	# low words cancel, so rax is 0 now; the carry
				# flag set here is consumed by the adcq below
ifdef(`WANT_ASSERT', `
	jz	1f
	lea     _GLOBAL_OFFSET_TABLE_(%rip), %rbx
	call	abort@plt
LABEL_SUFFIX(1)')
	adcq	TMP2, %rdx	# rdx = high word of u*m + high word of x*y + carry
	movq    %rdx, (Z)	# store the single result limb at z
	adcq	$0, %rax	# rax was 0, so rax = carry out of the adcq above;
				# this is the return value
	ret
ecm-6.4.4/x86_64/mulredc1_5.asm0000644023561000001540000001612612113421641012614 00000000000000# mp_limb_t mulredc1_5(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y,
#                 const mp_limb_t *m, mp_limb_t inv_m);
#
#  Linux:   z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8
#           Needs %rbx, %rsp, %rbp, %r12-%r15 restored
#  Windows: z: %rcx, x: %rdx, y: %r8,  m: %r9, inv_m: 0x28(%rsp)
#           Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored

include(`config.m4')

ifdef(`WINDOWS64_ABI',
`define(`Y_PARAM', `%r8')dnl
define(`INVM_PARAM',`72(%rsp)')dnl'
,
`define(`Y_PARAM', `%rdx')dnl
define(`INVM_PARAM',`%r8')dnl'
)dnl
	TEXT
.align 64 # Opteron L1 code cache line is 64 bytes long
	GLOBL GSYM_PREFIX`'mulredc1_5
	TYPE(GSYM_PREFIX`'mulredc1_`'5,`function')

# Implements multiplication and REDC for one input number of LENGTH words
# and a multiplier of one word
ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI')

# Values that are referenced only once in the loop over j go into r8 ..
# r14,
# In the inner loop (over j), tmp, x[i], y, m, and u are constant.
# tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values
# stay in registers and are referenced as
# YP = y, MP = m,
# X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry

define(`T0', `%rsi')dnl
define(`T1', `%rbx')dnl
define(`CY', `%rcx')dnl
define(`CYl', `%ecx')dnl
define(`CYb', `%cl')dnl
define(`X', `%r14')dnl		# register that holds x value
define(`U', `%r11')dnl
define(`YP', `%r9')dnl		# register that points to the y array
define(`MP', `%r10')dnl		# register that points to the m array
define(`ZP', `%rdi')dnl		# register that holds z

`#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U
`#'                `YP' = YP, `MP' = MP

# Computes (x*y + u*m) / 2^64 for the 5-word y and m, where
# u = x*y[0]*inv_m mod 2^64. The five result words are stored in z[0..4]
# and the carry out of the top word is returned in %rax.
# T0 and T1 are m4 macros that swap registers after every pass over j, so
# the same macro name refers to a different register in consecutive passes.
GSYM_PREFIX`'mulredc1_5:

#########################################################################
# i = 0 pass
#########################################################################

`#' register values at loop entry: YP = y, MP = m

# We need to compute u

	movq	(Y_PARAM), %rax		# rax = y[0] (time critical, do first)
	pushq	%rbx
	pushq	%r14
ifdef(`WINDOWS64_ABI',
`	pushq	%rsi
	pushq	%rdi
	movq	%r9, MP			# store m in MP
	movq	Y_PARAM, YP
	movq	%rcx, ZP
	movq	%rdx, X'
,
`	movq	Y_PARAM, YP
	movq	%rcx, MP
	movq	%rsi, X			# store x in X
	# ZP is same as passed in'
)

	xorl	CYl, CYl		# set %CY to 0

	mulq	X			# rdx:rax = y[0] * x

	movq	%rax, T0		# Move low word of product to T0
	movq	%rdx, T1		# Move high word of product to T1

	imulq	INVM_PARAM, %rax	# %rax = (x*y[0]*invm) % 2^64
	movq	%rax, U			# this is the new u value

	mulq	(MP)			# multiply u*m[0]
	addq	%rax, T0		# Now %T0 = 0, need not be stored
	movq	8(YP), %rax		# Fetch y[1]
	adcq	%rdx, T1		#
	setc	CYb
	# CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence
	# CY:T1 <= 2*2^64 - 4

ifdef(`WANT_ASSERT', `
	pushf
	testq	T0, T0
	jz	assert1
	lea     _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we
		# need to set rbx; if not, it doesnt hurt
	call	GSYM_PREFIX`'abort@plt
assert1:
	popf
')
define(`TT', defn(`T0'))dnl
define(`T0', defn(`T1'))dnl
define(`T1', defn(`TT'))dnl
undefine(`TT')dnl
`#' Now `T0' = T0, `T1' = T1

# Pass for j = 1
# Register values at entry:
# %rax = y[j], X = x, U = u
# T0 = value to store in tmp[j], T1 undefined
# CY = carry into T1 (is <= 1)
# We have CY:T1 <= 2 * 2^64 - 2

	movq	CY, T1		# T1 = CY <= 1

	# Here, T1:T0 <= 2*2^64 - 2
	mulq	X		# y[j] * x
	# rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1
	addq	%rax, T0	# Add low word to T0
	movq	8(MP), %rax	# Fetch m[j] into %rax
	adcq	%rdx, T1	# Add high word with carry to T1
	# T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', `
	jnc	1f
	lea     _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we
		# need to set rbx; if not, it doesnt hurt
	call	GSYM_PREFIX`'abort@plt
1:
',`')

	mulq	U		# m[j]*u
	# rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1
	addq	T0, %rax	# Add T0 and low word
	movq	%rax, 0(ZP)	# Store T0 in z[1-1]
	movq	16(YP), %rax	# Fetch y[j+1] = y[2] into %rax
	adcq	%rdx, T1	# Add high word with carry to T1
	setc	CYb		# CY <= 1
	# CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <=
	#             2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2

define(`TT', defn(`T0'))dnl
define(`T0', defn(`T1'))dnl
define(`T1', defn(`TT'))dnl
undefine(`TT')dnl
`#' Now `T0' = T0, `T1' = T1

# Pass for j = 2
# Register values at entry:
# %rax = y[j], X = x, U = u
# T0 = value to store in tmp[j], T1 undefined
# CY = carry into T1 (is <= 1)
# We have CY:T1 <= 2 * 2^64 - 2

	movq	CY, T1		# T1 = CY <= 1

	# Here, T1:T0 <= 2*2^64 - 2
	mulq	X		# y[j] * x
	# rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1
	addq	%rax, T0	# Add low word to T0
	movq	16(MP), %rax	# Fetch m[j] into %rax
	adcq	%rdx, T1	# Add high word with carry to T1
	# T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', `
	jnc	1f
	lea     _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we
		# need to set rbx; if not, it doesnt hurt
	call	GSYM_PREFIX`'abort@plt
1:
',`')

	mulq	U		# m[j]*u
	# rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1
	addq	T0, %rax	# Add T0 and low word
	movq	%rax, 8(ZP)	# Store T0 in z[2-1]
	movq	24(YP), %rax	# Fetch y[j+1] = y[3] into %rax
	adcq	%rdx, T1	# Add high word with carry to T1
	setc	CYb		# CY <= 1
	# CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <=
	#             2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2

define(`TT', defn(`T0'))dnl
define(`T0', defn(`T1'))dnl
define(`T1', defn(`TT'))dnl
undefine(`TT')dnl
`#' Now `T0' = T0, `T1' = T1

# Pass for j = 3
# Register values at entry:
# %rax = y[j], X = x, U = u
# T0 = value to store in tmp[j], T1 undefined
# CY = carry into T1 (is <= 1)
# We have CY:T1 <= 2 * 2^64 - 2

	movq	CY, T1		# T1 = CY <= 1

	# Here, T1:T0 <= 2*2^64 - 2
	mulq	X		# y[j] * x
	# rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1
	addq	%rax, T0	# Add low word to T0
	movq	24(MP), %rax	# Fetch m[j] into %rax
	adcq	%rdx, T1	# Add high word with carry to T1
	# T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', `
	jnc	1f
	lea     _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we
		# need to set rbx; if not, it doesnt hurt
	call	GSYM_PREFIX`'abort@plt
1:
',`')

	mulq	U		# m[j]*u
	# rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1
	addq	T0, %rax	# Add T0 and low word
	movq	%rax, 16(ZP)	# Store T0 in z[3-1]
	movq	32(YP), %rax	# Fetch y[j+1] = y[4] into %rax
	adcq	%rdx, T1	# Add high word with carry to T1
	setc	CYb		# CY <= 1
	# CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <=
	#             2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2

define(`TT', defn(`T0'))dnl
define(`T0', defn(`T1'))dnl
define(`T1', defn(`TT'))dnl
undefine(`TT')dnl
`#' Now `T0' = T0, `T1' = T1

# Pass for j = 4. Don't fetch new data from y[j+1].

	movq	CY, T1		# T1 = CY <= 1

	mulq	X		# y[j] * x
	addq	%rax, T0	# Add low word to T0
	movq	32(MP), %rax	# Fetch m[j] into %rax
	adcq	%rdx, T1	# Add high word with carry to T1
	mulq	U		# m[j]*u
	addq	%rax, T0	# Add low word to T0
	movq	T0, 24(ZP)	# Store T0 in z[j-1]
	adcq	%rdx, T1	# Add high word with carry to T1
	movq	T1, 32(ZP)	# Store T1 in z[j]
	setc	CYb		# %CY <= 1

	movq	CY, %rax	# use carry as return value
ifdef(`WINDOWS64_ABI',
`	popq	%rdi
	popq	%rsi
') dnl
	popq	%r14
	popq	%rbx
	ret
ecm-6.4.4/x86_64/mulredc15.asm0000644023561000001540000011313312113421640012450 00000000000000# mp_limb_t mulredc15(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc15 TYPE(GSYM_PREFIX`'mulredc`'15,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ...
len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x arraz define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Same as passed in' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc15: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $128, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' 
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 72(%TP) `#' Store T0 in tmp[10-1] movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 88(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 80(%TP) `#' Store T0 in tmp[11-1] movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 96(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 88(%TP) `#' Store T0 in tmp[12-1] movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 104(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 96(%TP) `#' Store T0 in tmp[13-1] movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 112(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 104(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 112(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 120(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 80(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 88(%YP), %rax `#' Fetch y[j+1] = y[11] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 72(%TP) `#' Store T0 in tmp[10-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 11 `#' Register values at 
entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 96(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 88(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 96(%YP), %rax `#' Fetch y[j+1] = y[12] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 80(%TP) `#' Store T0 in tmp[11-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 12 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 104(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 96(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 104(%YP), %rax `#' Fetch y[j+1] = y[13] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 88(%TP) `#' Store T0 in tmp[12-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 13 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 112(%TP), %T1 adcq %CY, %T1 # T1 = CY + 
tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 104(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 112(%YP), %rax `#' Fetch y[j+1] = y[14] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 96(%TP) `#' Store T0 in tmp[13-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 14. Don't fetch new data from y[j+1]. movq 120(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 112(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 104(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 112(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 120(%TP) # Store CY in tmp[j+1] cmpq $15, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq 88(%TP), %rdx movq %rax, 80(%ZP) movq %rdx, 88(%ZP) movq 96(%TP), %rax movq 104(%TP), %rdx movq %rax, 96(%ZP) movq %rdx, 104(%ZP) movq 112(%TP), %rax movq %rax, 112(%ZP) movl %CYl, %eax # use carry as return value addq $128, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret 
ecm-6.4.4/x86_64/mulredc1_6.asm0000644023561000001540000002040512113421641012610 00000000000000# mp_limb_t mulredc1_6(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Computes u = (x*y[0]*inv_m) mod 2^64 and the sum x*y + u*m, drops the low # word of that sum, stores the next 6 words in z[0..5] and returns the # carry out of the top word (0 or 1) in %rax. # NOTE(review): presumably inv_m = -1/m[0] mod 2^64, so that the dropped low # word is zero (only checked under WANT_ASSERT) -- confirm against callers. # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) (0x28) at entry # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_6 TYPE(GSYM_PREFIX`'mulredc1_`'6,`function') # Implements multiplication and REDC for one input number of 6 words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry # Note: this single-word-multiplier variant keeps no tmp array; where the # comments below say tmp[j], the word is accumulated in a register and # stored directly into z. define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_6: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = (x*y[0]*invm) % 2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = 
x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 32(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 40(ZP) # Store T1 in z[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc1_7.asm0000644023561000001540000002266412113421641012622 00000000000000# mp_limb_t mulredc1_7(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Computes u = (x*y[0]*inv_m) mod 2^64 and the sum x*y + u*m, drops the low # word of that sum, stores the next 7 words in z[0..6] and returns the # carry out of the top word (0 or 1) in %rax. # NOTE(review): presumably inv_m = -1/m[0] mod 2^64, so that the dropped low # word is zero (only checked under WANT_ASSERT) -- confirm against callers. # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) (0x28) at entry # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_7 TYPE(GSYM_PREFIX`'mulredc1_`'7,`function') # Implements multiplication and REDC for one input number of 7 words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry # Note: this single-word-multiplier variant keeps no tmp array; where the # comments below say tmp[j], the word is accumulated in a register and # stored directly into z. define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_7: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = (x*y[0]*invm) % 2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1 # Register values at entry: # %rax = y[j], X = 
x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 0(ZP) # Store T0 in z[1-1] movq 16(YP), %rax # Fetch y[j+1] = y[2] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 2 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 16(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 8(ZP) # Store T0 in z[2-1] movq 24(YP), %rax # Fetch y[j+1] = y[3] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 3 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 24(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 16(ZP) # Store T0 in z[3-1] movq 32(YP), %rax # Fetch y[j+1] = y[4] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 4 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 32(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 24(ZP) # Store T0 in z[4-1] movq 40(YP), %rax # Fetch y[j+1] = y[5] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 5 # Register values at entry: # %rax = y[j], X = x, U = u # T0 = value to store in tmp[j], T1 undefined # CY = carry into T1 (is <= 1) # We have CY:T0 <= 2 * 2^64 - 2 movq CY, T1 # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq X # y[j] * x # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, T0 # Add low word to T0 movq 40(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 1f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt 1: ',`') mulq U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq T0, %rax # Add T0 and low word movq %rax, 32(ZP) # Store T0 in z[5-1] movq 48(YP), %rax # Fetch y[j+1] = y[6] into %rax adcq %rdx, T1 # Add high word with carry to T1 setc CYb # CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 6. Don't fetch new data from y[j+1]. 
movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x addq %rax, T0 # Add low word to T0 movq 48(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 40(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 48(ZP) # Store T1 in z[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/mulredc10.asm0000644023561000001540000006431612113421640012453 00000000000000# mp_limb_t mulredc10(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) (0x28) at entry # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc10 TYPE(GSYM_PREFIX`'mulredc`'10,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... 
len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # low 32 bits of the loop counter register define(`ZP', `rdi')dnl # register that holds z. Same as passed in under the Linux ABI ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack in the prologue' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ...
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc10: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 pushes, ret addr, 4 register-parameter home slots = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $88, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1'
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 64(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 72(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 80(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl 
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9. Don't fetch new data from y[j+1]. movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 64(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 72(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 80(%TP) # Store CY in tmp[j+1] cmpq $10, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movl %CYl, %eax # use carry as return value addq $88, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc1_2.asm0000644023561000001540000000711112113421641012603 00000000000000# mp_limb_t mulredc1_2(mp_limb_t * z, const mp_limb_t x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored include(`config.m4') ifdef(`WINDOWS64_ABI', `define(`Y_PARAM', `%r8')dnl define(`INVM_PARAM',`72(%rsp)')dnl' , `define(`Y_PARAM', `%rdx')dnl define(`INVM_PARAM',`%r8')dnl' )dnl TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc1_2 
TYPE(GSYM_PREFIX`'mulredc1_`'2,`function') # Implements multiplication and REDC for one input number of LENGTH words # and a multiplier of one word ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # YP = y, MP = m, # X = x, T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `%rsi')dnl define(`T1', `%rbx')dnl define(`CY', `%rcx')dnl define(`CYl', `%ecx')dnl define(`CYb', `%cl')dnl define(`X', `%r14')dnl # register that holds x value define(`U', `%r11')dnl define(`YP', `%r9')dnl # register that points to the y array define(`MP', `%r10')dnl # register that points to the m array define(`ZP', `%rdi')dnl # register that holds z `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `X' = X, `U' = U `#' `YP' = YP, `MP' = MP GSYM_PREFIX`'mulredc1_2: ######################################################################### # i = 0 pass ######################################################################### `#' register values at loop entry: YP = y, MP = m # We need to compute u movq (Y_PARAM), %rax # rax = y[0] (time critical, do first) pushq %rbx pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi movq %r9, MP # store m in MP movq Y_PARAM, YP movq %rcx, ZP movq %rdx, X' , ` movq Y_PARAM, YP movq %rcx, MP movq %rsi, X # store x in X # ZP is same as passed in' ) xorl CYl, CYl # set %CY to 0 mulq X # rdx:rax = y[0] * x movq %rax, T0 # Move low word of product to T0 movq %rdx, T1 # Move high word of product to T1 imulq INVM_PARAM, %rax # %rax = (x*y[0]*invm) % 2^64 movq %rax, U # this is the new u value mulq (MP) # multiply u*m[0] addq %rax, T0 # Now %T0 = 0, need not be stored movq 8(YP), %rax # Fetch y[1] adcq %rdx, T1 # setc CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1
<= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq T0, T0 jz assert1 lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt assert1: popf ') define(`TT', defn(`T0'))dnl define(`T0', defn(`T1'))dnl define(`T1', defn(`TT'))dnl undefine(`TT')dnl `#' Now `T0' = T0, `T1' = T1 # Pass for j = 1. Don't fetch new data from y[j+1]. movq CY, T1 # T1 = CY <= 1 mulq X # y[j] * x[i] addq %rax, T0 # Add low word to T0 movq 8(MP), %rax # Fetch m[j] into %rax adcq %rdx, T1 # Add high word with carry to T1 mulq U # m[j]*u addq %rax, T0 # Add low word to T0 movq T0, 0(ZP) # Store T0 in z[j-1] adcq %rdx, T1 # Add high word with carry to T1 movq T1, 8(ZP) # Store T1 in tmp[j] setc CYb # %CY <= 1 movq CY, %rax # use carry as return value ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %rbx ret ecm-6.4.4/x86_64/Makefile.am0000644023561000001540000000745512106741272012222 00000000000000MULREDC = mulredc1.asm mulredc2.asm mulredc3.asm mulredc4.asm mulredc5.asm mulredc6.asm \ mulredc7.asm mulredc8.asm mulredc9.asm mulredc10.asm mulredc11.asm \ mulredc12.asm mulredc13.asm mulredc14.asm mulredc15.asm mulredc16.asm \ mulredc17.asm mulredc18.asm mulredc19.asm mulredc20.asm MULREDC1 = mulredc1_2.asm mulredc1_3.asm mulredc1_4.asm mulredc1_5.asm \ mulredc1_6.asm mulredc1_7.asm mulredc1_8.asm mulredc1_9.asm mulredc1_10.asm \ mulredc1_11.asm mulredc1_12.asm mulredc1_13.asm mulredc1_14.asm \ mulredc1_15.asm mulredc1_16.asm mulredc1_17.asm mulredc1_18.asm \ mulredc1_19.asm mulredc1_20.asm EXTRA_DIST = autogen.py generate_all mulredc.m4 mulredc1.m4 noinst_LTLIBRARIES = libmulredc.la noinst_HEADERS = mulredc.h # This library definition also causes the mulredc[n].asm, mulredc1_[n].asm, # and redc.asm files to go in the distribution - no need for having # them in EXTRA_DIST # redc.asm is removed, is slower than GMP and not ported to Win64 ABI libmulredc_la_SOURCES = $(MULREDC) $(MULREDC1) # It's actually the .s 
files that depend on config.m4, but automake # knows them only as intermediate files, not as targets. Adding the # dependency to libmulredc.la should work so long as no stale .s # files exist. libmulredc_la_DEPENDENCIES = $(top_builddir)/config.m4 # The asm code does not depend on any libraries except libc for abort() # if assertions are enabled LIBS = LDFLAGS = # There has to be a way of making this automatically mulredc2.asm: mulredc.m4 $(M4) -DLENGTH=2 $< > $@ mulredc3.asm: mulredc.m4 $(M4) -DLENGTH=3 $< > $@ mulredc4.asm: mulredc.m4 $(M4) -DLENGTH=4 $< > $@ mulredc5.asm: mulredc.m4 $(M4) -DLENGTH=5 $< > $@ mulredc6.asm: mulredc.m4 $(M4) -DLENGTH=6 $< > $@ mulredc7.asm: mulredc.m4 $(M4) -DLENGTH=7 $< > $@ mulredc8.asm: mulredc.m4 $(M4) -DLENGTH=8 $< > $@ mulredc9.asm: mulredc.m4 $(M4) -DLENGTH=9 $< > $@ mulredc10.asm: mulredc.m4 $(M4) -DLENGTH=10 $< > $@ mulredc11.asm: mulredc.m4 $(M4) -DLENGTH=11 $< > $@ mulredc12.asm: mulredc.m4 $(M4) -DLENGTH=12 $< > $@ mulredc13.asm: mulredc.m4 $(M4) -DLENGTH=13 $< > $@ mulredc14.asm: mulredc.m4 $(M4) -DLENGTH=14 $< > $@ mulredc15.asm: mulredc.m4 $(M4) -DLENGTH=15 $< > $@ mulredc16.asm: mulredc.m4 $(M4) -DLENGTH=16 $< > $@ mulredc17.asm: mulredc.m4 $(M4) -DLENGTH=17 $< > $@ mulredc18.asm: mulredc.m4 $(M4) -DLENGTH=18 $< > $@ mulredc19.asm: mulredc.m4 $(M4) -DLENGTH=19 $< > $@ mulredc20.asm: mulredc.m4 $(M4) -DLENGTH=20 $< > $@ mulredc1_2.asm: mulredc1.m4 $(M4) -DLENGTH=2 $< > $@ mulredc1_3.asm: mulredc1.m4 $(M4) -DLENGTH=3 $< > $@ mulredc1_4.asm: mulredc1.m4 $(M4) -DLENGTH=4 $< > $@ mulredc1_5.asm: mulredc1.m4 $(M4) -DLENGTH=5 $< > $@ mulredc1_6.asm: mulredc1.m4 $(M4) -DLENGTH=6 $< > $@ mulredc1_7.asm: mulredc1.m4 $(M4) -DLENGTH=7 $< > $@ mulredc1_8.asm: mulredc1.m4 $(M4) -DLENGTH=8 $< > $@ mulredc1_9.asm: mulredc1.m4 $(M4) -DLENGTH=9 $< > $@ mulredc1_10.asm: mulredc1.m4 $(M4) -DLENGTH=10 $< > $@ mulredc1_11.asm: mulredc1.m4 $(M4) -DLENGTH=11 $< > $@ mulredc1_12.asm: mulredc1.m4 $(M4) -DLENGTH=12 $< > $@ mulredc1_13.asm: 
mulredc1.m4 $(M4) -DLENGTH=13 $< > $@ mulredc1_14.asm: mulredc1.m4 $(M4) -DLENGTH=14 $< > $@ mulredc1_15.asm: mulredc1.m4 $(M4) -DLENGTH=15 $< > $@ mulredc1_16.asm: mulredc1.m4 $(M4) -DLENGTH=16 $< > $@ mulredc1_17.asm: mulredc1.m4 $(M4) -DLENGTH=17 $< > $@ mulredc1_18.asm: mulredc1.m4 $(M4) -DLENGTH=18 $< > $@ mulredc1_19.asm: mulredc1.m4 $(M4) -DLENGTH=19 $< > $@ mulredc1_20.asm: mulredc1.m4 $(M4) -DLENGTH=20 $< > $@ .asm.s: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.s # Nothing here needs the C preprocessor, and including this rule causes # "make" to build .S, then .s files which fails on case-insensitive # filesystems #.asm.S: # $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.S ecm-6.4.4/x86_64/mulredc4.asm0000644023561000001540000003113512113421640012367 00000000000000# mp_limb_t mulredc4(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. 
include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc4 TYPE(GSYM_PREFIX`'mulredc`'4,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Same as passed in' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. 
Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc4: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $40, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 16(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 24(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 32(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3. Don't fetch new data from y[j+1]. movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 16(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 24(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 32(%TP) # Store CY in tmp[j+1] cmpq $4, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movl %CYl, %eax # use carry as return value addq $40, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc8.asm0000644023561000001540000005324512113421640012401 00000000000000# mp_limb_t mulredc8(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: 
%rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 40(%rsp) on entry # Needs %rbx, %rbp, %rdi, %rsi, %r12-%r15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc8 TYPE(GSYM_PREFIX`'mulredc`'8,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. 
These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Same as passed in' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... 
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc8: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $72, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = 
T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 48(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 56(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 64(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq 
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7. Don't fetch new data from y[j+1]. movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 48(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 56(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 64(%TP) # Store CY in tmp[j+1] cmpq $8, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movl %CYl, %eax # use carry as return value addq $72, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/README0000644023561000001540000000146512106741272011041 00000000000000mulredc[1..20].asm are size-specific asm functions for mulredc. These are generated by the Python autogen.py script (old version, still used for sizes 1 and 2) and the m4 script mulredc.m4 (all other sizes). In order to avoid dependency on the Python and m4 packages, this generation is not done automatically with the autoconf/automake stuff. 
If you need to regenerate them, the syntax is ./autogen.py 1 > mulredc1.asm ./autogen.py 2 > mulredc2.asm m4 -DLENGTH=3 mulredc.m4 > mulredc3.asm m4 -DLENGTH=4 mulredc.m4 > mulredc4.asm etc., up to LENGTH=20. If you have problems, you should reconfigure with the --disable-asm-redc option. redc.asm is a version of redc separated from the multiplication, since there are cases where it is needed. test_mulredc.c, bench.c and the Makefile.dev are for development.ecm-6.4.4/x86_64/mulredc11.asm0000644023561000001540000007074412113421640012456 00000000000000# mp_limb_t mulredc11(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 0x28(%rsp) at entry # Needs %rbx, %rbp, %rdi, %rsi, %r12...%r15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc. by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc11 TYPE(GSYM_PREFIX`'mulredc`'11,`function') # Implements multiplication and REDC for two input numbers of LENGTH words # (LENGTH = 11 in this file) ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ...
len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Note: the assembly only allocates and uses tmp[0 ... len], i.e. len+1 words. # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x array define(`TP', `rbp')dnl # register that points to the tmp array (set once, never advanced) define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i (low 32 bits) define(`ZP', `rdi')dnl # register that holds z. Same as passed in (Linux ABI); copied from %rcx under the Windows ABI ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Loaded from the stack in the prologue' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ...
LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc11: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $96, %rsp # reserve tmp[0 ... LENGTH]: (11+1) words * 8 = 96 bytes ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1'
= T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 40(%TP) `#' Store T0 in tmp[6-1] movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 56(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 48(%TP) `#' Store T0 in tmp[7-1] movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 64(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 56(%TP) `#' Store T0 in tmp[8-1] movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 72(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry!
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 64(%TP) `#' Store T0 in tmp[9-1] movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 72(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 80(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 88(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0.
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multiply u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # carry out stays in the carry flag; the j = 1 pass adds it into its T1 movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP),
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 48(%MP) # m[j]*u addq
%rax, %T0 # Add T0 and low word movq 56(%YP), %rax `#' Fetch y[j+1] = y[7] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 40(%TP) `#' Store T0 in tmp[6-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 7 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 64(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 56(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 64(%YP), %rax `#' Fetch y[j+1] = y[8] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 48(%TP) `#' Store T0 in tmp[7-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 8 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 72(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 64(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 72(%YP), %rax `#' Fetch y[j+1] = y[9] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 56(%TP) `#' Store T0 in tmp[8-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl
define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 9 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 80(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 72(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 80(%YP), %rax `#' Fetch y[j+1] = y[10] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 64(%TP) `#' Store T0 in tmp[9-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 10. Don't fetch new data from y[j+1].
movq 88(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 80(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 72(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 80(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 88(%TP) # Store CY in tmp[j+1] cmpq $11, %I # %I was already incremented; loop back while i < LENGTH (11) jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq 56(%TP), %rdx movq %rax, 48(%ZP) movq %rdx, 56(%ZP) movq 64(%TP), %rax movq 72(%TP), %rdx movq %rax, 64(%ZP) movq %rdx, 72(%ZP) movq 80(%TP), %rax movq %rax, 80(%ZP) movl %CYl, %eax # use carry as return value addq $96, %rsp # release the 96-byte tmp array ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/x86_64/mulredc7.asm0000644023561000001540000004662312113421640012402 00000000000000# mp_limb_t mulredc7(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, # const mp_limb_t *m, mp_limb_t inv_m); # # Linux: z: %rdi, x: %rsi, y: %rdx, m: %rcx, inv_m: %r8 # Needs %rbx, %rsp, %rbp, %r12-%r15 restored # Windows: z: %rcx, x: %rdx, y: %r8, m: %r9, inv_m: 28(%rsp) # Needs %rbx, %rbp, %rdi, %rsi, %r12...%15 restored # This stuff is run through M4 twice, first when generating the # mulredc*.asm files from the mulredc.m4 file (when preparing the distro) # and again when generating the mulredc*.s files from the mulredc*.asm files # when the user compiles the program. # We used to substitute XP etc.
by register names in the first pass, # but now with switching between Linux and Windows ABI, we do it in # the second pass instead when we know which ABI we have, as that # allows us to assign registers differently for the two ABIs. # That means that the defines for XP etc., need to be quoted once to be # protected in the first M4 pass, so that they are processed and # occurrences of XP etc. happen only in the second pass. include(`config.m4') TEXT .align 64 # Opteron L1 code cache line is 64 bytes long GLOBL GSYM_PREFIX`'mulredc7 TYPE(GSYM_PREFIX`'mulredc`'7,`function') # Implements multiplication and REDC for two input numbers of LENGTH words ifdef(`WINDOWS64_ABI', `# Uses Windows ABI', `# Uses Linux ABI') # tmp[0 ... len+1] = 0 # for (i = 0; i < len; i++) # { # t = x[i] * y[0]; /* Keep and reuse this product */ # u = ((t + tmp[0]) * invm) % 2^64 # tmp[0] += (t + m[0]*u) / 2^64; /* put carry in cy. */ # for (j = 1; j < len; j++) # { # tmp[j-1 ... j] += x[i]*y[j] + m[j]*u + (cy << BITS_PER_WORD); # /* put new carry in cy */ # } # tmp[len] = cy; # } # z[0 ... len-1] = tmp[0 ... len-1] # return (tmp[len]) # Values that are referenced only once in the loop over j go into r8 .. r14, # In the inner loop (over j), tmp, x[i], y, m, and u are constant. # tmp[j], tmp[j+1], tmp[j+2] are updated frequently. These 8 values # stay in registers and are referenced as # TP = tmp, YP = y, MP = m, # XI = x[i], T0 = tmp[j], T1 = tmp[j+1], CY = carry define(`T0', `rsi')dnl define(`T0l', `esi')dnl define(`T1', `rbx')dnl define(`T1l', `ebx')dnl define(`CY', `rcx')dnl define(`CYl', `ecx')dnl define(`CYb', `cl')dnl define(`XI', `r14')dnl # register that holds x[i] value define(`U', `r11')dnl define(`XP', `r13')dnl # register that points to the x arraz define(`TP', `rbp')dnl # register that points to t + i define(`I', `r12')dnl # register that holds loop counter i define(`Il', `r12d')dnl # register that holds loop counter i define(`ZP', `rdi')dnl # register that holds z. 
Same as passed in ifdef(`WINDOWS64_ABI', `define(`YP', `r8')dnl # points to y array, same as passed in define(`MP', `r9')dnl # points to m array, same as passed in define(`INVM', `r10')dnl # register that holds invm. Same as passed in' , `define(`YP', `r9')dnl # register that points to the y array define(`MP', `r10')dnl # register that points to the m array define(`INVM', `r8')dnl # register that holds invm. Same as passed in' )dnl `#' Register vars: `T0' = T0, `T1' = T1, `CY' = CY, `XI' = XI, `U' = U `#' `YP' = YP, `MP' = MP, `TP' = TP # local variables: tmp[0 ... LENGTH] array, having LENGTH+1 8-byte words # The tmp array needs LENGTH+1 entries, the last one is so that we can # store CY at tmp[j+1] for j == len-1 GSYM_PREFIX`'mulredc7: pushq %rbx pushq %rbp pushq %r12 pushq %r13 pushq %r14 ifdef(`WINDOWS64_ABI', ` pushq %rsi pushq %rdi ') dnl ifdef(`WINDOWS64_ABI', ` movq %rdx, %XP movq %rcx, %ZP movq 96(%rsp), %INVM # 7 push, ret addr, 4 reg vars = 96 bytes' , ` movq %rsi, %XP # store x in XP movq %rdx, %YP # store y in YP movq %rcx, %MP # store m in MP' ) dnl subq $64, %rsp # subtract size of local vars ######################################################################### # i = 0 pass ######################################################################### # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be != 0) # Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u movq (%XP), %XI # XI = x[0] movq (%YP), %rax # rax = y[0] xorl %CYl, %CYl # set %CY to 0 lea (%rsp), %TP # store addr of tmp array in TP movl %CYl, %Il # Set %I to 0 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I movq %rax, %T0 # Move low word of product to T0 movq %rdx, %T1 # Move high word of product to T1 ifdef(`MULREDC_SVOBODA', , `' ` imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64' ) movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored movq 8(%YP), %rax # Fetch y[1] adcq %rdx, %T1 # setc %CYb # CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence # CY:T1 <= 2*2^64 - 4 ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 2f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(2) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 8(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 0(%TP) `#' Store T0 in tmp[1-1] movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 16(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 8(%TP) `#' Store T0 in tmp[2-1] movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 24(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 16(%TP) `#' Store T0 in tmp[3-1] movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 32(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 24(%TP) `#' Store T0 in tmp[4-1] movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 undefined `#' %CY = carry into T1 (is <= 2) # We have %CY:%T1 <= 2 * 2^64 - 2 movl %CYl, %T1l # T1 = CY <= 1 # Here, T1:T0 <= 2*2^64 - 2 mulq %XI # y[j] * x[i] # rdx:rax <= (2^64-1)^2 <= 2^128 - 2*2^64 + 1 addq %rax, %T0 # Add low word to T0 movq 40(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 # T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 2 <= 2^128 - 1, no carry! 
ifdef(`WANT_ASSERT', ` jnc 3f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(3)') mulq %U # m[j]*u # rdx:rax <= 2^128 - 2*2^64 + 1, T1:T0 <= 2^128 - 1 addq %T0, %rax # Add T0 and low word movq %rax, 32(%TP) `#' Store T0 in tmp[5-1] movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 # CY:T1:T0 <= 2^128 - 1 + 2^128 - 2*2^64 + 1 <= # 2 * 2^128 - 2*2^64 ==> CY:T1 <= 2 * 2^64 - 2 define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6. Don't fetch new data from y[j+1]. movl %CYl, %T1l # T1 = CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 40(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 48(%TP) # Store T1 in tmp[j] setc %CYb # %CY <= 1 movq %CY, 56(%TP) # Store CY in tmp[j+1] ######################################################################### # i > 0 passes ######################################################################### .align 32,,16 LABEL_SUFFIX(1) # register values at loop entry: %TP = tmp, %I = i, %YP = y, %MP = m # %CY < 255 (i.e. only low byte may be > 0) # Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory # and compute the new u movq (%XP,%I,8), %XI # XI = x[i] movq (%YP), %rax # rax = y[0] #init the register tmp ring buffer movq (%TP), %T0 # Load tmp[0] into T0 movq 8(%TP), %T1 # Load tmp[1] into T1 mulq %XI # rdx:rax = y[0] * x[i] addq $1, %I addq %T0, %rax # Add T0 to low word adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 movq %rax, %T0 # Save sum of low words in T0 imulq %INVM, %rax # %rax = ((x[i]*y[0]+tmp[0])*invm)%2^64 movq %rax, %U # this is the new u value mulq (%MP) # multipy u*m[0] addq %rax, %T0 # Now %T0 = 0, need not be stored adcq %rdx, %T1 # movq 8(%YP), %rax # Fetch y[1] ifdef(`WANT_ASSERT', ` pushf testq %T0, %T0 jz 4f lea _GLOBAL_OFFSET_TABLE_(%rip), %rbx # if we do PIC code, we # need to set rbx; if not, it doesnt hurt call GSYM_PREFIX`'abort@plt LABEL_SUFFIX(4) popf') define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 1 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 16(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 8(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 16(%YP), %rax `#' Fetch y[j+1] = y[2] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 0(%TP) `#' Store T0 in tmp[1-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 
`#' Pass for j = 2 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 24(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 16(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 24(%YP), %rax `#' Fetch y[j+1] = y[3] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 8(%TP) `#' Store T0 in tmp[2-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 3 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 32(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 24(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 32(%YP), %rax `#' Fetch y[j+1] = y[4] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 16(%TP) `#' Store T0 in tmp[3-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 4 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 40(%TP), 
%T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 32(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 40(%YP), %rax `#' Fetch y[j+1] = y[5] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 24(%TP) `#' Store T0 in tmp[4-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 5 `#' Register values at entry: `#' %rax = y[j], %XI = x[i], %U = u `#' %TP = tmp, %T0 = value to store in tmp[j], %T1 value to store in `#' tmp[j+1], %CY = carry into T1, carry flag: also carry into T1 movq 48(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] setc %CYb # %CY <= 1 mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq %U, %rax adcq %rdx, %T1 # Add high word with carry to T1 adcb $0, %CYb # %CY <= 2 mulq 40(%MP) # m[j]*u addq %rax, %T0 # Add T0 and low word movq 48(%YP), %rax `#' Fetch y[j+1] = y[6] into %rax adcq %rdx, %T1 # Add high word with carry to T1 movq %T0, 32(%TP) `#' Store T0 in tmp[5-1] define(`TT', defn(`T0'))dnl define(`TTl', defn(`T0l'))dnl define(`T0', defn(`T1'))dnl define(`T0l', defn(`T1l'))dnl define(`T1', defn(`TT'))dnl define(`T1l', defn(`TTl'))dnl undefine(`TT')dnl undefine(`TTl')dnl `#' Now `T0' = T0, `T1' = T1 `#' Pass for j = 6. Don't fetch new data from y[j+1]. 
movq 56(%TP), %T1 adcq %CY, %T1 # T1 = CY + tmp[j+1] mulq %XI # y[j] * x[i] addq %rax, %T0 # Add low word to T0 movq 48(%MP), %rax # Fetch m[j] into %rax adcq %rdx, %T1 # Add high word with carry to T1 setc %CYb # %CY <= 1 mulq %U # m[j]*u addq %rax, %T0 # Add low word to T0 movq %T0, 40(%TP) # Store T0 in tmp[j-1] adcq %rdx, %T1 # Add high word with carry to T1 movq %T1, 48(%TP) # Store T1 in tmp[j] adcb $0, %CYb # %CY <= 2 movq %CY, 56(%TP) # Store CY in tmp[j+1] cmpq $7, %I jb 1b # Copy result from tmp memory to z movq (%TP), %rax movq 8(%TP), %rdx movq %rax, (%ZP) movq %rdx, 8(%ZP) movq 16(%TP), %rax movq 24(%TP), %rdx movq %rax, 16(%ZP) movq %rdx, 24(%ZP) movq 32(%TP), %rax movq 40(%TP), %rdx movq %rax, 32(%ZP) movq %rdx, 40(%ZP) movq 48(%TP), %rax movq %rax, 48(%ZP) movl %CYl, %eax # use carry as return value addq $64, %rsp ifdef(`WINDOWS64_ABI', ` popq %rdi popq %rsi ') dnl popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx ret ecm-6.4.4/b1_ainc.c0000644023561000001540000002132512106741273010661 00000000000000/* Code to compute "Automatic calculated" B1 incrementation Copyright 2003, 2005, 2006 Jim Fougeron, Paul Zimmermann. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include "ecm-ecm.h" #include /* * Version #2 function is the one we are using with a const * adjustment of 1.33 */ /* Version #1 and Version #3 are not being used, but they have been kept in the source, so that they can be refered to if and when changes are made */ double calc_B1_AutoIncrement_v3 (double cur_B1, double incB1val, int calcInc); double calc_B1_AutoIncrement_v1 (double cur_B1, double incB1val, int calcInc); /* Here is my "first" attempt at a B1 adjustment function. * Parameters: * cur_B1 the current B1 level * incB1val This is ether a constant or an "adjustment factor" * calcInc Tells whether incB1val is a constant, or whether we * should compute the optimal B1 adjustment, and then * "adjust" that optimal value based up incB1val, which * is then treadted as a scaling factor. * * Returns: the new B1 value to use. * * Assumption The return value is based upon providing the recommended * optimal number of curves for a "range" of B1's, then * computing the amount of adjustment needed to push B1 to * the next level. NOTE this may be too slow of a push. * If it optimal curves at 250000 is 500, using 500 curves * in an ever advancing B1 level from 250000 to the next * level (1e6) is considerably more work than the simple * 500 curves at B1=250000. It might be prudent to make * the adjustment deal with the low bound value, the high * bound value, and how far the current B1 is from the low * and the high B1 boundary. */ double calc_B1_AutoIncrement_v1 (double cur_B1, double incB1val, int calcInc) { double B1Mod; if (!calcInc) return cur_B1 + incB1val; /* incB1val is a constant to add to B1 */ /* This simple table was "created" based upon the "Optimal B1 table" in the README file */ if (cur_B1 < 2000.) B1Mod = 200.; else if (cur_B1 < 11000.) /* 30 curves from B1=2000 to 11000 */ B1Mod = 300.; else if (cur_B1 < 50000.) /* 90 curves from B1=11000 to 50000 */ B1Mod = 433.3334; else if (cur_B1 < 250000.) 
/* 240 curves from B1=50000 to 250000 */ B1Mod = 833.3334; else if (cur_B1 < 1000000.) /* 500 curves from B1=250000 to 1e6 */ B1Mod = 1500.; else if (cur_B1 < 3000000.) /* 1100 curves from B1=1e6 to 3e6 */ B1Mod = 1818.18182; else if (cur_B1 < 11000000.) /* 2900 curves from B1=3e6 to 11e6 */ B1Mod = 2758.621; else if (cur_B1 < 43000000.) /* 5500 curves from B1=11e6 to 43e6 */ B1Mod = 5818.18182; else if (cur_B1 < 110000000.) /* 9000 curves from B1=43e6 to 11e7 */ B1Mod = 7444.44445; else if (cur_B1 < 260000000.) /* 22000 curves from B1=11e7 to 26e7 */ B1Mod = 6818.18182; else if (cur_B1 < 850000000.) /* 52000 curves from B1=26e7 to 85e7 */ B1Mod = 11346.1539; else if (cur_B1 < 2900000000.) /* 83000 curves from B1=85e7 to 29e8 */ B1Mod = 24698.8; else B1Mod = 35000.; return floor (cur_B1 + (B1Mod*incB1val) + 0.5); } /* Here is my "second" attempt at a B1 adjustment function. * this version looks pretty good * * THIS is the version being used. */ double calc_B1_AutoIncrement (double cur_B1, double incB1val, int calcInc) { const double const_adj = 1.33; double B1Mod; if (!calcInc) return cur_B1 + incB1val; /* incB1val is a constant to add to B1 */ /* This simple table was "created" based upon the "Optimal B1 table" in the README file */ if (cur_B1 < 2000.) B1Mod = 200.; else if (cur_B1 < 11000.) /* 30 curves from B1=2000 to 11000 */ { B1Mod = 300. * (1. - ((cur_B1 - 2000.) / 9000.)); B1Mod +=433.334 * (1. - ((11000. - cur_B1) / 9000.)); } else if (cur_B1 < 50000.) /* 90 curves from B1=11000 to 50000 */ { B1Mod = 433.334 * (1. - ((cur_B1 - 11000.) / 39000.)); B1Mod +=833.334 * (1. - ((50000. - cur_B1) / 39000.)); } else if (cur_B1 < 250000.) /* 240 curves from B1=50000 to 250000 */ { B1Mod = 833.334 * (1. - ((cur_B1 - 50000.) / 200000.)); B1Mod +=1500. * (1. - ((250000. - cur_B1) / 200000.)); } else if (cur_B1 < 1000000.) /* 500 curves from B1=250000 to 1e6 */ { B1Mod = 1500. * (1. - ((cur_B1 - 250000.) / 750000.)); B1Mod +=1818.18182 * (1. - ((1000000. 
- cur_B1) / 750000.)); } else if (cur_B1 < 3000000.) /* 1100 curves from B1=1e6 to 3e6 */ { B1Mod = 1818.18182 * (1. - ((cur_B1 - 1000000.) / 2000000.)); B1Mod +=2758.621 * (1. - ((3000000. - cur_B1) / 2000000.)); } else if (cur_B1 < 11000000.) /* 2900 curves from B1=3e6 to 11e6 */ { B1Mod = 2758.621 * (1. - ((cur_B1 - 3000000.) / 8000000.)); B1Mod +=5818.18182 * (1. - ((11000000. - cur_B1) / 8000000.)); } else if (cur_B1 < 43000000.) /* 5500 curves from B1=11e6 to 43e6 */ { B1Mod = 5818.18182 * (1. - ((cur_B1 - 11000000.) / 32000000.)); B1Mod +=7444.44445 * (1. - ((43000000. - cur_B1) / 32000000.)); } else if (cur_B1 < 110000000.) /* 9000 curves from B1=43e6 to 11e7 */ { B1Mod = 7444.44445 * (1. - ((cur_B1 - 43000000.) / 67000000.)); B1Mod +=6818.18182 * (1. - ((110000000. - cur_B1) / 67000000.)); } else if (cur_B1 < 260000000.) /* 22000 curves from B1=11e7 to 26e7 */ { B1Mod = 6818.18182 * (1. - ((cur_B1 - 110000000.) / 150000000.)); B1Mod +=11346.1539 * (1. - ((260000000. - cur_B1) / 150000000.)); } else if (cur_B1 < 850000000.) /* 52000 curves from B1=26e7 to 85e7 */ { B1Mod = 11346.1539 * (1. - ((cur_B1 - 260000000.) / 590000000.)); B1Mod +=24698.8 * (1. - ((850000000. - cur_B1) / 590000000.)); } else if (cur_B1 < 2900000000.) /* 83000 curves from B1=85e7 to 29e8 */ { B1Mod = 24698.8 * (1. - ((cur_B1 - 850000000.) / 2050000000.)); B1Mod +=50000.0 * (1. - ((2900000000. - cur_B1) / 2050000000.)); } else B1Mod = 50000.; return floor (cur_B1 + const_adj*(B1Mod*incB1val) + 0.5); } /* Here is my "third" attempt at a B1 adjustment function. 
* It seems to adjust too quickly */ double B1Min[12] = { 2000.0, 11000.0, 50000.0, 250000.0, 1000000.0, 3000000.0, 11000000.0, 43000000.0, 110000000.0, 260000000.0, 850000000.0, 2900000000.0 }; double B1Max[12] = { 11000.0, 50000.0, 250000.0, 1000000.0, 3000000.0, 11000000.0, 43000000.0, 110000000.0, 260000000.0, 850000000.0, 2900000000.0, 9000000000.0 }; double B1Inc[12] = { 300.0, 433.334, 833.334, 1500.0, 1818.1819, 2758.621, 5818.1819, 7444.4445, 6818.1819, 11346.1539, 24698.8, 50000.0 }; /*B1Table_t B1Table[12] = {300,0 ,2000.0 ,11000.0 }, {433.334, ,11000.0 ,50000.0 }, {833.334, ,50000.0 ,250000.0 }, {1500.0 ,250000.0 ,1000000.0 }, {1818.1819, ,1000000.0 ,3000000.0 }, {2758.621, ,3000000.0 ,11000000.0 }, {5818.1819, ,11000000.0 ,43000000.0 }, {7444.4445, ,43000000.0 ,110000000.0 }, {6818.1819, ,110000000.0 ,260000000.0 }, NOTE the increment does not look larger enough here!! {11346.1539, ,260000000.0 ,850000000.0 }, {24698.8, ,850000000.0 ,2900000000.0 }, {50000.0, ,2900000000.0 ,9000000000.0 }; */ double calc_B1_AutoIncrement_v3 (double cur_B1, double incB1val, int calcInc) { double B1Mod; if (!calcInc) return cur_B1 + incB1val; /* incB1val is a constant to add to B1 */ /* This simple table was "created" based upon the "Optimal B1 table" in the README file */ if (cur_B1 < 2000.) B1Mod = 200.; else if (cur_B1 > 2900000000.) B1Mod = 50000; else { double OrigMin; int i = 0; while (i < 11 && B1Max[i] < cur_B1) ++i; B1Mod = B1Inc[i] * (1. - ((cur_B1 - B1Min[i]) / (B1Max[i] - B1Min[i]))); OrigMin = B1Min[i]; while (++i < 12) { B1Mod += B1Inc[i] * (1. - ((B1Min[i] - cur_B1) / (B1Min[i] - OrigMin))); } } return floor (cur_B1 + (B1Mod*incB1val) + 0.5); } ecm-6.4.4/ecm-gmp.h0000644023561000001540000001152312110713341010704 00000000000000/* Part of file gmp-impl.h from GNU MP. Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. This file contains modified code from the GNU MP Library. 
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
MA 02110-1301, USA. */

#ifndef _ECM_GMP_H
#define _ECM_GMP_H 1

#include "config.h"
#include

/* Pick a definition of alloca for the compiler in use, unless alloca is
   already defined as a macro.  The final fallback only declares it. */
#ifndef alloca
#ifdef __GNUC__
# define alloca __builtin_alloca
#elif defined (__DECC)
# define alloca(x) __ALLOCA(x)
#elif defined (_MSC_VER)
# include
# define alloca _alloca
#elif defined(HAVE_ALLOCA_H) || defined (sun)
# include
#elif defined (_AIX) || defined (_IBMR2)
#pragma alloca
#else
char *alloca ();
#endif
#endif

/* Direct access to the fields of an mpz-style object x: allocated limb
   count, limb pointer, signed size, and the absolute value of the size. */
#define ABSIZ(x) ABS (SIZ (x))
#define ALLOC(x) ((x)->_mp_alloc)
#define PTR(x) ((x)->_mp_d)
#define SIZ(x) ((x)->_mp_size)

/* Temporary allocation: TMP_ALLOC maps straight onto alloca, and the
   DECL/MARK/FREE bookkeeping macros expand to nothing. */
#define TMP_DECL(m)
#define TMP_ALLOC(x) alloca(x)
#define TMP_MARK(m)
#define TMP_FREE(m)
/* Typed variants: room for n objects of the given type / n limbs. */
#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)

/* Yield a limb pointer for z: calls _mpz_realloc only when n exceeds the
   current allocation, otherwise returns the existing pointer. */
#ifndef MPZ_REALLOC
#define MPZ_REALLOC(z,n) ((n) > ALLOC(z) ? _mpz_realloc(z,n) : PTR(z))
#endif

/* Copy n limbs from s to d with memcpy. */
#ifndef MPN_COPY
#include /* for memcpy */
#define MPN_COPY(d,s,n) memcpy((d),(s),(n)*sizeof(mp_limb_t))
#endif

/* Decrease NLIMBS while the limb at index NLIMBS-1 of DST is zero,
   stopping at 0.  NLIMBS is modified in place and is expanded several
   times, so it must be a plain modifiable variable. */
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS) \
  do { \
    while (NLIMBS > 0) \
      { \
        if ((DST)[(NLIMBS) - 1] != 0) \
          break; \
        NLIMBS--; \
      } \
  } while (0)
#endif

/* Store 0 into n consecutive limbs starting at dst.  The n != 0 test is
   needed because the inner do/while always runs at least once. */
#ifndef MPN_ZERO
#define MPN_ZERO(dst, n) \
  do { \
    if ((n) != 0) \
      { \
        mp_ptr __dst = (dst); \
        mp_size_t __n = (n); \
        do \
          *__dst++ = 0; \
        while (--__n); \
      } \
  } while (0)
#endif

/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))

/* Return non-zero if xp,xsize and yp,ysize are either identical or not
   overlapping.  Return zero if they're partially overlapping. */
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))

/* Store into d the bitwise complement of each of the n limbs of s, masked
   with GMP_NUMB_MASK.  Asserts n >= 1 and that d and s are identical or
   non-overlapping. */
#ifndef mpn_com_n
#define mpn_com_n(d,s,n) \
  do { \
    mp_ptr __d = (d); \
    mp_srcptr __s = (s); \
    mp_size_t __n = (n); \
    ASSERT (__n >= 1); \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
    do \
      *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
    while (--__n); \
  } while (0)
#endif

/* Declare __gmpn_add_nc when configure found it (HAVE___GMPN_ADD_NC) and
   it is not already defined as a macro. */
#ifdef HAVE___GMPN_ADD_NC
#ifndef __gmpn_add_nc
__GMP_DECLSPEC mp_limb_t __gmpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr,
    mp_size_t, mp_limb_t);
#endif
#endif

/* Pack a three-part version number into one integer so that versions can
   be compared with ordinary relational operators. */
#define ECM_VERSION_NUM(a,b,c) (((a) << 16L) | ((b) << 8) | (c))

/* Defined when building against GMP (not MPIR) version 5.1.0 or later;
   it selects the mp_limb_t-returning prototypes for redc_1/redc_2 below. */
#if !defined( __MPIR_RELEASE ) && ECM_VERSION_NUM(__GNU_MP_VERSION,__GNU_MP_VERSION_MINOR,__GNU_MP_VERSION_PATCHLEVEL) >= ECM_VERSION_NUM(5,1,0)
#define MPN_REDC12_RETURNS_CARRY 1
#endif

/* GMP currently does not define prototypes for these, but MPIR does */
#if defined(HAVE___GMPN_REDC_1) && !defined( __MPIR_RELEASE )
#ifdef MPN_REDC12_RETURNS_CARRY
mp_limb_t __gmpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#else
void __gmpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#endif

#if defined(HAVE___GMPN_REDC_2) && !defined( __MPIR_RELEASE )
#ifdef MPN_REDC12_RETURNS_CARRY
mp_limb_t __gmpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#else
void __gmpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif
#endif

/* The next two prototypes are declared whenever configure found the
   symbol, with no MPIR exclusion. */
#if defined(HAVE___GMPN_REDC_N)
void __gmpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#if defined(HAVE___GMPN_MULLO_N)
void __gmpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif

#endif /* _ECM_GMP_H */
ecm-6.4.4/mul_fft-params.h.pentium40000644023561000001540000001073112106741273014052 00000000000000#define MUL_FFT_MODF_THRESHOLD 480
#define SQR_FFT_MODF_THRESHOLD 480
#define MUL_FFT_TABLE2 {{1, 4 /*66*/}, {305, 5 /*95*/}, {321, 4 /*97*/}, {337, 5 /*95*/}, {353, 4 /*97*/}, {369, 5 /*96*/}, {801, 6 /*96*/}, {1281, 7 /*91*/}, {1409, 6 /*97*/}, {1601, 7 /*92*/}, {1921, 6 /*98*/}, {1985, 7 /*94*/}, {2689, 8 /*91*/}, {2817, 7 /*95*/}, {3201, 8 /*92*/},
{3329, 7 /*96*/}, {3457, 8 /*87*/}, {3841, 7 /*96*/}, {3969, 8 /*88*/}, {4865, 7 /*97*/}, {4993, 8 /*90*/}, {6913, 9 /*87*/}, {7681, 8 /*96*/}, {8961, 9 /*90*/}, {9729, 8 /*97*/}, {9985, 9 /*83*/}, {11777, 8 /*97*/}, {12033, 9 /*85*/}, {13825, 10 /*87*/}, {15361, 9 /*96*/}, {15873, 8 /*98*/}, {16129, 9 /*88*/}, {19969, 10 /*83*/}, {23553, 9 /*97*/}, {26113, 10 /*81*/}, {31745, 9 /*98*/}, {34305, 10 /*85*/}, {39937, 9 /*98*/}, {40449, 10 /*83*/}, {48129, 11 /*75*/}, {63489, 10 /*98*/}, {80897, 11 /*83*/}, {96257, 12 /*75*/}, {126977, 11 /*98*/}, {129025, 9 /*98*/}, {130561, 11 /*80*/}, {194561, 12 /*75*/}, {258049, 10 /*98*/}, {261121, 9 /*99*/}, {261633, 10 /*94*/}, {277505, 9 /*99*/}, {278017, 10 /*94*/}, {293889, 9 /*99*/}, {294401, 7 /*99*/}, {294529, 8 /*99*/}, {294657, 10 /*94*/}, {310273, 9 /*99*/}, {310785, 10 /*95*/}, {326657, 12 /*83*/}, {389121, 13 /*75*/}, {516097, 11 /*98*/}, {522241, 10 /*99*/}, {523265, 11 /*94*/}, {587777, 10 /*99*/}, {588801, 11 /*94*/}, {620545, 10 /*99*/}, {621569, 9 /*99*/}, {622081, 11 /*95*/}, {653313, 10 /*99*/}, {662529, 11 /*96*/}, {686081, 10 /*99*/}, {687105, 9 /*99*/}, {687617, 11 /*95*/}, {718849, 10 /*99*/}, {752641, 9 /*99*/}, {753153, 11 /*95*/}, {784385, 10 /*99*/}, {818177, 9 /*99*/}, {818689, 11 /*96*/}, {849921, 10 /*99*/}, {850945, 11 /*96*/}, {882689, 10 /*99*/}, {883713, 9 /*99*/}, {884225, 11 /*96*/}, {980993, 10 /*99*/}, {982017, 12 /*93*/}, {LONG_MAX, 0}} #define MUL_FFTM_TABLE2 {{1, 4 /*66*/}, {273, 5 /*94*/}, {289, 4 /*97*/}, {305, 5 /*95*/}, {609, 6 /*95*/}, {641, 5 /*97*/}, {673, 6 /*95*/}, {705, 5 /*97*/}, {737, 6 /*96*/}, {1473, 7 /*96*/}, {1537, 6 /*98*/}, {1601, 7 /*96*/}, {1665, 6 /*98*/}, {1729, 7 /*96*/}, {2689, 8 /*91*/}, {2817, 7 /*97*/}, {2945, 8 /*92*/}, {3329, 7 /*98*/}, {3457, 8 /*93*/}, {5377, 9 /*91*/}, {5633, 8 /*95*/}, {6401, 9 /*92*/}, {6657, 8 /*96*/}, {6913, 9 /*87*/}, {7681, 8 /*96*/}, {7937, 9 /*88*/}, {8705, 8 /*97*/}, {8961, 9 /*90*/}, {13825, 10 /*87*/}, {15361, 9 /*96*/}, 
{17921, 10 /*90*/}, {19457, 9 /*97*/}, {19969, 10 /*83*/}, {23553, 9 /*97*/}, {24065, 10 /*85*/}, {27649, 11 /*87*/}, {30721, 10 /*96*/}, {31745, 9 /*98*/}, {32257, 10 /*88*/}, {39937, 11 /*83*/}, {47105, 10 /*97*/}, {48129, 12 /*75*/}, {61441, 11 /*96*/}, {63489, 10 /*98*/}, {68609, 11 /*85*/}, {79873, 10 /*98*/}, {80897, 11 /*83*/}, {96257, 12 /*75*/}, {126977, 11 /*98*/}, {161793, 12 /*83*/}, {192513, 13 /*75*/}, {253953, 12 /*98*/}, {258049, 10 /*98*/}, {261121, 9 /*99*/}, {261633, 10 /*94*/}, {277505, 12 /*85*/}, {323585, 10 /*99*/}, {326657, 9 /*99*/}, {327169, 10 /*95*/}, {330753, 12 /*84*/}, {389121, 10 /*99*/}, {392193, 9 /*99*/}, {392705, 10 /*96*/}, {408577, 9 /*99*/}, {409089, 8 /*99*/}, {409345, 10 /*96*/}, {412673, 12 /*90*/}, {454657, 13 /*87*/}, {516097, 11 /*98*/}, {522241, 10 /*99*/}, {523265, 11 /*94*/}, {555009, 10 /*99*/}, {556033, 9 /*99*/}, {556545, 11 /*94*/}, {587777, 10 /*99*/}, {588801, 11 /*94*/}, {620545, 10 /*99*/}, {621569, 9 /*99*/}, {622081, 11 /*95*/}, {653313, 10 /*99*/}, {654337, 11 /*95*/}, {686081, 13 /*87*/}, {778241, 11 /*99*/}, {817153, 10 /*99*/}, {818177, 9 /*99*/}, {818689, 11 /*96*/}, {849921, 10 /*99*/}, {850945, 11 /*96*/}, {882689, 10 /*99*/}, {883713, 9 /*99*/}, {884225, 11 /*96*/}, {915457, 12 /*93*/}, {978945, 14 /*93*/}, {LONG_MAX, 0}} #define MUL_FFT_FULL_TABLE2 {{100, 2}, {216, 1}, {256, 2}, {264, 1}, {304, 2}, {312, 1}, {544, 4}, {560, 1}, {704, 2}, {720, 1}, {896, 2}, {960, 7}, {40960, 2}, {47616, 1}, {49152, 6}, {53760, 4}, {56320, 1}, {64512, 4}, {71680, 5}, {86016, 2}, {96768, 4}, {99840, 1}, {131072, 6}, {136192, 7}, {147456, 6}, {150528, 4}, {161280, 1}, {161792, 3}, {172032, 2}, {193536, 1}, {259072, 6}, {286720, 7}, {294912, 6}, {301056, 4}, {322560, 3}, {344064, 2}, {387072, 1}, {393216, 4}, {404480, 3}, {409600, 1}, {417792, 3}, {425984, 1}, {524288, 6}, {530432, 7}, {557056, 6}, {566272, 5}, {577536, 4}, {593920, 6}, {602112, 5}, {614400, 4}, {645120, 3}, {647168, 4}, {652800, 1}, {654336, 6}, 
{673792, 3}, {688128, 2}, {724992, 4}, {727040, 1}, {753664, 2}, {783360, 4}, {816640, 6}, {831488, 1}, {851968, 2}, {860160, 3}, {868352, 2}, {881664, 7}, {884736, 1}, {921600, 7}, {950272, 1}, {LONG_MAX, 1}} ecm-6.4.4/ecm-impl.h0000644023561000001540000007452112110712022011064 00000000000000/* ecm-impl.h - header file for libecm Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Paul Zimmermann, Alexander Kruppa and Cyril Bouvier. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #ifndef _ECM_IMPL_H #define _ECM_IMPL_H 1 #include "config.h" #include "ecm.h" #ifdef HAVE_SYS_TYPES_H #include /* needed for size_t */ #endif #if HAVE_STDINT_H #include /* needed for int64_t and uint64_t */ /* or configure will define these for us if possible */ #endif #if defined UINT64_MAX || defined uint64_t typedef int64_t ecm_int; typedef uint64_t ecm_uint; #define ECM_INT_MAX INT64_MAX #define ECM_UINT_MAX UINT64_MAX #elif defined HAVE_LONG_LONG_INT typedef long long ecm_int; typedef unsigned long long ecm_uint; #define ECM_INT_MAX LLONG_MAX #define ECM_UINT_MAX ULLONG_MAX #else typedef long ecm_int; typedef unsigned long ecm_uint; #define ECM_INT_MAX LONG_MAX #define ECM_UINT_MAX ULONG_MAX #endif #ifndef TUNE #include "ecm-params.h" #else extern size_t MPZMOD_THRESHOLD; extern size_t REDC_THRESHOLD; #endif extern size_t mpn_mul_lo_threshold[]; #include /* needed for "FILE *" */ #include #if defined (__STDC__) \ || defined (__cplusplus) \ || defined (_AIX) \ || defined (__DECC) \ || (defined (__mips) && defined (_SYSTYPE_SVR4)) \ || defined (_MSC_VER) \ || defined (_WIN32) #define __ECM_HAVE_TOKEN_PASTE 1 #else #define __ECM_HAVE_TOKEN_PASTE 0 #endif #ifndef __ECM #if __ECM_HAVE_TOKEN_PASTE #define __ECM(x) __ecm_##x #else #define __ECM(x) __ecm_/**/x #endif #endif #define ECM_STDOUT __ecm_stdout #define ECM_STDERR __ecm_stderr extern FILE *ECM_STDOUT, *ECM_STDERR; /* Warnings about unused parameters by gcc can be suppressed by prefixing parameter with ATTRIBUTE_UNUSED when parameter can't be removed, i.e. 
for interface consistency reasons */ #ifdef __GNUC__ #if __GNUC__ >= 3 #define ATTRIBUTE_UNUSED __attribute__ ((unused)) #else #define ATTRIBUTE_UNUSED #endif #define ATTRIBUTE_CONST __attribute__ ((const)) #else #define ATTRIBUTE_UNUSED #define ATTRIBUTE_CONST #endif #ifndef LIKELY #if defined(__GNUC__) #define LIKELY(x) __builtin_expect ((x) != 0, 1) #else #define LIKELY(x) x #endif #endif #ifndef UNLIKELY #if defined(__GNUC__) #define UNLIKELY(x) __builtin_expect ((x) != 0, 0) #else #define UNLIKELY(x) x #endif #endif /* default B2 choice: pow (B1 * METHOD_COST / 6.0, DEFAULT_B2_EXPONENT) */ #define DEFAULT_B2_EXPONENT 1.43 #define PM1_COST 1.0 / 6.0 #define PP1_COST 2.0 / 6.0 #define ECM_COST 11.0 / 6.0 /* For new P-/+1 stage 2: */ #define PM1FS2_DEFAULT_B2_EXPONENT 1.7 #define PM1FS2_COST 1.0 / 4.0 #define PP1FS2_COST 1.0 / 4.0 /* if POLYEVALTELLEGEN is defined, use polyeval_tellegen(), otherwise use polyeval() */ #define POLYEVALTELLEGEN /* use Kronecker-Scho"nhage's multiplication */ #define KS_MULTIPLY /* define top-level multiplication */ #define KARA 2 #define TOOM3 3 #define TOOM4 4 #define KS 5 #define NTT 6 /* maximal limb size of assembly mulredc */ #define MULREDC_ASSEMBLY_MAX 20 #include "sp.h" /* compile with -DMULT=2 to override default */ #ifndef MULT #ifdef KS_MULTIPLY #define MULT KS #else #define MULT TOOM4 #endif #endif #ifdef POLYEVALTELLEGEN #define USE_SHORT_PRODUCT #endif #include #define ASSERT_ALWAYS(expr) assert (expr) #ifdef WANT_ASSERT #define ASSERT(expr) assert (expr) #else #define ASSERT(expr) do {} while (0) #endif #ifdef MEMORY_DEBUG void tests_free (void *, size_t); void tests_memory_set_location (char *, unsigned int); #define FREE(ptr,size) tests_free(ptr,size) #define MEMORY_TAG tests_memory_set_location(__FILE__,__LINE__) #define MEMORY_UNTAG tests_memory_set_location("",0) #define MPZ_INIT(x) {MEMORY_TAG;mpz_init(x);MEMORY_UNTAG;} #define MPZ_INIT2(x,n) {MEMORY_TAG;mpz_init2(x,n);MEMORY_UNTAG;} #else #define FREE(ptr,size) 
free(ptr) #define MEMORY_TAG do{}while(0) #define MEMORY_UNTAG do{}while(0) #define MPZ_INIT(x) mpz_init(x) #define MPZ_INIT2(x,n) mpz_init2(x,n) #endif /* thresholds */ #define MPN_MUL_LO_THRESHOLD 32 /* base2mod is used when size(2^n+/-1) <= BASE2_THRESHOLD * size(cofactor) */ #define BASE2_THRESHOLD 1.4 /* default number of probable prime tests */ #define PROBAB_PRIME_TESTS 1 /* kronecker_schonhage() is used instead of toomcook4() when bitsize(poly) >= KS_MUL_THRESHOLD */ #define KS_MUL_THRESHOLD 1e6 /* same for median product */ #define KS_TMUL_THRESHOLD 8e5 #define ABS(x) ((x) >= 0 ? (x) : -(x)) /* getprime */ #define WANT_FREE_PRIME_TABLE(p) (p < 0.0) #define FREE_PRIME_TABLE -1.0 /* 2^n+-1 with n < MOD_MINBASE2 cannot use base-2 reduction */ #define MOD_MINBASE2 16 /* Various logging levels */ /* OUTPUT_ALWAYS means print always, regardless of verbose value */ #define OUTPUT_ALWAYS 0 /* OUTPUT_NORMAL means print during normal program execution */ #define OUTPUT_NORMAL 1 /* OUTPUT_VERBOSE means print if the user requested more verbosity */ #define OUTPUT_VERBOSE 2 /* OUTPUT_RESVERBOSE is for printing residues (after stage 1 etc) */ #define OUTPUT_RESVERBOSE 3 /* OUTPUT_DEVVERBOSE is for printing internal parameters (for developers) */ #define OUTPUT_DEVVERBOSE 4 /* OUTPUT_TRACE is for printing trace data, produces lots of output */ #define OUTPUT_TRACE 5 /* OUTPUT_ERROR is for printing error messages */ #define OUTPUT_ERROR -1 /* Interval length for writing checkpoints in stage 1, in milliseconds */ #define CHKPNT_PERIOD 600000 typedef mpz_t mpres_t; typedef mpz_t* listz_t; typedef struct { mpres_t x; mpres_t y; } __point_struct; typedef __point_struct point; typedef struct { mpres_t x; mpres_t y; mpres_t A; } __curve_struct; typedef __curve_struct curve; typedef struct { unsigned long d1; unsigned long d2; mpz_t i0; int S; } __root_params_t; typedef __root_params_t root_params_t; typedef struct { unsigned long P, s_1, s_2, l; mpz_t m_1; } 
__faststage2_param_t; typedef __faststage2_param_t faststage2_param_t; #define EC_MONTGOMERY_FORM 0 #define EC_WEIERSTRASS_FORM 1 typedef struct { unsigned int size_fd; /* How many entries .fd has, always nr * (S+1) */ unsigned int nr; /* How many separate progressions there are */ unsigned int next; /* From which progression to take the next root */ unsigned int S; /* Degree of the polynomials */ unsigned int dsieve; /* Values not coprime to dsieve are skipped */ unsigned int rsieve; /* Which residue mod dsieve current .next belongs to */ int dickson_a; /* Parameter for Dickson polynomials */ } progression_params_t; typedef struct { progression_params_t params; point *fd; unsigned int size_T; /* How many entries T has */ mpres_t *T; /* For temp values. FIXME: should go! */ curve *X; /* The curve the points are on */ } ecm_roots_state_t; typedef struct { progression_params_t params; mpres_t *fd; int invtrick; } pm1_roots_state_t; typedef struct { progression_params_t params; point *fd; /* for S != 1 */ mpres_t tmp[4]; /* for S=1 */ } pp1_roots_state_t; typedef struct { int alloc; int degree; listz_t coeff; } __polyz_struct; typedef __polyz_struct polyz_t[1]; typedef struct { int repr; /* ECM_MOD_MPZ: plain modulus, possibly normalized ECM_MOD_BASE2: base 2 number ECM_MOD_MODMULN: MODMULN ECM_MOD_REDC: REDC representation */ int bits; /* in case of a base 2 number, 2^k[+-]1, bits = [+-]k in case of MODMULN or REDC representation, nr. of bits b so that 2^b > orig_modulus and GMP_NUMB_BITS | b */ int Fermat; /* If repr = 1 (base 2 number): If modulus is 2^(2^m)+1, i.e. bits = 2^m, then Fermat = 2^m, 0 otherwise. If repr != 1, undefined */ mp_limb_t *Nprim; /* For MODMULN */ mpz_t orig_modulus; /* The original modulus N */ mpz_t aux_modulus; /* Used only for MPZ and REDC: - the auxiliary modulus value (i.e. 
normalized modulus, or -1/N (mod 2^bits) for REDC, - B^(n + ceil(n/2)) mod N for MPZ, where B = 2^GMP_NUMB_BITS */ mpz_t multiple; /* The smallest multiple of N that is larger or equal to 2^bits for REDC/MODMULN */ mpz_t R2, R3; /* For MODMULN and REDC, R^2 and R^3 (mod orig_modulus), where R = 2^bits. */ mpz_t temp1, temp2; /* Temp values used during multiplication etc. */ } __mpmod_struct; typedef __mpmod_struct mpmod_t[1]; #if defined (__cplusplus) extern "C" { #endif /* getprime.c */ #define getprime __ECM(getprime) double getprime (); #define getprime_clear __ECM(getprime_clear) void getprime_clear (); #define getprime_seek __ECM(getprime_seek) void getprime_seek (double); /* pm1.c */ #define pm1_rootsF __ECM(pm1_rootsF) int pm1_rootsF (mpz_t, listz_t, root_params_t *, unsigned long, mpres_t *, listz_t, mpmod_t); #define pm1_rootsG_init __ECM(pm1_rootsG_init) pm1_roots_state_t* pm1_rootsG_init (mpres_t *, root_params_t *, mpmod_t); #define pm1_rootsG __ECM(pm1_rootsG) int pm1_rootsG (mpz_t, listz_t, unsigned long, pm1_roots_state_t *, listz_t, mpmod_t); #define pm1_rootsG_clear __ECM(pm1_rootsG_clear) void pm1_rootsG_clear (pm1_roots_state_t *, mpmod_t); /* pm1fs2.c */ #define pm1fs2_memory_use __ECM(pm1fs2_ntt_memory_use) size_t pm1fs2_memory_use (const unsigned long, const mpz_t, const int); #define pm1fs2_maxlen __ECM(pm1fs2_maxlen) unsigned long pm1fs2_maxlen (const size_t, const mpz_t, const int); #define pp1fs2_memory_use __ECM(pp1fs2_ntt_memory_use) size_t pp1fs2_memory_use (const unsigned long, const mpz_t, const int, const int); #define pp1fs2_maxlen __ECM(pp1fs2_maxlen) unsigned long pp1fs2_maxlen (const size_t, const mpz_t, const int, const int); #define choose_P __ECM(choose_P) long choose_P (const mpz_t, const mpz_t, const unsigned long, const unsigned long, faststage2_param_t *, mpz_t, mpz_t, const int, const int); #define pm1fs2 __ECM(pm1fs2) int pm1fs2 (mpz_t, const mpres_t, mpmod_t, const faststage2_param_t *); #define pm1fs2_ntt 
__ECM(pm1fs2_ntt) int pm1fs2_ntt (mpz_t, const mpres_t, mpmod_t, const faststage2_param_t *); #define pp1fs2 __ECM(pp1fs2) int pp1fs2 (mpz_t, const mpres_t, mpmod_t, const faststage2_param_t *); #define pp1fs2_ntt __ECM(pp1fs2_ntt) int pp1fs2_ntt (mpz_t, const mpres_t, mpmod_t, const faststage2_param_t *, const int); /* bestd.c */ #define bestD __ECM(bestD) int bestD (root_params_t *, unsigned long *, unsigned long *, mpz_t, mpz_t, int, int, double, int, mpmod_t); /* ecm.c */ #define choose_S __ECM(choose_S) int choose_S (mpz_t); #define add3 __ECM(add3) void add3 (mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpmod_t, mpres_t, mpres_t, mpres_t); #define duplicate __ECM(duplicate) void duplicate (mpres_t, mpres_t, mpres_t, mpres_t, mpmod_t, mpres_t, mpres_t, mpres_t, mpres_t); #define ecm_mul __ECM(ecm_mul) void ecm_mul (mpres_t, mpres_t, mpz_t, mpmod_t, mpres_t); #define print_B1_B2_poly __ECM(print_B1_B2_poly) void print_B1_B2_poly (int, int, double, double, mpz_t, mpz_t, mpz_t, int S, mpz_t, int, mpz_t); /* ecm2.c */ #define ecm_rootsF __ECM(ecm_rootsF) int ecm_rootsF (mpz_t, listz_t, root_params_t *, unsigned long, curve *, mpmod_t); #define ecm_rootsG_init __ECM(ecm_rootsG_init) ecm_roots_state_t* ecm_rootsG_init (mpz_t, curve *, root_params_t *, unsigned long, unsigned long, mpmod_t); #define ecm_rootsG __ECM(ecm_rootsG) int ecm_rootsG (mpz_t, listz_t, unsigned long, ecm_roots_state_t *, mpmod_t); #define ecm_rootsG_clear __ECM(ecm_rootsG_clear) void ecm_rootsG_clear (ecm_roots_state_t *, mpmod_t); #define ecm_findmatch __ECM(ecm_findmatch) int ecm_findmatch (unsigned long *, const unsigned long, root_params_t *, const curve *, mpmod_t, const mpz_t); /* lucas.c */ #define pp1_mul_prac __ECM(pp1_mul_prac) void pp1_mul_prac (mpres_t, ecm_uint, mpmod_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t); /* pp1.c */ #define pp1_rootsF __ECM(pp1_rootsF) int pp1_rootsF (listz_t, root_params_t *, unsigned long, mpres_t *, listz_t, mpmod_t); 
#define pp1_rootsG __ECM(pp1_rootsG) int pp1_rootsG (listz_t, unsigned long, pp1_roots_state_t *, mpmod_t, mpres_t*); #define pp1_rootsG_init __ECM(pp1_rootsG_init) pp1_roots_state_t* pp1_rootsG_init (mpres_t*, root_params_t *, mpmod_t); #define pp1_rootsG_clear __ECM(pp1_rootsG_clear) void pp1_rootsG_clear (pp1_roots_state_t *, mpmod_t); /* stage2.c */ #define stage2 __ECM(stage2) int stage2 (mpz_t, void *, mpmod_t, unsigned long, unsigned long, root_params_t *, int, int, char *, int (*)(void)); #define init_progression_coeffs __ECM(init_progression_coeffs) listz_t init_progression_coeffs (mpz_t, const unsigned long, const unsigned long, const unsigned int, const unsigned int, const unsigned int, const int); #define init_roots_params __ECM(init_roots_params) void init_roots_params (progression_params_t *, const int, const unsigned long, const unsigned long, const double); #define memory_use __ECM(memory_use) double memory_use (unsigned long, unsigned int, unsigned int, mpmod_t); /* listz.c */ #define list_mul_mem __ECM(list_mul_mem) int list_mul_mem (unsigned int); #define init_list __ECM(init_list) listz_t init_list (unsigned int); #define init_list2 __ECM(init_list2) listz_t init_list2 (unsigned int, unsigned int); #define clear_list __ECM(clear_list) void clear_list (listz_t, unsigned int); #define list_inp_raw __ECM(list_inp_raw) int list_inp_raw (listz_t, FILE *, unsigned int); #define list_out_raw __ECM(list_out_raw) int list_out_raw (FILE *, listz_t, unsigned int); #define print_list __ECM(print_list) void print_list (listz_t, unsigned int); #define list_set __ECM(list_set) void list_set (listz_t, listz_t, unsigned int); #define list_revert __ECM(list_revert) void list_revert (listz_t, unsigned int); #define list_swap __ECM(list_swap) void list_swap (listz_t, listz_t, unsigned int); #define list_neg __ECM(list_neg) void list_neg (listz_t, listz_t, unsigned int, mpz_t); #define list_mod __ECM(list_mod) void list_mod (listz_t, listz_t, unsigned int, mpz_t); 
#define list_add __ECM(list_add) void list_add (listz_t, listz_t, listz_t, unsigned int); #define list_sub __ECM(list_sub) void list_sub (listz_t, listz_t, listz_t, unsigned int); #define list_mul_z __ECM(list_mul_z) void list_mul_z (listz_t, listz_t, mpz_t, unsigned int, mpz_t); #define list_gcd __ECM(list_gcd) int list_gcd (mpz_t, listz_t, unsigned int, mpz_t); #define list_mulup __ECM(list_mulup) void list_mulup (listz_t, unsigned int, mpz_t, mpz_t); #define list_zero __ECM(list_zero) void list_zero (listz_t, unsigned int); #define list_mul __ECM(list_mul) void list_mul (listz_t, listz_t, unsigned int, int, listz_t, unsigned int, int, listz_t); #define list_mul_high __ECM(list_mul_high) void list_mul_high (listz_t, listz_t, listz_t, unsigned int, listz_t); #define karatsuba __ECM(karatsuba) void karatsuba (listz_t, listz_t, listz_t, unsigned int, listz_t); #define list_mulmod __ECM(list_mulmod) void list_mulmod (listz_t, listz_t, listz_t, listz_t, unsigned int, listz_t, mpz_t); #define list_invert __ECM(list_invert) int list_invert (listz_t, listz_t, unsigned long, mpz_t, mpmod_t); #define PolyFromRoots __ECM(PolyFromRoots) void PolyFromRoots (listz_t, listz_t, unsigned int, listz_t, mpz_t); #define PolyFromRoots_Tree __ECM(PolyFromRoots_Tree) int PolyFromRoots_Tree (listz_t, listz_t, unsigned int, listz_t, int, mpz_t, listz_t*, FILE*, unsigned int); #define ntt_PolyFromRoots __ECM(ntt_PolyFromRoots) void ntt_PolyFromRoots (mpzv_t, mpzv_t, spv_size_t, mpzv_t, mpzspm_t); #define ntt_PolyFromRoots_Tree __ECM(ntt_PolyFromRoots_Tree) int ntt_PolyFromRoots_Tree (mpzv_t, mpzv_t, spv_size_t, mpzv_t, int, mpzspm_t, mpzv_t *, FILE *); #define ntt_polyevalT __ECM(ntt_polyevalT) int ntt_polyevalT (mpzv_t, spv_size_t, mpzv_t *, mpzv_t, mpzspv_t, mpzspm_t, char *); #define ntt_mul __ECM(ntt_mul) void ntt_mul (mpzv_t, mpzv_t, mpzv_t, spv_size_t, mpzv_t, int, mpzspm_t); #define ntt_PrerevertDivision __ECM(ntt_PrerevertDivision) void ntt_PrerevertDivision (mpzv_t, mpzv_t, 
mpzv_t, mpzspv_t, mpzspv_t, spv_size_t, mpzv_t, mpzspm_t); #define ntt_PolyInvert __ECM(ntt_PolyInvert) void ntt_PolyInvert (mpzv_t, mpzv_t, spv_size_t, mpzv_t, mpzspm_t); #define PrerevertDivision __ECM(PrerevertDivision) int PrerevertDivision (listz_t, listz_t, listz_t, unsigned int, listz_t, mpz_t); #define PolyInvert __ECM(PolyInvert) void PolyInvert (listz_t, listz_t, unsigned int, listz_t, mpz_t); #define RecursiveDivision __ECM(RecursiveDivision) void RecursiveDivision (listz_t, listz_t, listz_t, unsigned int, listz_t, mpz_t, int); /* polyeval.c */ #define polyeval __ECM(polyeval) void polyeval (listz_t, unsigned int, listz_t*, listz_t, mpz_t, unsigned int); #define polyeval_tellegen __ECM(polyeval_tellegen) int polyeval_tellegen (listz_t, unsigned int, listz_t*, listz_t, unsigned int, listz_t, mpz_t, char *); #define TUpTree __ECM(TUpTree) void TUpTree (listz_t, listz_t *, unsigned int, listz_t, int, unsigned int, mpz_t, FILE *); /* toomcook.c */ #define toomcook3 __ECM(toomcook3) void toomcook3 (listz_t, listz_t, listz_t, unsigned int, listz_t); #define toomcook4 __ECM(toomcook4) void toomcook4 (listz_t, listz_t, listz_t, unsigned int, listz_t); /* ks-multiply.c */ #define kronecker_schonhage __ECM(kronecker_schonhage) void kronecker_schonhage (listz_t, listz_t, listz_t, unsigned int, listz_t); #define TMulKS __ECM(TMulKS) int TMulKS (listz_t, unsigned int, listz_t, unsigned int, listz_t, unsigned int, mpz_t, int); #define ks_wrapmul_m __ECM(ks_wrapmul_m) unsigned int ks_wrapmul_m (unsigned int, unsigned int, mpz_t); #define ks_wrapmul __ECM(ks_wrapmul) unsigned int ks_wrapmul (listz_t, unsigned int, listz_t, unsigned int, listz_t, unsigned int, mpz_t); /* mpmod.c */ /* Define MPRESN_NO_ADJUSTMENT if mpresn_add, mpresn_sub and mpresn_addsub should perform no adjustment step. This yields constraints on N. 
*/ /* #define MPRESN_NO_ADJUSTMENT */ #define isbase2 __ECM(isbase2) int isbase2 (const mpz_t, const double); #define mpmod_init __ECM(mpmod_init) int mpmod_init (mpmod_t, const mpz_t, int); #define mpmod_init_MPZ __ECM(mpmod_init_MPZ) void mpmod_init_MPZ (mpmod_t, const mpz_t); #define mpmod_init_BASE2 __ECM(mpmod_init_BASE2) int mpmod_init_BASE2 (mpmod_t, const int, const mpz_t); #define mpmod_init_MODMULN __ECM(mpmod_init_MODMULN) void mpmod_init_MODMULN (mpmod_t, const mpz_t); #define mpmod_init_REDC __ECM(mpmod_init_REDC) void mpmod_init_REDC (mpmod_t, const mpz_t); #define mpmod_clear __ECM(mpmod_clear) void mpmod_clear (mpmod_t); #define mpmod_init_set __ECM(mpmod_init_set) void mpmod_init_set (mpmod_t, const mpmod_t); #define mpmod_pausegw __ECM(mpmod_pausegw) void mpmod_pausegw (const mpmod_t modulus); #define mpmod_contgw __ECM(mpmod_contgw) void mpmod_contgw (const mpmod_t modulus); #define mpres_equal __ECM(mpres_equal) int mpres_equal (const mpres_t, const mpres_t, mpmod_t); #define mpres_pow __ECM(mpres_pow) void mpres_pow (mpres_t, const mpres_t, const mpz_t, mpmod_t); #define mpres_ui_pow __ECM(mpres_ui_pow) void mpres_ui_pow (mpres_t, const unsigned long, const mpres_t, mpmod_t); #define mpres_mul __ECM(mpres_mul) void mpres_mul (mpres_t, const mpres_t, const mpres_t, mpmod_t) ATTRIBUTE_HOT; #define mpres_sqr __ECM(mpres_sqr) void mpres_sqr (mpres_t, const mpres_t, mpmod_t) ATTRIBUTE_HOT; #define mpres_mul_z_to_z __ECM(mpres_mul_z_to_z) void mpres_mul_z_to_z (mpz_t, const mpres_t, const mpz_t, mpmod_t); #define mpres_set_z_for_gcd __ECM(mpres_set_z_for_gcd) void mpres_set_z_for_gcd (mpres_t, const mpz_t, mpmod_t); #define mpres_div_2exp __ECM(mpres_div_2exp) void mpres_div_2exp (mpres_t, const mpres_t, const unsigned int, mpmod_t); #define mpres_add_ui __ECM(mpres_add_ui) void mpres_add_ui (mpres_t, const mpres_t, const unsigned long, mpmod_t); #define mpres_add __ECM(mpres_add) void mpres_add (mpres_t, const mpres_t, const mpres_t, mpmod_t) 
ATTRIBUTE_HOT; #define mpres_sub_ui __ECM(mpres_sub_ui) void mpres_sub_ui (mpres_t, const mpres_t, const unsigned long, mpmod_t); #define mpres_ui_sub __ECM(mpres_ui_sub) void mpres_ui_sub (mpres_t, const unsigned long, const mpres_t, mpmod_t); #define mpres_sub __ECM(mpres_sub) void mpres_sub (mpres_t, const mpres_t, const mpres_t, mpmod_t) ATTRIBUTE_HOT; #define mpres_set_z __ECM(mpres_set_z) void mpres_set_z (mpres_t, const mpz_t, mpmod_t); #define mpres_get_z __ECM(mpres_get_z) void mpres_get_z (mpz_t, const mpres_t, mpmod_t); #define mpres_set_ui __ECM(mpres_set_ui) void mpres_set_ui (mpres_t, const unsigned long, mpmod_t); #define mpres_set_si __ECM(mpres_set_si) void mpres_set_si (mpres_t, const long, mpmod_t); #define mpres_init __ECM(mpres_init) void mpres_init (mpres_t, const mpmod_t); #define mpres_clear __ECM(mpres_clear) void mpres_clear (mpres_t, const mpmod_t); #define mpres_realloc __ECM(mpres_realloc) void mpres_realloc (mpres_t, const mpmod_t); #define mpres_mul_ui __ECM(mpres_mul_ui) void mpres_mul_ui (mpres_t, const mpres_t, const unsigned long, mpmod_t); #define mpres_mul_2exp __ECM(mpres_mul_2exp) void mpres_mul_2exp (mpres_t, const mpres_t, const unsigned long, mpmod_t); #define mpres_muldivbysomething_si __ECM(mpres_muldivbysomething_si) void mpres_muldivbysomething_si (mpres_t, const mpres_t, const long, mpmod_t); #define mpres_neg __ECM(mpres_neg) void mpres_neg (mpres_t, const mpres_t, mpmod_t); #define mpres_invert __ECM(mpres_invert) int mpres_invert (mpres_t, const mpres_t, mpmod_t); #define mpres_gcd __ECM(mpres_gcd) void mpres_gcd (mpz_t, const mpres_t, const mpmod_t); #define mpres_out_str __ECM(mpres_out_str) void mpres_out_str (FILE *, const unsigned int, const mpres_t, mpmod_t); #define mpres_is_zero __ECM(mpres_is_zero) int mpres_is_zero (const mpres_t, mpmod_t); #define mpres_set(a,b,n) mpz_set (a, b) #define mpres_swap(a,b,n) mpz_swap (a, b) #define mpresn_mul __ECM(mpresn_mul) void mpresn_mul (mpres_t, const mpres_t, const 
mpres_t, mpmod_t); #define mpresn_addsub __ECM(mpresn_addsub) void mpresn_addsub (mpres_t, mpres_t, const mpres_t, const mpres_t, mpmod_t); #define mpresn_pad __ECM(mpresn_pad) void mpresn_pad (mpres_t R, mpmod_t N); #define mpresn_unpad __ECM(mpresn_unpad) void mpresn_unpad (mpres_t R); #define mpresn_sqr __ECM(mpresn_sqr) void mpresn_sqr (mpres_t, const mpres_t, mpmod_t); #define mpresn_add __ECM(mpresn_add) void mpresn_add (mpres_t, const mpres_t, const mpres_t, mpmod_t); #define mpresn_sub __ECM(mpresn_sub) void mpresn_sub (mpres_t, const mpres_t, const mpres_t, mpmod_t); #define mpresn_mul_1 __ECM(mpresn_mul_ui) void mpresn_mul_1 (mpres_t, const mpres_t, const mp_limb_t, mpmod_t); /* mul_lo.c */ #define ecm_mul_lo_n __ECM(ecm_mul_lo_n) void ecm_mul_lo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); #define ecm_mul_lo_basecase __ECM(ecm_mul_lo_basecase) void ecm_mul_lo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); /* median.c */ #define TMulGen __ECM(TMulGen) int TMulGen (listz_t, unsigned int, listz_t, unsigned int, listz_t, unsigned int, listz_t, mpz_t); #define TMulGen_space __ECM(TMulGen_space) unsigned int TMulGen_space (unsigned int, unsigned int, unsigned int); /* schoen_strass.c */ #define DEFAULT 0 #define MONIC 1 #define NOPAD 2 #define F_mul __ECM(F_mul) unsigned int F_mul (mpz_t *, mpz_t *, mpz_t *, unsigned int, int, unsigned int, mpz_t *); #define F_mul_trans __ECM(F_mul_trans) unsigned int F_mul_trans (mpz_t *, mpz_t *, mpz_t *, unsigned int, unsigned int, unsigned int, mpz_t *); #define F_clear __ECM(F_clear) void F_clear (); /* rho.c */ #define rhoinit __ECM(rhoinit) void rhoinit (int, int); #define ecmprob __ECM(ecmprob) double ecmprob (double, double, double, double, int); double pm1prob (double, double, double, double, int, const mpz_t); /* auxlib.c */ #define mpz_add_si __ECM(mpz_add_si) void mpz_add_si (mpz_t, mpz_t, long); #define mpz_sub_si __ECM(mpz_sub_si) void mpz_sub_si (mpz_t, mpz_t, long); #define mpz_divby3_1op 
__ECM(mpz_divby3_1op) void mpz_divby3_1op (mpz_t); #define double_to_size __ECM(double_to_size) size_t double_to_size (double d); #define cputime __ECM(cputime) long cputime (void); #define realtime __ECM(realtime) long realtime (void); #define elltime __ECM(elltime) long elltime (long, long); #define test_verbose __ECM(test_verbose) int test_verbose (int); #define get_verbose __ECM(get_verbose) int get_verbose (void); #define set_verbose __ECM(set_verbose) void set_verbose (int); #define inc_verbose __ECM(inc_verbose) int inc_verbose (void); #define outputf __ECM(outputf) int outputf (int, char *, ...); #define writechkfile __ECM(writechkfile) void writechkfile (char *, int, double, mpmod_t, mpres_t, mpres_t, mpres_t); /* auxarith.c */ #define gcd __ECM(gcd) unsigned long gcd (unsigned long, unsigned long); #define eulerphi __ECM(eulerphi) unsigned long eulerphi (unsigned long); #define ceil_log2 __ECM(ceil_log2) unsigned int ceil_log2 (unsigned long); #define is_prime __ECM(is_prime) int is_prime (const unsigned long); #define next_prime __ECM(next_prime) unsigned long next_prime (const unsigned long); #define find_factor __ECM(find_factor) unsigned long find_factor (const unsigned long); /* random.c */ #define pp1_random_seed __ECM(pp1_random_seed) void pp1_random_seed (mpz_t, mpz_t, gmp_randstate_t); #define pm1_random_seed __ECM(pm1_random_seed) void pm1_random_seed (mpz_t, mpz_t, gmp_randstate_t); #define get_random_ul __ECM(get_random_ul) unsigned long get_random_ul (void); /* Fgw.c */ #ifdef HAVE_GWNUM int gw_ecm_stage1 (mpz_t, curve *, mpmod_t, double, double *, mpz_t, double, unsigned long, unsigned long, signed long); #endif /* mul_fft.h */ #define mpn_mul_fft __ECM(mpn_mul_fft) int mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int); #define mpn_mul_fft_full __ECM(mpn_mul_fft_full) void mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); #define mpn_fft_best_k __ECM(mpn_fft_best_k) int mpn_fft_best_k 
(mp_size_t, int); #define mpn_fft_next_size __ECM(mpn_fft_next_size) mp_size_t mpn_fft_next_size (mp_size_t, int); /* batch.c */ #define compute_s __ECM(compute_s ) void compute_s (mpz_t, unsigned long); #define write_s_in_file __ECM(write_s_in_file) int write_s_in_file (char *, mpz_t); #define read_s_from_file __ECM(read_s_from_file) void read_s_from_file (mpz_t, char *); #define ecm_stage1_batch __ECM(ecm_stage1_batch) int ecm_stage1_batch (mpz_t, mpres_t, mpres_t, mpmod_t, double, double *, int, mpz_t); /* ellparam_batch.c */ #define get_curve_from_ell_parametrization \ __ECM(get_curve_from_ell_parametrization ) int get_curve_from_ell_parametrization (mpz_t, mpres_t, mpz_t, mpmod_t); /* sets_long.c */ /* A set of long ints */ typedef struct { unsigned long card; long elem[1]; } set_long_t; /* A set of sets of long ints */ typedef struct { unsigned long nr; set_long_t sets[1]; } sets_long_t; #define quicksort_long __ECM(quicksort_long) void quicksort_long (long *, unsigned long); #define sets_print __ECM(sets_print) void sets_print (const int, sets_long_t *); #define sets_max __ECM(sets_max) void sets_max (mpz_t, const unsigned long); #define sets_sumset __ECM(sets_sumset) void sets_sumset (set_long_t *, const sets_long_t *); #define sets_sumset_minmax __ECM(sets_sumset_minmax) void sets_sumset_minmax (mpz_t, const sets_long_t *, const int); #define sets_extract __ECM(sets_extract) void sets_extract (sets_long_t *, size_t *, sets_long_t *, const unsigned long); #define sets_get_factored_sorted __ECM(sets_get_factored_sorted) sets_long_t * sets_get_factored_sorted (const unsigned long); /* Return the size in bytes of a set of cardinality c */ #define set_sizeof __ECM(set_sizeof) ATTRIBUTE_UNUSED static size_t set_sizeof (const unsigned long c) { return sizeof (long) + (size_t) c * sizeof (unsigned long); } /* Return pointer to the next set in "*sets" */ ATTRIBUTE_UNUSED static set_long_t * sets_nextset (const set_long_t *sets) { return (set_long_t *) ((char *)sets 
+ sizeof(unsigned long) + sets->card * sizeof(long)); } #if defined (__cplusplus) } #endif /* a <- b * c where a and b are mpz, c is a double, and t an auxiliary mpz */ /* Not sure how the preprocessor handles shifts by more than the integer width on 32 bit machines, so do the shift by 53 in two pieces */ #if (((ULONG_MAX >> 27) >> 26) >= 1) #define mpz_mul_d(a, b, c, t) \ mpz_mul_ui (a, b, (unsigned long int) c); #else #define mpz_mul_d(a, b, c, t) \ if (c < (double) ULONG_MAX) \ mpz_mul_ui (a, b, (unsigned long int) c); \ else { \ mpz_set_d (t, c); \ mpz_mul (a, b, t); } #endif #endif /* _ECM_IMPL_H */ ecm-6.4.4/mul_lo.c0000644023561000001540000000432412106741273010654 00000000000000/* Low-half short product (quadratic and Mulders' algorithms). Copyright 2003, 2005, 2006 Paul Zimmermann, Alexander Kruppa, Dave Newman. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "ecm-impl.h" /* puts in {rp, n} the low part of {np, n} times {mp, n}, i.e. 
equivalent to: mp_ptr tp; TMP_DECL(marker); TMP_MARK(marker); tp = TMP_ALLOC_LIMBS (2 * n); mpn_mul_n (tp, np, mp, n); MPN_COPY (rp, tp, n); TMP_FREE(marker); */ void ecm_mul_lo_basecase (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n) { mpn_mul_1 (rp, np, n, mp[0]); for (; --n;) mpn_addmul_1 (++rp, np, n, (++mp)[0]); } #ifdef MPN_MUL_LO_THRESHOLD_TABLE size_t mpn_mul_lo_threshold[MPN_MUL_LO_THRESHOLD] = MPN_MUL_LO_THRESHOLD_TABLE; #else size_t mpn_mul_lo_threshold[MPN_MUL_LO_THRESHOLD]; #endif void ecm_mul_lo_n (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n) { mp_size_t k; if (n < MPN_MUL_LO_THRESHOLD) { switch (k = mpn_mul_lo_threshold[n]) { case 0: { mpn_mul_n (rp, np, mp, n); return; } case 1: { ecm_mul_lo_basecase (rp, np, mp, n); return; } /* else go through */ } } else k = (mp_size_t) (0.75 * (double) n); mpn_mul_n (rp, np, mp, k); rp += k; n -= k; ecm_mul_lo_n (rp + n, np + k, mp, n); mpn_add_n (rp, rp, rp + n, n); ecm_mul_lo_n (rp + n, np, mp + k, n); mpn_add_n (rp, rp, rp + n, n); } ecm-6.4.4/powerpc64/0000755023561000001540000000000012113421641011117 500000000000000ecm-6.4.4/powerpc64/powerpc-defs.m40000644023561000001540000000260412106741272013710 00000000000000divert(-1) dnl m4 macros for PowerPC assembler (32 and 64). dnl Inspired from GMP 4.1.4 dnl Copyright 2000 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or dnl modify it under the terms of the GNU Lesser General Public License as dnl published by the Free Software Foundation; either version 2.1 of the dnl License, or (at your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dnl Lesser General Public License for more details. 
dnl dnl You should have received a copy of the GNU Lesser General Public dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 59 Temple Place - dnl Suite 330, Boston, MA 02111-1307, USA. dnl Usage: r0 ... r31, cr0 ... cr7 dnl dnl Registers names, either left as "r0" etc or mapped to plain 0 etc, dnl according to the result of GMP_ASM_POWERPC_REGISTERS. define(r0,0) define(r1,1) define(r3,3) define(r4,4) define(r5,5) define(r6,6) define(r7,7) define(r8,8) define(r9,9) define(r10,10) define(r11,11) define(r12,12) define(r13,13) define(r14,14) define(r15,15) define(r16,16) divert ecm-6.4.4/powerpc64/mulredc3.asm0000644023561000001540000002271712113421641013270 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. 
dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc3(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc3 GLOBL .GSYM_PREFIX`'mulredc3 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc3: .quad .GSYM_PREFIX`'mulredc3, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc3, 24 C Implements multiplication and REDC for two input numbers of 3 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. 
For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 3] array, having 3+1 8-byte words C The tmp array needs 3+1 entries, but tmp[3] is stored in C r15, so only 3 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'3,`@function') .GSYM_PREFIX`'mulredc3: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
C We need to fetch x[i] from memory and compute the new u
C The saves of r13-r16 are interleaved with the first multiplications.
C Each stdu stores the register at r1-8 and moves r1 down by 8; the subi
C then lowers r1 by a further 24 bytes, which the code below addresses as
C tmp[0..2] through 0(r1), 8(r1) and 16(r1).
C NOTE(review): r1 is moved without a conventional frame or back chain
C being set up here -- confirm this is acceptable for the target ABI.
        ld      r12, 0(r4)      C XI = x[0]
        ld      r0, 0(r5)       C y[0]
        stdu    r13, -8(r1)     C save r13
        mulld   r8, r0, r12     C x[0]*y[0] low half
        stdu    r14, -8(r1)     C save r14
        mulhdu  r9, r0, r12     C x[0]*y[0] high half
        ld      r0, 0(r6)       C m[0]
        mulld   r11, r7, r8     C U = T0*invm mod 2^64
        stdu    r15, -8(r1)     C save r15
        mulld   r13, r0, r11    C T0 = U*m[0] low
        stdu    r16, -8(r1)     C save r16
        li      r16, 0          C set r16 to zero for carry propagation
        subi    r1, r1, 24      C set tmp stack space
        mulhdu  r14, r0, r11    C T1 = U*m[0] high
        ld      r0, 8(r5)       C y[1]
        addc    r8, r8, r13     C
        adde    r13, r9, r14    C T0 = initial tmp(0)
        addze   r10, r16        C carry to CY
C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence
C CY:T1 <= 2*2^64 - 4

C Pass for j = 1
        mulld   r8, r0, r12     C x[i]*y[j] low half
        mulhdu  r9, r0, r12     C x[i]*y[j] high half
        ld      r0, 8(r6)       C m[j]
        addc    r13, r8, r13    C add low word to T0
        adde    r14, r9, r10    C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
        mulld   r8, r0, r11     C U*m[j] low
        mulhdu  r9, r0, r11     C U*m[j] high
        addc    r8, r8, r13     C add T0 and low word
        ld      r0, 16(r5)      C y[j+1]
        adde    r13, r9, r14    C add high word with carry to T1
        addze   r10, r16        C carry to CY
        std     r8, 0(r1)       C store tmp[j-1]
C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 2. Don't fetch new data from y[j+1].
        mulld   r8, r0, r12     C x[i]*y[j] low half
        mulhdu  r9, r0, r12     C x[i]*y[j] high half
        ld      r0, 16(r6)      C m[j]
        addc    r13, r8, r13    C add low word to T0
        adde    r14, r9, r10    C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 8(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 16(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 2 C outer loop count mtctr r9 1: C Pass for j = 0. We need to fetch x[i], tmp[i] and tmp[i+1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2. Don't fetch new data from y[j+1]. 
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 16(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) stdu r8, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc3, .-.GSYM_PREFIX`'mulredc3 ecm-6.4.4/powerpc64/mulredc.h0000644023561000001540000000462512106741272012661 00000000000000#ifndef __ASM_REDC_H__ #define __ASM_REDC_H__ #include /* Signals that we have assembly code for variable size redc */ #define HAVE_ASM_REDC3 extern void ecm_redc3(mp_limb_t *, const mp_limb_t *, mp_size_t, mp_limb_t); /* WARNING: the size-1 version doesn't take pointers in input */ extern mp_limb_t mulredc1(mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t); extern mp_limb_t mulredc2(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc3(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc4(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc5(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc6(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc7(mp_limb_t *, const mp_limb_t 
*, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc8(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc9(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc10(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc11(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc12(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc13(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc14(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc15(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc16(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc17(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc18(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc19(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); extern mp_limb_t mulredc20(mp_limb_t *, const mp_limb_t *, const mp_limb_t *, const mp_limb_t *, mp_limb_t); #endif ecm-6.4.4/powerpc64/mulredc14.asm0000644023561000001540000006216512113421641013353 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. 
dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc14(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc14 GLOBL .GSYM_PREFIX`'mulredc14 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc14: .quad .GSYM_PREFIX`'mulredc14, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc14, 24 C Implements multiplication and REDC for two input numbers of 14 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = 
x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 14] array, having 14+1 8-byte words C The tmp array needs 14+1 entries, but tmp[14] is stored in C r15, so only 14 entries are used in the stack. 
TEXT
        .align 5        C powerPC 32 byte alignment
        TYPE(.GSYM_PREFIX`'mulredc`'14,`@function')
.GSYM_PREFIX`'mulredc14:

C ########################################################################
C # i = 0 pass
C #########################################################################

C Pass for j = 0. We need to fetch x[i] from memory and compute the new u
C The saves of r13-r16 are interleaved with the first multiplications.
C Each stdu stores the register at r1-8 and moves r1 down by 8; the subi
C then lowers r1 by a further 112 bytes (14 words of 8 bytes), which the
C code addresses as tmp[0..13] through 0(r1) ... 104(r1).
C NOTE(review): r1 is moved without a conventional frame or back chain
C being set up here -- confirm this is acceptable for the target ABI.
        ld      r12, 0(r4)      C XI = x[0]
        ld      r0, 0(r5)       C y[0]
        stdu    r13, -8(r1)     C save r13
        mulld   r8, r0, r12     C x[0]*y[0] low half
        stdu    r14, -8(r1)     C save r14
        mulhdu  r9, r0, r12     C x[0]*y[0] high half
        ld      r0, 0(r6)       C m[0]
        mulld   r11, r7, r8     C U = T0*invm mod 2^64
        stdu    r15, -8(r1)     C save r15
        mulld   r13, r0, r11    C T0 = U*m[0] low
        stdu    r16, -8(r1)     C save r16
        li      r16, 0          C set r16 to zero for carry propagation
        subi    r1, r1, 112     C set tmp stack space
        mulhdu  r14, r0, r11    C T1 = U*m[0] high
        ld      r0, 8(r5)       C y[1]
        addc    r8, r8, r13     C
        adde    r13, r9, r14    C T0 = initial tmp(0)
        addze   r10, r16        C carry to CY
C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence
C CY:T1 <= 2*2^64 - 4

C Pass for j = 1
        mulld   r8, r0, r12     C x[i]*y[j] low half
        mulhdu  r9, r0, r12     C x[i]*y[j] high half
        ld      r0, 8(r6)       C m[j]
        addc    r13, r8, r13    C add low word to T0
        adde    r14, r9, r10    C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
        mulld   r8, r0, r11     C U*m[j] low
        mulhdu  r9, r0, r11     C U*m[j] high
        addc    r8, r8, r13     C add T0 and low word
        ld      r0, 16(r5)      C y[j+1]
        adde    r13, r9, r14    C add high word with carry to T1
        addze   r10, r16        C carry to CY
        std     r8, 0(r1)       C store tmp[j-1]
C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 2
        mulld   r8, r0, r12     C x[i]*y[j] low half
        mulhdu  r9, r0, r12     C x[i]*y[j] high half
        ld      r0, 16(r6)      C m[j]
        addc    r13, r8, r13    C add low word to T0
        adde    r14, r9, r10    C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 11 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 12 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 13. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 96(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 104(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 13 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] 
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C 
U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10 ld r14, 88(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 11 ld r14, 96(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 12 ld r14, 104(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY 
mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 13. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 96(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 104(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc14, .-.GSYM_PREFIX`'mulredc14 ecm-6.4.4/powerpc64/mulredc5.asm0000644023561000001540000003045512113421641013270 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. 
dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc5(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc5 GLOBL .GSYM_PREFIX`'mulredc5 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc5: .quad .GSYM_PREFIX`'mulredc5, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc5, 24 C Implements multiplication and REDC for two input numbers of 5 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = 
x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 5] array, having 5+1 8-byte words C The tmp array needs 5+1 entries, but tmp[5] is stored in C r15, so only 5 entries are used in the stack. 
TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'5,`@function') .GSYM_PREFIX`'mulredc5: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 40 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 24(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 32(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 4 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] 
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 32(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) stdu r8, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc5, .-.GSYM_PREFIX`'mulredc5 ecm-6.4.4/powerpc64/Makefile.dev0000755023561000001540000000176712106741272013301 00000000000000.PHONY: all all: test_mulredc bench CFLAGS:=-m64 -mcpu=970 -O3 ALLMULRED:= mulredc1.o mulredc2.o mulredc3.o mulredc4.o mulredc5.o\ mulredc6.o mulredc7.o mulredc8.o mulredc9.o mulredc10.o\ mulredc11.o mulredc12.o mulredc13.o mulredc14.o\ mulredc15.o mulredc16.o mulredc17.o mulredc18.o\ mulredc19.o mulredc20.o redc.s: 
redc.asm m4 -I../ redc.asm > redc.s redc.o: redc.s gcc -c $(CFLAGS) redc.s -o redc.o mulredc%.o: mulredc%.asm m4 $< > tmp-mulred.s gcc -c $(CFLAGS) tmp-mulred.s -o $@ rm tmp-mulred.s mulredc1.asm: ./mulredc_1_2.m4 m4 -DLENGTH=1 $< > $@ mulredc2.asm: ./mulredc_1_2.m4 m4 -DLENGTH=2 $< > $@ mulredc%.asm: ./mulredc.m4 m4 -DLENGTH=$* $< > $@ test_mulredc: test_mulredc.c redc.o $(ALLMULRED) gcc -o test_mulredc $(CFLAGS) test_mulredc.c $(ALLMULRED) redc.o -lgmp bench: bench.c redc.o $(ALLMULRED) gcc -o bench $(CFLAGS) bench.c $(ALLMULRED) redc.o -lgmp clean: rm redc.s *.o mulredc[0-9]*.s mulredc[0-9]*.asm test_mulredc ecm-6.4.4/powerpc64/generate_all0000755023561000001540000000030012106741272013407 00000000000000#!/bin/sh for i in 1 2; do m4 -DLENGTH=$i mulredc_1_2.m4 > mulredc$i.asm done for i in 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do m4 -DLENGTH=$i mulredc.m4 > mulredc$i.asm done ecm-6.4.4/powerpc64/mulredc6.asm0000644023561000001540000003333412113421641013270 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. 
dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc6(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc6 GLOBL .GSYM_PREFIX`'mulredc6 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc6: .quad .GSYM_PREFIX`'mulredc6, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc6, 24 C Implements multiplication and REDC for two input numbers of 6 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. 
For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 6] array, having 6+1 8-byte words C The tmp array needs 6+1 entries, but tmp[6] is stored in C r15, so only 6 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'6,`@function') .GSYM_PREFIX`'mulredc6: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 48 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 32(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 40(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 5 C outer loop count mtctr r9 1: C Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add 
low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5. Don't fetch new data from y[j+1]. 
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 40(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc6, .-.GSYM_PREFIX`'mulredc6 ecm-6.4.4/powerpc64/mulredc2.asm0000644023561000001540000001107612113421641013263 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. 
If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc2(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc2 GLOBL .GSYM_PREFIX`'mulredc2 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc2: .quad .GSYM_PREFIX`'mulredc2, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc2, 24 TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'2,`@function') .GSYM_PREFIX`'mulredc2: ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result zero mulld r8, r0, r12 C x[0]*y[1] low half adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[0]*y[1] high half ld r0, 8(r6) C m[1] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[1] low adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulhdu r9, r0, r11 C U*m[1] high ldu r12, 8(r4) C x[1] ld r0, 0(r5) C y[0] addc r13, r8, r13 C add T0 and low word mulld r8, r0, r12 C x[1]*y[0] low half adde r14, r9, r14 C add high word with carry to T1 addze r15, r16 C put carry in r15 (tmp[len] <= 1) mulhdu r9, r0, r12 C x[1]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending mulld r8, r0, r12 C x[1]*y[1] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[1]*y[1] high half ld r0, 8(r6) C m[1] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[1] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[1] high addc r8, r8, r13 C add T0 and low word adde r9, r9, r14 C T1, carry pending std r8, 0(r3) C copy result to z stdu r9, 8(r3) addze r3, r10 C return tmp(len) ld r16, 0(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc2, .-.GSYM_PREFIX`'mulredc2 ecm-6.4.4/powerpc64/mulredc.m40000755023561000001540000002543212110743510012744 00000000000000`dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ******************************************************************************' dnl Use `C' to remove comments in .asm -> .s conversion. dnl Copied from GMP 4.2. `define(C, ` dnl')' C mp_limb_t mulredc`'LENGTH`'(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 divert(-1) dnl forloop(i, from, to, stmt) define(`forloop', `pushdef(`$1', `$2')_forloop(`$1', `$2', `$3', `$4')popdef(`$1')') define(`_forloop', `$4`'ifelse($1, `$3', , `define(`$1', incr($1))_forloop(`$1', `$2', `$3', `$4')')') divert `include(`config.m4')' GLOBL GSYM_PREFIX``''mulredc`'LENGTH GLOBL .GSYM_PREFIX``''mulredc`'LENGTH .section ".opd", "aw" .align 3 GSYM_PREFIX``''mulredc`'LENGTH: .quad .GSYM_PREFIX``''mulredc`'LENGTH, .TOC.@tocbase, 0 .size GSYM_PREFIX``''mulredc`'LENGTH, 24 C Implements multiplication and REDC for two input numbers of LENGTH words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; 
/* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4). C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp[len] <= 1 for i = 0. C Assume this is true for index i-1. Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... LENGTH] array, having LENGTH+1 8-byte words C The tmp array needs LENGTH+1 entries, but tmp[LENGTH] is stored in C r15, so only LENGTH entries are used on the stack.
TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX``''mulredc``''LENGTH,``@function'') .GSYM_PREFIX``''mulredc`'LENGTH: define(`S', `eval(8 * LENGTH)')dnl C ######################################################################## C # i = 0 pass: tmp = (x[0]*y + u*m) / 2^64, there is no previous tmp to add C ######################################################################### C Pass for j = 0. We need to fetch x[0] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, S C reserve S = 8*LENGTH bytes of stack for tmp[0 ... LENGTH-1] mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C low word of the sum is 0 by choice of U, only the carry is kept adde r13, r9, r14 C T0 = initial tmp[0] addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 forloop(`UNROLL', 1, eval(LENGTH - 2), `dnl define(`J', `eval(8 * UNROLL)')dnl define(`J8', `eval(J + 8)')dnl define(`JM8', `eval(J - 8)')dnl C Pass for j = UNROLL mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, J`'(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, J8`'(r5) C y[j+1] adde r13, r9, r14 C new T0 = high word + T1 + carry addze r10, r16 C carry to CY std r8, JM8`'(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 ')dnl end forloop C Pass for j = eval(LENGTH - 1). Don't fetch new data from y[j+1].
define(`J', `eval(8*LENGTH - 8)')dnl define(`JM8', `eval(J - 8)')dnl mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, J`'(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C new T0 = high word + T1 + carry std r8, JM8`'(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, J`'(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes: tmp = (tmp + x[i]*y + u*m) / 2^64, run LENGTH-1 times via ctr C ######################################################################### define(`LM1', `eval(LENGTH - 1)')dnl li r9, LM1 C outer loop count mtctr r9 1: C Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 forloop(`UNROLL', 1, eval(LENGTH - 2), `dnl define(`J', `eval(8 * UNROLL)')dnl define(`J8', `eval(J + 8)')dnl define(`JM8', `eval(J - 8)')dnl C Pass for j = UNROLL ld r14, J8`'(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, J`'(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry
to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, J8`'(r5) C y[j+1] adde r13, r9, r14 C new T0, carry pending std r8, JM8`'(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 ')dnl end forloop C Pass for j = eval(LENGTH - 1). Don't fetch new data from y[j+1]. define(`J', `eval(8*LENGTH - 8)')dnl define(`JM8', `eval(J - 8)')dnl mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, J`'(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C new T0 (becomes tmp[len-1]), carry pending std r8, JM8`'(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, J`'(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z dnl ==== THIS LOOP WILL NOT WORK FOR LENGTH <= 1 ==== forloop(`UNROLL', 0, eval(LENGTH / 2 - 1), `dnl define(`J', `eval(UNROLL)')dnl ifelse(J, `0', dnl ` ld r8, 0(r1)', dnl ` ldu r8, 8(r1)') ldu r9, 8(r1) ifelse(J, `0', dnl ` std r8, 0(r3)', dnl ` stdu r8, 8(r3)') stdu r9, 8(r3) ')dnl ifelse(eval(LENGTH % 2), 1, `dnl ldu r8, 8(r1) stdu r8, 8(r3) ')dnl mr r3, r15 C return tmp[len], then reload the saved r16, r15, r14, r13 and pop the stack ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX``''mulredc`'LENGTH, .-.GSYM_PREFIX``''mulredc`'LENGTH ecm-6.4.4/powerpc64/mulredc20.asm0000644023561000001540000010330512113421641013340 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library.
dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc20(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc20 GLOBL .GSYM_PREFIX`'mulredc20 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc20: .quad .GSYM_PREFIX`'mulredc20, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc20, 24 C Implements multiplication and REDC for two input numbers of 20 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = 
x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4). C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp[len] <= 1 for i = 0. C Assume this is true for index i-1. Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 20] array, having 20+1 8-byte words C The tmp array needs 20+1 entries, but tmp[20] is stored in C r15, so only 20 entries are used on the stack.
TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'20,`@function') .GSYM_PREFIX`'mulredc20: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 160 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 11 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 12 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 13 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 14 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 15 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 128(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 112(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 16 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 128(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 136(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 120(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 17 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 136(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 144(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 128(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 18 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 144(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 152(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 136(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 19. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 152(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 144(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 152(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 19 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] 
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C 
U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10 ld r14, 88(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 11 ld r14, 96(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 12 ld r14, 104(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY 
mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 13 ld r14, 112(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 14 ld r14, 120(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 15 ld r14, 128(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld 
r0, 128(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 112(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 16 ld r14, 136(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 128(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 136(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 120(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 17 ld r14, 144(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 136(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 144(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 128(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 18 ld r14, 152(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 144(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 152(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 
136(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 19. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 152(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 144(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 152(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc20, .-.GSYM_PREFIX`'mulredc20 ecm-6.4.4/powerpc64/mulredc17.asm0000644023561000001540000007263512113421641013361 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. 
dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc17(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc17 GLOBL .GSYM_PREFIX`'mulredc17 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc17: .quad .GSYM_PREFIX`'mulredc17, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc17, 24 C Implements multiplication and REDC for two input numbers of 17 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = 
x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 17] array, having 17+1 8-byte words C The tmp array needs 17+1 entries, but tmp[17] is stored in C r15, so only 17 entries are used in the stack. 
TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'17,`@function') .GSYM_PREFIX`'mulredc17: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 136 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C low word cancels to 0 by choice of U; only the carry is used adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 11 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 12 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 13 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 14 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 15 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 128(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 112(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 16. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 128(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 120(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 128(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 16 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1]
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C 
U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10 ld r14, 88(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 11 ld r14, 96(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 12 ld r14, 104(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY 
mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 13 ld r14, 112(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 14 ld r14, 120(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 15 ld r14, 128(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld 
r0, 128(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 112(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 16. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 128(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 120(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 128(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) stdu r8, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc17, .-.GSYM_PREFIX`'mulredc17 ecm-6.4.4/powerpc64/redc.asm0000755023561000001540000003076312110743510012471 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. 
dnl
dnl The ECM Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published by
dnl the Free Software Foundation; either version 3 of the License, or (at your
dnl option) any later version.
dnl
dnl The ECM Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the ECM Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
dnl MA 02110-1301, USA.
dnl ******************************************************************************
dnl
dnl void ecm_redc3(mp_limb_t * c, const mp_limb_t * m, size_t n, mp_limb_t m_inv)
dnl
dnl input arguments:
dnl
dnl r3: ptr to c[0], the least significant word of the number to be reduced
dnl c[0 ... 2*n-1] is of length 2*n words
dnl r4: ptr to m[0], the least significant word of the modulus m of length n
dnl r5: the length n
dnl r6: m_inv = -1/m mod 2^64
dnl
dnl the residue (before adding the word carries) will be in c[n ... 2*n-1].
dnl c[0 ... n-1] will contain the high word carries from each inner loop pass.
dnl These carry words are added by the calling routine to obtain the final
dnl residue.
dnl
dnl n = 1 is handled by a short special case that only writes the carry word
dnl to c[0].  The general path always processes m[0] and m[1] before entering
dnl the unrolled inner loop, so it is written for n >= 2.
dnl
dnl Register use in the general path:
dnl
dnl r0:  m[i], then the high half of m[i]*u
dnl r3:  c pointer, advanced by one word per outer loop pass
dnl r4:  base address of m (never modified)
dnl r5:  n on entry; afterwards holds the outer loop count while CTR is
dnl      borrowed for the inner loop
dnl r6:  m_inv
dnl r7:  scratch during setup, then u = c[0] * m_inv mod 2^64
dnl r8:  scratch during setup, then working pointer into m
dnl r9:  scratch during setup, then working pointer into c
dnl r10: high word / carry word being propagated
dnl r11: low word being accumulated
dnl r12: c[i]
dnl r14: inner loop count, int((n - 2)/16) + 1 (saved and restored)
dnl LR:  entry address into the unrolled inner loop; the return address is
dnl      kept on the stack for the duration of the routine
dnl
dnl r14 is used as the extra register rather than r13 because the 64-bit
dnl PowerPC ELF ABI reserves r13 as the thread pointer: it must not be
dnl modified even temporarily, since a signal handler that runs while it is
dnl overwritten may access thread-local storage through it.
dnl
dnl Stack use: two doublewords are pushed below the incoming stack pointer,
dnl 0(r1) = saved r14 and 8(r1) = saved return address.

dnl Use `C' to remove comments in .asm -> .s conversion.
dnl Copied from GMP 4.2.
define(C, `
dnl')

include(`config.m4')

        GLOBL GSYM_PREFIX`'ecm_redc3
        GLOBL .GSYM_PREFIX`'ecm_redc3

        .section ".opd", "aw"
        .align  3
GSYM_PREFIX`'ecm_redc3:
        .quad   .GSYM_PREFIX`'ecm_redc3, .TOC.@tocbase, 0
        .size   GSYM_PREFIX`'ecm_redc3, 24

        TEXT
        .align  5                       C 32 byte alignment
        TYPE(.GSYM_PREFIX`'ecm_redc3,`@function')
.GSYM_PREFIX`'ecm_redc3:
        cmpdi   r5, 1                   C length = 1?
        bne     1f

dnl
dnl  special case n = 1: a single multiply-add, no loops and no stack use
dnl
        ld      r12, 0(r3)              C c[0]
        ld      r0, 0(r4)               C m[0]
        mulld   r7, r6, r12             C u = c[0] * m_inv mod 2^64
        mulld   r11, r0, r7             C m[0]*u low
        mulhdu  r10, r0, r7             C m[0]*u high
        addc    r11, r11, r12           C c[0] + m[0]*u low = 0
        addze   r10, r10                C carry to high half
        std     r10, 0(r3)              C store the "carry" word
        blr

        nop                             C padding after the n = 1 path
        nop
        nop
        nop
        nop

1:
        mflr    r0                      C save return addr
        stdu    r0, -8(r1)              C on the stack
        stdu    r14, -8(r1)             C save r14 (inner loop counter below)

dnl
dnl  get inner loop count and jump offset
dnl
        subi    r7, r5, 2               C r7 = n - 2
        andi.   r8, r7, 15              C r8 = (n - 2) mod 16
        sldi    r8, r8, 5               C r8 * 32 = byte offset
                                        C (each unrolled step is 8 instructions)
        srdi    r7, r7, 4               C int((n - 2)/16)

dnl
dnl  compute the address of inner loop end and subtract the offset
dnl
        bl      nxt                     C put the address of the next instruction
                                        C into the link register
nxt:                                    C
        mflr    r9                      C r9 = address of this instruction
        addi    r9, r9, 640             C add offset to v_1 from nxt
                                        C WARNING: any changes to the code between
                                        C the labels "nxt" and "v_1" may require
                                        C recomputation of the offset above.
                                        C 640 = 8*4 (nxt up to OuterLoop)
                                        C     + 17*4 (OuterLoop up to and incl. blr)
                                        C     + 7*4 (padding nops)
                                        C     + 16*32 (unrolled inner loop steps)
        sub     r9, r9, r8              C offset back to desired starting point
        mtlr    r9                      C and now we can branch directly to our target
        mtctr   r5                      C outer loop count n
        addi    r14, r7, 1              C inner loop counter
        nop
        nop

OuterLoop:                              C execute n times

dnl  compute u, set addr's
        ld      r12, 0(r3)              C c[0]
        mr      r8, r4                  C r8 = working copy of m address
        ld      r0, 0(r8)               C m[0]
        mulld   r7, r6, r12             C u = c[0] * m_inv mod 2^64
        mfctr   r5                      C save current outer loop count

dnl  start inner
        mulld   r11, r0, r7             C m[0]*u low
        mtctr   r14                     C inner loop count
        mulhdu  r10, r0, r7             C m[0]*u high
        ldu     r0, 8(r8)               C m[1]
        addc    r11, r11, r12           C m[0]*u low + c[0] (don't bother storing zero)
        mulld   r11, r0, r7             C m[1]*u low
        ldu     r12, 8(r3)              C c[1], update c address
        mr      r9, r3                  C r9 = working copy of c addr
        mulhdu  r0, r0, r7              C m[1]*u high
        adde    r11, r10, r11           C m[1]*u low + m[0]*u high + cy
        addze   r10, r0                 C m[1]*u high + cy
        blr                             C jump to start of the (n-2) mod 16 section
                                        C (or to v_1, if (n-2) mod 16 = 0)
                                        C LR still holds the entry address set up
                                        C before OuterLoop, so this is a computed
                                        C jump, not a return

        nop                             C padding, counted in the offset at nxt
        nop
        nop
        nop
        nop
        nop
        nop

dnl
dnl  Unrolled inner loop: 16 identical steps of 8 instructions (32 bytes) each.
dnl  A full trip through ILoop handles 16 words; the first trip of every outer
dnl  pass enters part-way through so that only (n-2) mod 16 steps are executed.
dnl
ILoop:
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 15
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 14
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 13
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 12
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 11
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 10
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 9
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 8
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 7
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 6
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 5
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 4
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 3
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 2
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

dnl
dnl  start (n-2) mod 16 = 1
dnl
        ldu     r0, 8(r8)               C m[i]
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        mulld   r11, r0, r7             C m[i]*u low
        ldu     r12, 8(r9)              C c[i]
        mulhdu  r0, r0, r7              C m[i]*u high
        adde    r11, r10, r11           C m[i]*u low + m[i-1]*u high + cy
        addze   r10, r0                 C r10 = m[i]*u high + cy

v_1:
        bdnz    ILoop                   C blr above jumps directly to this bdnz instruction
                                        C when (n-2) mod 16 = 0

dnl  finish inner
        addc    r11, r11, r12           C m[i-1]*u low + m[i-2]*u high + c[i-1]
        std     r11, 0(r9)              C store it in c[i-1]
        addze   r10, r10                C result cy = 0 always
        std     r10, -8(r3)             C store the "carry" word
                                        C (r3 was already advanced, so this is the
                                        C word that held c[0] for this pass)

        mtctr   r5                      C restore outer loop count
        bdnz    OuterLoop

        ld      r14, 0(r1)              C restore r14
        ld      r0, 8(r1)               C original return address
        addi    r1, r1, 16              C restore stack ptr
        mtlr    r0
        blr

        .size   .GSYM_PREFIX`'ecm_redc3, .-.GSYM_PREFIX`'ecm_redc3
ecm-6.4.4/powerpc64/mulredc18.asm0000644023561000001540000007552512113421641013363 00000000000000dnl ******************************************************************************
dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa.
dnl
dnl This file is part of the ECM Library.
dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc18(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc18 GLOBL .GSYM_PREFIX`'mulredc18 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc18: .quad .GSYM_PREFIX`'mulredc18, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc18, 24 C Implements multiplication and REDC for two input numbers of 18 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = 
x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 18] array, having 18+1 8-byte words C The tmp array needs 18+1 entries, but tmp[18] is stored in C r15, so only 18 entries are used in the stack. 
TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'18,`@function') .GSYM_PREFIX`'mulredc18: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 144 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 11 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 12 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 13 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 14 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 15 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 128(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 112(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 16 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 128(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 136(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 120(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 17. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 136(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 128(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 136(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 17 C outer loop count mtctr r9 1: C Pass for j = 0. We need to fetch x[i], tmp[i] and tmp[i+1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C 
add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 
addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add 
T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10 ld r14, 88(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 11 ld r14, 96(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 
80(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 12 ld r14, 104(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 13 ld r14, 112(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 14 ld r14, 120(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 
(2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 15 ld r14, 128(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 128(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 112(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 16 ld r14, 136(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 128(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 136(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 120(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 17. Don't fetch new data from y[j+1]. 
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 136(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 128(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 136(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc18, .-.GSYM_PREFIX`'mulredc18 ecm-6.4.4/powerpc64/mulredc19.asm0000644023561000001540000010041512113421641013347 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. 
dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc19(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc19 GLOBL .GSYM_PREFIX`'mulredc19 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc19: .quad .GSYM_PREFIX`'mulredc19, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc19, 24 C Implements multiplication and REDC for two input numbers of 19 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 
C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 19] array, having 19+1 8-byte words C The tmp array needs 19+1 entries, but tmp[19] is stored in C r15, so only 19 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'19,`@function') .GSYM_PREFIX`'mulredc19: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 152 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 11 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 12 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 13 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 14 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 15 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 128(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 112(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 16 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 128(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 136(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 120(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 17 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 136(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 144(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 128(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 18. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 144(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 136(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 144(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 18 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1]
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C 
U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10 ld r14, 88(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 11 ld r14, 96(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 12 ld r14, 104(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY 
mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 13 ld r14, 112(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 14 ld r14, 120(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 15 ld r14, 128(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld 
r0, 128(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 112(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 16 ld r14, 136(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 128(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 136(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 120(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 17 ld r14, 144(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 136(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 144(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 128(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 18. Don't fetch new data from y[j+1]. 
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 144(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 136(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 144(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) stdu r8, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc19, .-.GSYM_PREFIX`'mulredc19 ecm-6.4.4/powerpc64/mulredc9.asm0000644023561000001540000004375112113421641013277 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. 
dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc9(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry (tmp[len], at most 1, see note 4) returned in r3 C z, x, y and m are each accessed as 9 limbs of 8 bytes include(`config.m4') GLOBL GSYM_PREFIX`'mulredc9 GLOBL .GSYM_PREFIX`'mulredc9 C .opd entry for mulredc9: code address, TOC base, 0 C (presumably the ELFv1 function descriptor - confirm against the ABI) .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc9: .quad .GSYM_PREFIX`'mulredc9, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc9, 24 C Implements multiplication and REDC for two input numbers of 9 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for
j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp[len] <= 1 for i = 0. C Assume this is true for index i-1. Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C tmp[len] = r15, r16 = constant 0 (used with addze to C capture the carry bit), r0/r8/r9 = scratch C C local variables: tmp[0 ... 9] array, having 9+1 8-byte words C The tmp array needs 9+1 entries, but tmp[9] is stored in C r15, so only 9 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'9,`@function') .GSYM_PREFIX`'mulredc9: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0.
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 72 C reserve 9*8 = 72 bytes of stack for tmp[0 ... 8] mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0, only the carry out is used adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 56(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 64(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 8 C outer loop count: len - 1 = 8 passes, i = 1 ... 8 mtctr r9 C loop counter lives in CTR, decremented by bdnz below 1: C Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i]; load with update also advances the x pointer r4 ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add
low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze
r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8. Don't fetch new data from y[j+1].
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 64(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z C The update-form loads/stores walk r1 and r3 forward 8 bytes at a time; C after the last load r1 points at tmp[8], just below the saved r16. ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) stdu r8, 8(r3) mr r3, r15 C return tmp[len] (final carry) ldu r16, 8(r1) C restore r16 ldu r15, 8(r1) C restore r15 ldu r14, 8(r1) C restore r14 ldu r13, 8(r1) C restore r13 addi r1, r1, 8 C step past the r13 slot: r1 is back at its entry value blr .size .GSYM_PREFIX`'mulredc9, .-.GSYM_PREFIX`'mulredc9 ecm-6.4.4/powerpc64/mulredc13.asm0000644023561000001540000005727512113421641013360 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details.
dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc13(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc13 GLOBL .GSYM_PREFIX`'mulredc13 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc13: .quad .GSYM_PREFIX`'mulredc13, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc13, 24 C Implements multiplication and REDC for two input numbers of 13 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... 
len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 13] array, having 13+1 8-byte words C The tmp array needs 13+1 entries, but tmp[13] is stored in C r15, so only 13 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'13,`@function') .GSYM_PREFIX`'mulredc13: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 104 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2^2*128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 11 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 12. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 88(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 96(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 12 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[i] and tmp[i+1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] 
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C 
U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10 ld r14, 88(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 11 ld r14, 96(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 12. Don't fetch new data from y[j+1]. 
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 88(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 96(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) stdu r8, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc13, .-.GSYM_PREFIX`'mulredc13 ecm-6.4.4/powerpc64/mulredc12.asm0000644023561000001540000005441312113421641013346 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc12(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc12 GLOBL .GSYM_PREFIX`'mulredc12 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc12: .quad .GSYM_PREFIX`'mulredc12, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc12, 24 C Implements multiplication and REDC for two input numbers of 12 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... 
len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp[len] <= 1 for i = 0. C Assume this is true for index i-1. Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 12] array, having 12+1 8-byte words C The tmp array needs 12+1 entries, but tmp[12] is stored in C r15, so only 12 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'12,`@function') .GSYM_PREFIX`'mulredc12: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 96 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 11. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 80(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 88(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 11 C outer loop count mtctr r9 1: C Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C 
add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 
addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add 
T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10 ld r14, 88(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 11. Don't fetch new data from y[j+1]. 
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 80(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 88(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc12, .-.GSYM_PREFIX`'mulredc12 ecm-6.4.4/powerpc64/Makefile.in0000644023561000001540000003746112113353770013126 00000000000000# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software # Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = powerpc64 DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.in am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = LTLIBRARIES = $(noinst_LTLIBRARIES) libmulredc_la_LIBADD = am__objects_1 = mulredc1.lo mulredc2.lo mulredc3.lo mulredc4.lo \ mulredc5.lo mulredc6.lo mulredc7.lo mulredc8.lo mulredc9.lo \ mulredc10.lo mulredc11.lo mulredc12.lo mulredc13.lo \ mulredc14.lo mulredc15.lo mulredc16.lo mulredc17.lo \ mulredc18.lo mulredc19.lo mulredc20.lo am_libmulredc_la_OBJECTS = $(am__objects_1) redc.lo libmulredc_la_OBJECTS = $(am_libmulredc_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) CCASCOMPILE = $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS) LTCCASCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CCAS) $(AM_CCASFLAGS) $(CCASFLAGS) COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ 
--mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(libmulredc_la_SOURCES) DIST_SOURCES = $(libmulredc_la_SOURCES) HEADERS = $(noinst_HEADERS) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALLOCA = @ALLOCA@ AMTAR = @AMTAR@ AR = @AR@ ASMPATH = @ASMPATH@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCAS = @CCAS@ CCASDEPMODE = @CCASDEPMODE@ CCASFLAGS = @CCASFLAGS@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ DLLTOOL = @DLLTOOL@ DSYMUTIL = @DSYMUTIL@ DUMPBIN = @DUMPBIN@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ FGREP = @FGREP@ GMPLIB = @GMPLIB@ GREP = @GREP@ GSL_LD_FLAGS = @GSL_LD_FLAGS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LD = @LD@ LDFLAGS = LIBOBJS = @LIBOBJS@ # The asm code does not depend on any libraries except libc for abort() # if assertions are enabled LIBS = LIBTOOL = @LIBTOOL@ LIPO = @LIPO@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ M4 = @M4@ MAKEINFO = @MAKEINFO@ MANIFEST_TOOL = @MANIFEST_TOOL@ MKDIR_P = @MKDIR_P@ NM = @NM@ NMEDIT = @NMEDIT@ OBJDUMP = @OBJDUMP@ OBJEXT = @OBJEXT@ OPENMP_CFLAGS = @OPENMP_CFLAGS@ OTOOL = @OTOOL@ OTOOL64 = @OTOOL64@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ POW_LIB = @POW_LIB@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = 
@SET_MAKE@ SHELL = @SHELL@ STRIP = @STRIP@ VALGRIND = @VALGRIND@ VERSION = @VERSION@ XSLDIR = @XSLDIR@ XSLTPROC = @XSLTPROC@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_AR = @ac_ct_AR@ ac_ct_CC = @ac_ct_CC@ ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ MULREDC = mulredc1.asm mulredc2.asm mulredc3.asm mulredc4.asm mulredc5.asm \ mulredc6.asm mulredc7.asm mulredc8.asm mulredc9.asm mulredc10.asm \ mulredc11.asm mulredc12.asm mulredc13.asm mulredc14.asm \ mulredc15.asm mulredc16.asm mulredc17.asm mulredc18.asm \ mulredc19.asm mulredc20.asm EXTRA_DIST = Makefile.dev README mulredc_1_2.m4 mulredc.m4 generate_all \ powerpc-defs.m4 noinst_LTLIBRARIES = libmulredc.la noinst_HEADERS = mulredc.h # This library definition also causes the mulredc[n].asm and redc.asm files # to go in the distribution - no need for having them 
in EXTRA_DIST libmulredc_la_SOURCES = $(MULREDC) redc.asm # It's actually the .s files that depend on config.m4, but automake # knows them only as intermediate files, not as targets. Adding the # dependency to libmulredc.la should work so long as no stale .s # files exist. libmulredc_la_DEPENDENCIES = $(top_builddir)/config.m4 all: all-am .SUFFIXES: .SUFFIXES: .asm .lo .o .obj .s $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu powerpc64/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu powerpc64/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): clean-noinstLTLIBRARIES: -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ test "$$dir" != "$$p" || dir=.; \ echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done libmulredc.la: $(libmulredc_la_OBJECTS) $(libmulredc_la_DEPENDENCIES) $(EXTRA_libmulredc_la_DEPENDENCIES) $(LINK) $(libmulredc_la_OBJECTS) $(libmulredc_la_LIBADD) 
$(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c .s.o: $(CCASCOMPILE) -c -o $@ $< .s.obj: $(CCASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .s.lo: $(LTCCASCOMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) set x; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in files) print i; }; }'`; \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" 
| sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . 
= "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ clean-libtool clean-noinstLTLIBRARIES ctags distclean \ distclean-compile distclean-generic distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ install-strip installcheck installcheck-am installdirs \ maintainer-clean maintainer-clean-generic mostlyclean \ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ pdf pdf-am ps ps-am tags uninstall uninstall-am # Rules for generating the .asm files from the .m4 scripts mulredc1.asm: mulredc_1_2.m4 $(M4) -DLENGTH=1 $< > $@ mulredc2.asm: mulredc_1_2.m4 $(M4) -DLENGTH=2 $< > $@ mulredc3.asm: mulredc.m4 
$(M4) -DLENGTH=3 $< > $@ mulredc4.asm: mulredc.m4 $(M4) -DLENGTH=4 $< > $@ mulredc5.asm: mulredc.m4 $(M4) -DLENGTH=5 $< > $@ mulredc6.asm: mulredc.m4 $(M4) -DLENGTH=6 $< > $@ mulredc7.asm: mulredc.m4 $(M4) -DLENGTH=7 $< > $@ mulredc8.asm: mulredc.m4 $(M4) -DLENGTH=8 $< > $@ mulredc9.asm: mulredc.m4 $(M4) -DLENGTH=9 $< > $@ mulredc10.asm: mulredc.m4 $(M4) -DLENGTH=10 $< > $@ mulredc11.asm: mulredc.m4 $(M4) -DLENGTH=11 $< > $@ mulredc12.asm: mulredc.m4 $(M4) -DLENGTH=12 $< > $@ mulredc13.asm: mulredc.m4 $(M4) -DLENGTH=13 $< > $@ mulredc14.asm: mulredc.m4 $(M4) -DLENGTH=14 $< > $@ mulredc15.asm: mulredc.m4 $(M4) -DLENGTH=15 $< > $@ mulredc16.asm: mulredc.m4 $(M4) -DLENGTH=16 $< > $@ mulredc17.asm: mulredc.m4 $(M4) -DLENGTH=17 $< > $@ mulredc18.asm: mulredc.m4 $(M4) -DLENGTH=18 $< > $@ mulredc19.asm: mulredc.m4 $(M4) -DLENGTH=19 $< > $@ mulredc20.asm: mulredc.m4 $(M4) -DLENGTH=20 $< > $@ .asm.s: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.s #.asm.S: # $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.S # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: ecm-6.4.4/powerpc64/mulredc16.asm0000644023561000001540000006774512113421641013366 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc16(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc16 GLOBL .GSYM_PREFIX`'mulredc16 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc16: .quad .GSYM_PREFIX`'mulredc16, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc16, 24 C Implements multiplication and REDC for two input numbers of 16 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... 
len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 16] array, having 16+1 8-byte words C The tmp array needs 16+1 entries, but tmp[16] is stored in C r15, so only 16 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'16,`@function') .GSYM_PREFIX`'mulredc16: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 128 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0, only the carry is used adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 11 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 12 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 13 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 14 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 15. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 112(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 120(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 15 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1]
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C 
U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10 ld r14, 88(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 88(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 11 ld r14, 96(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 88(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 96(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 80(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 12 ld r14, 104(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 96(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY 
mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 104(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 88(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 13 ld r14, 112(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 104(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 112(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 96(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 14 ld r14, 120(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 112(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 120(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 104(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 15. Don't fetch new data from y[j+1]. 
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 120(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 112(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 120(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc16, .-.GSYM_PREFIX`'mulredc16 ecm-6.4.4/powerpc64/mulredc1.asm0000644023561000001540000000411712113421641013260 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. 
dnl
dnl The ECM Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the ECM Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
dnl MA 02110-1301, USA.
dnl ******************************************************************************

define(C, ` dnl')

C mp_limb_t mulredc1(mp_limb_t * z, const mp_limb_t x, const mp_limb_t y,
C const mp_limb_t m, mp_limb_t inv_m);
C
C One-limb multiplication followed by a Montgomery reduction step (REDC):
C   T1:T0    = x * y
C   u        = (T0 * inv_m) mod 2^64
C   cy:z[0]  = (T1:T0 + u * m) / 2^64
C The 64-bit quotient word is stored at z and cy is returned.
C No subtraction of m is performed in this routine.
C
C arguments:
C r3 : ptr to result z
C r4 : input x
C r5 : input y
C r6 : modulus m
C r7 = -1/m mod 2^64
C
C final carry returned in r3

include(`config.m4')

        GLOBL GSYM_PREFIX`'mulredc1
        GLOBL .GSYM_PREFIX`'mulredc1

C Descriptor for the global symbol in .opd: code entry point, TOC base, 0.
        .section ".opd", "aw"
        .align 3
GSYM_PREFIX`'mulredc1:
        .quad .GSYM_PREFIX`'mulredc1, .TOC.@tocbase, 0
        .size GSYM_PREFIX`'mulredc1, 24

        TEXT
        .align 5  C powerPC 32 byte alignment
        TYPE(.GSYM_PREFIX`'mulredc`'1,`@function')
.GSYM_PREFIX`'mulredc1:
        mulld r8, r4, r5  C x*y low half T0
        mulhdu r9, r4, r5  C x*y high half T1
        mulld r0, r7, r8  C u = t0 * invm
        mulld r10, r0, r6  C u*m low
        mulhdu r11, r0, r6  C u*m high
        addc r8, r8, r10  C x*y + u*m low (= zero)
        adde r9, r9, r11  C result
        std r9, 0(r3)  C store in z
C r8 holds the low-word sum from the addc above, which the comment there
C states is zero (this relies on r7 = -1/m mod 2^64), so r8 + CA is just the
C carry out of the adde.
        addze r3, r8  C return carry
        blr

        .size .GSYM_PREFIX`'mulredc1, .-.GSYM_PREFIX`'mulredc1
ecm-6.4.4/powerpc64/mulredc_1_2.m40000755023561000001540000001317712110743510013410 00000000000000`dnl ******************************************************************************
dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa.
dnl
dnl This file is part of the ECM Library.
dnl
dnl The ECM Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published by
dnl the Free Software Foundation; either version 3 of the License, or (at your
dnl option) any later version.
dnl
dnl The ECM Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the ECM Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
dnl MA 02110-1301, USA.
dnl ******************************************************************************'

dnl Use `C' to remove comments in .asm -> .s conversion.
dnl Copied from GMP 4.2.
dnl
dnl Template for the one-limb and two-limb mulredc routines. The macro LENGTH
dnl must be defined when this file is expanded; the ifelse calls below handle
dnl the values 1 and 2 and emit no code body for any other value.
dnl Quoted text is copied to the output unexpanded, so the quoted licence
dnl block, the quoted define of C and the quoted include of config.m4 only
dnl take effect when the generated .asm file is itself run through m4.
`define(C, ` dnl')'

dnl Header comment of the generated file. Both alternatives are quoted so
dnl that m4 copies them verbatim. The LENGTH = 1 alternative used to lack its
dnl opening quote, which left a stray closing quote after the words
dnl "modulus m" in the generated mulredc1.asm.
ifelse(eval(LENGTH),1,
`C mp_limb_t mulredc1(mp_limb_t * z, const mp_limb_t x, const mp_limb_t y,
C const mp_limb_t m, mp_limb_t inv_m);
C
C arguments:
C r3 : ptr to result z
C r4 : input x
C r5 : input y
C r6 : modulus m',
`C mp_limb_t mulredc'LENGTH`(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y,
C const mp_limb_t *m, mp_limb_t inv_m);
C
C arguments:
C r3 = ptr to result z least significant limb
C r4 = ptr to input x least significant limb
C r5 = ptr to input y least significant limb
C r6 = ptr to modulus m least significant limb')
C r7 = -1/m mod 2^64
C
C final carry returned in r3

divert(-1)
dnl forloop(i, from, to, stmt)
dnl (defined here but not invoked anywhere in this template)
define(`forloop', `pushdef(`$1', `$2')_forloop(`$1', `$2', `$3', `$4')popdef(`$1')')
define(`_forloop',
       `$4`'ifelse($1, `$3', , `define(`$1', incr($1))_forloop(`$1', `$2', `$3', `$4')')')
divert

`include(`config.m4')'

        GLOBL GSYM_PREFIX``''mulredc`'LENGTH
        GLOBL .GSYM_PREFIX``''mulredc`'LENGTH

        .section ".opd", "aw"
        .align 3
GSYM_PREFIX``''mulredc`'LENGTH:
        .quad .GSYM_PREFIX``''mulredc`'LENGTH, .TOC.@tocbase, 0
        .size GSYM_PREFIX``''mulredc`'LENGTH, 24

        TEXT
        .align 5  C powerPC 32 byte alignment
        TYPE(.GSYM_PREFIX``''mulredc``''LENGTH,``@function'')
.GSYM_PREFIX``''mulredc`'LENGTH:

dnl Code body: the first quoted alternative is the one-limb routine, the
dnl second is the fully unrolled two-limb routine (i = 0 pass, then i = 1 pass).
ifelse(eval(LENGTH),1, `
        mulld r8, r4, r5  C x*y low half T0
        mulhdu r9, r4, r5  C x*y high half T1
        mulld r0, r7, r8  C u = t0 * invm
        mulld r10, r0, r6  C u*m low
        mulhdu r11, r0, r6  C u*m high
        addc r8, r8, r10  C x*y + u*m low (= zero)
        adde r9, r9, r11  C result
        std r9, 0(r3)  C store in z
        addze r3, r8  C return carry
        blr',
eval(LENGTH),2, `
        ld r12, 0(r4)  C XI = x[0]
        ld r0, 0(r5)  C y[0]
        stdu r13, -8(r1)  C save r13
        mulld r8, r0, r12  C x[0]*y[0] low half
        stdu r14, -8(r1)  C save r14
        mulhdu r9, r0, r12  C x[0]*y[0] high half
        ld r0, 0(r6)  C m[0]
        mulld r11, r7, r8  C U = T0*invm mod 2^64
        stdu r15, -8(r1)  C save r15
        mulld r13, r0, r11  C T0 = U*m[0] low
        stdu r16, -8(r1)  C save r16
        li r16, 0  C set r16 to zero for carry propagation
        mulhdu r14, r0, r11  C T1 = U*m[0] high
        ld r0, 8(r5)  C y[1]
        addc r8, r8, r13  C result zero
        mulld r8, r0, r12  C x[0]*y[1] low half
        adde r13, r9, r14  C T0 = initial tmp(0)
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[0]*y[1] high half
        ld r0, 8(r6)  C m[1]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[1] low
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
        mulhdu r9, r0, r11  C U*m[1] high
        ldu r12, 8(r4)  C x[1]
        ld r0, 0(r5)  C y[0]
        addc r13, r8, r13  C add T0 and low word
        mulld r8, r0, r12  C x[1]*y[0] low half
        adde r14, r9, r14  C add high word with carry to T1
        addze r15, r16  C put carry in r15 (tmp[len] <= 1)
        mulhdu r9, r0, r12  C x[1]*y[0] high half
        addc r13, r8, r13  C T0
        ld r0, 0(r6)  C m[0]
        mulld r11, r7, r13  C U = T0*invm mod 2^64
        adde r14, r9, r14  C T1
        mulld r8, r0, r11  C U*m[0] low
        addze r10, r16  C CY
        mulhdu r9, r0, r11  C T1 = U*m[0] high
        ld r0, 8(r5)  C y[1]
        addc r8, r8, r13  C result = 0
        adde r13, r9, r14  C T0, carry pending
        mulld r8, r0, r12  C x[1]*y[1] low half
        adde r14, r15, r10  C T1 = tmp[len] + CY + pending carry
        C since tmp[len] <= 1, T1 <= 3 and carry is zero
        mulhdu r9, r0, r12  C x[1]*y[1] high half
        ld r0, 8(r6)  C m[1]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[1] low
        adde r14, r9, r14  C add high to T1
        addze r10, r16  C CY
        mulhdu r9, r0, r11  C U*m[1] high
        addc r8, r8, r13  C add T0 and low word
        adde r9, r9, r14  C T1, carry pending
        std r8, 0(r3)  C copy result to z
        stdu r9, 8(r3)
        addze r3, r10  C return tmp(len)
        C Pop r16, r15, r14, r13 in the reverse order of the saves above
        ld r16, 0(r1)
        ldu r15, 8(r1)
        ldu r14, 8(r1)
        ldu r13, 8(r1)
        addi r1, r1, 8
        blr')

        .size .GSYM_PREFIX``''mulredc`'LENGTH, .-.GSYM_PREFIX``''mulredc`'LENGTH
ecm-6.4.4/powerpc64/mulredc15.asm0000644023561000001540000006505512113421641013355 00000000000000dnl ******************************************************************************
dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa.
dnl
dnl This file is part of the ECM Library.
dnl
dnl The ECM Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published by
dnl the Free Software Foundation; either version 3 of the License, or (at your
dnl option) any later version.
dnl
dnl The ECM Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the ECM Library; see the file COPYING.LIB. If not, write to
dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
dnl MA 02110-1301, USA.
dnl ******************************************************************************

define(C, ` dnl')

C mp_limb_t mulredc15(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y,
C const mp_limb_t *m, mp_limb_t inv_m);
C
C Fully unrolled 15-limb multiplication with interleaved REDC steps.
C The 15 result limbs are written to z[0 ... 14] and the extra top word
C tmp[15] (shown below to be 0 or 1) is returned. No subtraction of m is
C performed in this routine.
C
C arguments:
C r3 = ptr to result z least significant limb
C r4 = ptr to input x least significant limb
C r5 = ptr to input y least significant limb
C r6 = ptr to modulus m least significant limb
C r7 = -1/m mod 2^64
C
C final carry returned in r3

include(`config.m4')

        GLOBL GSYM_PREFIX`'mulredc15
        GLOBL .GSYM_PREFIX`'mulredc15

        .section ".opd", "aw"
        .align 3
GSYM_PREFIX`'mulredc15:
        .quad .GSYM_PREFIX`'mulredc15, .TOC.@tocbase, 0
        .size GSYM_PREFIX`'mulredc15, 24

C Implements multiplication and REDC for two input numbers of 15 words
C The algorithm:
C (Notation: a:b:c == a * 2^128 + b * 2^64 + c)
C
C T1:T0 = x[i]*y[0] ;
C u = (T0*invm) % 2^64 ;
C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */
C for (j = 1; j < len; j++)
C {
C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ;
C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */
C tmp[j-1] = T0;
C }
C tmp[len-1] = T1 ;
C tmp[len] = cy ; /* cy <= 1 (see note 2) */
C for (i = 1; i < len; i++)
C {
C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ;
C u = (T0*invm) % 2^64 ;
C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */
C for (j = 1; j < len; j++)
C {
C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ;
C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3
C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */
C tmp[j-1] = T0;
C }
C tmp[len-1] = T1 ;
C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */
C }
C z[0 ... len-1] = tmp[0 ... len-1] ;
C return (tmp[len]) ;
C
C notes:
C
C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2,
C so cy:T1 <= 2*2^64 - 4.
C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4
C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2),
C so cy:T1 <= 2*2^64 - 3. For j > 1,
C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1),
C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j.
C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1,
C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4)
C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1
C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64
C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2),
C so cy:T1 <= 3*2^64 - 3. For j > 1,
C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1),
C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1.
C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0.
C Assume this is true for index i-1, Then
C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1
C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64
C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1),
C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction.
C
C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11
C YP = r5, MP = r6, TP = r1 (stack ptr)
C
C local variables: tmp[0 ... 15] array, having 15+1 8-byte words
C The tmp array needs 15+1 entries, but tmp[15] is stored in
C r15, so only 15 entries are used in the stack.
C
C Other registers as used below: r16 is kept at zero so that "addze rX, r16"
C captures the carry bit; r0, r8 and r9 hold the current operand limb and
C product halves; r13-r16 are saved on entry and restored before returning.

        TEXT
        .align 5  C powerPC 32 byte alignment
        TYPE(.GSYM_PREFIX`'mulredc`'15,`@function')
.GSYM_PREFIX`'mulredc15:

C ########################################################################
C # i = 0 pass
C #########################################################################

C Pass for j = 0. We need to fetch x[i] from memory and compute the new u

        ld r12, 0(r4)  C XI = x[0]
        ld r0, 0(r5)  C y[0]
        stdu r13, -8(r1)  C save r13
        mulld r8, r0, r12  C x[0]*y[0] low half
        stdu r14, -8(r1)  C save r14
        mulhdu r9, r0, r12  C x[0]*y[0] high half
        ld r0, 0(r6)  C m[0]
        mulld r11, r7, r8  C U = T0*invm mod 2^64
        stdu r15, -8(r1)  C save r15
        mulld r13, r0, r11  C T0 = U*m[0] low
        stdu r16, -8(r1)  C save r16
        li r16, 0  C set r16 to zero for carry propagation
C 120 = 15 limbs * 8 bytes: tmp[0 ... 14] live at 0(r1) ... 112(r1), directly
C below the four saved registers.
        subi r1, r1, 120  C set tmp stack space
        mulhdu r14, r0, r11  C T1 = U*m[0] high
        ld r0, 8(r5)  C y[1]
        addc r8, r8, r13  C result = 0 (only the carry is used)
        adde r13, r9, r14  C T0 = initial tmp(0)
        addze r10, r16  C carry to CY
        C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence
        C CY:T1 <= 2*2^64 - 4

C Pass for j = 1

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 8(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 16(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 0(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 2

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 16(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 24(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 8(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 3

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 24(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 32(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 16(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 4

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 32(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 40(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 24(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 5

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 40(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 48(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 32(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 6

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 48(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 56(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 40(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 7

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 56(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 64(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 48(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 8

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 64(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 72(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 56(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 9

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 72(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 80(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 64(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 10

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 80(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 88(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 72(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 11

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 88(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 96(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 80(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 12

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 96(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 104(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 88(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 13

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 104(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 112(r5)  C y[j+1]
        adde r13, r9, r14  C add high word with carry to T1
        addze r10, r16  C carry to CY
        std r8, 96(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
        C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 14. Don't fetch new data from y[j+1].

        mulld r8, r0, r12  C x[i]*y[j] low half
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 112(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        adde r14, r9, r10  C add high word with carry + CY to T1
        C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!

        mulld r8, r0, r11  C U*m[j] low
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        adde r13, r9, r14  C add high word with carry to T1
        std r8, 104(r1)  C store tmp[len-2]
        addze r15, r16  C put carry in r15 (tmp[len] <= 1)
        std r13, 112(r1)  C store tmp[len-1]

C #########################################################################
C # i > 0 passes
C #########################################################################

C The counter register is loaded with 14 = len - 1, one iteration of the
C loop at label 1 for each of i = 1 ... 14.
        li r9, 14  C outer loop count
        mtctr r9

1:

C Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory
C and compute the new u

        ldu r12, 8(r4)  C x[i]
        ld r0, 0(r5)  C y[0]
        ld r13, 0(r1)  C tmp[0]
        mulld r8, r0, r12  C x[i]*y[0] low half
        ld r14, 8(r1)  C tmp[1]
        mulhdu r9, r0, r12  C x[i]*y[0] high half
        addc r13, r8, r13  C T0
        ld r0, 0(r6)  C m[0]
        mulld r11, r7, r13  C U = T0*invm mod 2^64
        adde r14, r9, r14  C T1
        mulld r8, r0, r11  C U*m[0] low
        addze r10, r16  C CY
        mulhdu r9, r0, r11  C U*m[0] high
        ld r0, 8(r5)  C y[1]
        addc r8, r8, r13  C result = 0
        adde r13, r9, r14  C T0, carry pending
        C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1,
        C so cy:T1 <= 3*2^64 - 4

C Pass for j = 1

        ld r14, 16(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 8(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 16(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 0(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 2

        ld r14, 24(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 16(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 24(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 8(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 3

        ld r14, 32(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 24(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 32(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 16(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 4

        ld r14, 40(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 32(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 40(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 24(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 5

        ld r14, 48(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 40(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 48(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 32(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 6

        ld r14, 56(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 48(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 56(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 40(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 7

        ld r14, 64(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 56(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 64(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 48(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 8

        ld r14, 72(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 64(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 72(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 56(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 9

        ld r14, 80(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 72(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 80(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 64(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 10

        ld r14, 88(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 80(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 88(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 72(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 11

        ld r14, 96(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 88(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 96(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 80(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 12

        ld r14, 104(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 96(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 104(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 88(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 13

        ld r14, 112(r1)  C tmp[j+1]
        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r14, r10  C tmp[j+1] + CY + pending carry
        addze r10, r16  C carry to CY
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 104(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r10  C add carry to CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        ld r0, 112(r5)  C y[j+1]
        adde r13, r9, r14  C T1, carry pending
        std r8, 96(r1)  C store tmp[j-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
        C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 14. Don't fetch new data from y[j+1].

        mulld r8, r0, r12  C x[i]*y[j] low half
        adde r14, r15, r10  C T1 = tmp[len] + CY + pending carry
        C since tmp[len] <= 1, T1 <= 3 and carry is zero
        mulhdu r9, r0, r12  C x[i]*y[j] high half
        ld r0, 112(r6)  C m[j]
        addc r13, r8, r13  C add low word to T0
        mulld r8, r0, r11  C U*m[j] low
        adde r14, r9, r14  C add high to T1
        addze r10, r16  C CY
        mulhdu r9, r0, r11  C U*m[j] high
        addc r8, r8, r13  C add T0 and low word
        adde r13, r9, r14  C T1, carry pending
        std r8, 104(r1)  C store tmp[len-2]
        addze r15, r10  C store tmp[len] <= 1
        std r13, 112(r1)  C store tmp[len-1]
        C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64
        C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1)

        bdnz 1b

C Copy result from tmp memory to z
C Each ldu/stdu advances r1 or r3 by 8 before the access, so the 15 limbs
C are copied in order and r1 ends up pointing at tmp[14], i.e. 8 bytes below
C the saved r16.

        ld r8, 0(r1)
        ldu r9, 8(r1)
        std r8, 0(r3)
        stdu r9, 8(r3)
        ldu r8, 8(r1)
        ldu r9, 8(r1)
        stdu r8, 8(r3)
        stdu r9, 8(r3)
        ldu r8, 8(r1)
        ldu r9, 8(r1)
        stdu r8, 8(r3)
        stdu r9, 8(r3)
        ldu r8, 8(r1)
        ldu r9, 8(r1)
        stdu r8, 8(r3)
        stdu r9, 8(r3)
        ldu r8, 8(r1)
        ldu r9, 8(r1)
        stdu r8, 8(r3)
        stdu r9, 8(r3)
        ldu r8, 8(r1)
        ldu r9, 8(r1)
        stdu r8, 8(r3)
        stdu r9, 8(r3)
        ldu r8, 8(r1)
        ldu r9, 8(r1)
        stdu r8, 8(r3)
        stdu r9, 8(r3)
        ldu r8, 8(r1)
        stdu r8, 8(r3)

        mr r3, r15  C return tmp(len)
C Pop r16, r15, r14, r13 in the reverse order of the saves at entry, then
C step r1 past the last slot.
        ldu r16, 8(r1)
        ldu r15, 8(r1)
        ldu r14, 8(r1)
        ldu r13, 8(r1)
        addi r1, r1, 8
        blr

        .size .GSYM_PREFIX`'mulredc15, .-.GSYM_PREFIX`'mulredc15
ecm-6.4.4/powerpc64/mulredc10.asm0000644023561000001540000004665012113421641013350 00000000000000dnl ******************************************************************************
dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa.
dnl
dnl This file is part of the ECM Library.
dnl
dnl The ECM Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published by
dnl the Free Software Foundation; either version 3 of the License, or (at your
dnl option) any later version.
dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc10(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc10 GLOBL .GSYM_PREFIX`'mulredc10 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc10: .quad .GSYM_PREFIX`'mulredc10, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc10, 24 C Implements multiplication and REDC for two input numbers of 10 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 
C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 10] array, having 10+1 8-byte words C The tmp array needs 10+1 entries, but tmp[10] is stored in C r15, so only 10 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'10,`@function') .GSYM_PREFIX`'mulredc10: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 80 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 64(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 72(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 9 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1]
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9. Don't fetch new data from y[j+1]. 
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 72(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc10, .-.GSYM_PREFIX`'mulredc10 ecm-6.4.4/powerpc64/Makefile.am0000755023561000001540000000445512106741272013115 00000000000000MULREDC = mulredc1.asm mulredc2.asm mulredc3.asm mulredc4.asm mulredc5.asm \ mulredc6.asm mulredc7.asm mulredc8.asm mulredc9.asm mulredc10.asm \ mulredc11.asm mulredc12.asm mulredc13.asm mulredc14.asm \ mulredc15.asm mulredc16.asm mulredc17.asm mulredc18.asm \ mulredc19.asm mulredc20.asm EXTRA_DIST = Makefile.dev README mulredc_1_2.m4 mulredc.m4 generate_all \ powerpc-defs.m4 noinst_LTLIBRARIES = libmulredc.la noinst_HEADERS = mulredc.h # This library definition also causes the mulredc[n].asm and redc.asm files # to go in the distribution - no need for having them in EXTRA_DIST libmulredc_la_SOURCES = $(MULREDC) redc.asm # It's actually the .s files that depend on config.m4, but automake # knows them only as intermediate files, not as targets. 
Adding the # dependency to libmulredc.la should work so long as no stale .s # files exist. libmulredc_la_DEPENDENCIES = $(top_builddir)/config.m4 # The asm code does not depend on any libraries except libc for abort() # if assertions are enabled LIBS = LDFLAGS = # Rules for generating the .asm files from the .m4 scripts mulredc1.asm: mulredc_1_2.m4 $(M4) -DLENGTH=1 $< > $@ mulredc2.asm: mulredc_1_2.m4 $(M4) -DLENGTH=2 $< > $@ mulredc3.asm: mulredc.m4 $(M4) -DLENGTH=3 $< > $@ mulredc4.asm: mulredc.m4 $(M4) -DLENGTH=4 $< > $@ mulredc5.asm: mulredc.m4 $(M4) -DLENGTH=5 $< > $@ mulredc6.asm: mulredc.m4 $(M4) -DLENGTH=6 $< > $@ mulredc7.asm: mulredc.m4 $(M4) -DLENGTH=7 $< > $@ mulredc8.asm: mulredc.m4 $(M4) -DLENGTH=8 $< > $@ mulredc9.asm: mulredc.m4 $(M4) -DLENGTH=9 $< > $@ mulredc10.asm: mulredc.m4 $(M4) -DLENGTH=10 $< > $@ mulredc11.asm: mulredc.m4 $(M4) -DLENGTH=11 $< > $@ mulredc12.asm: mulredc.m4 $(M4) -DLENGTH=12 $< > $@ mulredc13.asm: mulredc.m4 $(M4) -DLENGTH=13 $< > $@ mulredc14.asm: mulredc.m4 $(M4) -DLENGTH=14 $< > $@ mulredc15.asm: mulredc.m4 $(M4) -DLENGTH=15 $< > $@ mulredc16.asm: mulredc.m4 $(M4) -DLENGTH=16 $< > $@ mulredc17.asm: mulredc.m4 $(M4) -DLENGTH=17 $< > $@ mulredc18.asm: mulredc.m4 $(M4) -DLENGTH=18 $< > $@ mulredc19.asm: mulredc.m4 $(M4) -DLENGTH=19 $< > $@ mulredc20.asm: mulredc.m4 $(M4) -DLENGTH=20 $< > $@ .asm.s: $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.s #.asm.S: # $(M4) -I../ -DOPERATION_$* `test -f $< || echo '$(srcdir)/'`$< >$*.S ecm-6.4.4/powerpc64/mulredc4.asm0000644023561000001540000002557612113421641013277 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. 
dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc4(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc4 GLOBL .GSYM_PREFIX`'mulredc4 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc4: .quad .GSYM_PREFIX`'mulredc4, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc4, 24 C Implements multiplication and REDC for two input numbers of 4 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = 
x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 4] array, having 4+1 8-byte words C The tmp array needs 4+1 entries, but tmp[4] is stored in C r15, so only 4 entries are used in the stack. 
TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'4,`@function') .GSYM_PREFIX`'mulredc4: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 32 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 <= 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 16(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 24(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 3 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3. Don't fetch new data from y[j+1].
mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 24(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc4, .-.GSYM_PREFIX`'mulredc4 ecm-6.4.4/powerpc64/mulredc8.asm0000644023561000001540000004107212113421641013270 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. 
If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc8(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc8 GLOBL .GSYM_PREFIX`'mulredc8 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc8: .quad .GSYM_PREFIX`'mulredc8, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc8, 24 C Implements multiplication and REDC for two input numbers of 8 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. 
C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 8] array, having 8+1 8-byte words C The tmp array needs 8+1 entries, but tmp[8] is stored in C r15, so only 8 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'8,`@function') .GSYM_PREFIX`'mulredc8: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 64 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0, only the carry is kept adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 = 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 48(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 56(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 7 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1]
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 56(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc8, .-.GSYM_PREFIX`'mulredc8 ecm-6.4.4/powerpc64/README0000755023561000001540000000176012106741272011735 00000000000000The files in this directory (powerpc64) were contributed by Philip McLaughlin . They are distributed under the LGPL license, whose text is in ../COPYING.LIB. mulredc[1..20].asm are size-specific asm functions for mulredc. Sizes 1 and 2 may be regenerated by the m4 script mulredc_1_2.m4. 
Sizes 3 through 20 may be regenerated by the m4 script mulredc.m4. The autoconf/automake build does not regenerate these files automatically. To regenerate them by hand, the syntax is (for 1 and 2) m4 -DLENGTH=1 mulredc_1_2.m4 > mulredc1.asm m4 -DLENGTH=2 mulredc_1_2.m4 > mulredc2.asm (for 3 through 20) m4 -DLENGTH=3 mulredc.m4 > mulredc3.asm m4 -DLENGTH=4 mulredc.m4 > mulredc4.asm ... etc., up to LENGTH=20. If you have problems, you should reconfigure with the --disable-asm-redc option. redc.asm is a version of redc kept separate from the multiplication, since there are cases where only the reduction is needed. test_mulredc.c, bench.c and Makefile.dev are for development only. ecm-6.4.4/powerpc64/mulredc11.asm0000644023561000001540000005153212113421641013344 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA.
dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc11(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc11 GLOBL .GSYM_PREFIX`'mulredc11 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc11: .quad .GSYM_PREFIX`'mulredc11, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc11, 24 C Implements multiplication and REDC for two input numbers of 11 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. 
For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 11] array, having 11+1 8-byte words C The tmp array needs 11+1 entries, but tmp[11] is stored in C r15, so only 11 entries are used in the stack. TEXT .align 5 C powerPC 32 byte alignment TYPE(.GSYM_PREFIX`'mulredc`'11,`@function') .GSYM_PREFIX`'mulredc11: C ######################################################################## C # i = 0 pass C ######################################################################### C Pass for j = 0. 
We need to fetch x[i] from memory and compute the new u ld r12, 0(r4) C XI = x[0] ld r0, 0(r5) C y[0] stdu r13, -8(r1) C save r13 mulld r8, r0, r12 C x[0]*y[0] low half stdu r14, -8(r1) C save r14 mulhdu r9, r0, r12 C x[0]*y[0] high half ld r0, 0(r6) C m[0] mulld r11, r7, r8 C U = T0*invm mod 2^64 stdu r15, -8(r1) C save r15 mulld r13, r0, r11 C T0 = U*m[0] low stdu r16, -8(r1) C save r16 li r16, 0 C set r16 to zero for carry propagation subi r1, r1, 88 C set tmp stack space mulhdu r14, r0, r11 C T1 = U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0, only the carry is kept adde r13, r9, r14 C T0 = initial tmp(0) addze r10, r16 C carry to CY C CY:T1:T0 <= 2*(2^64-1)^2 = 2*2^128 - 4*2^64 + 2, hence C CY:T1 <= 2*2^64 - 4 C Pass for j = 1 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 2 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 3 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 4 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 5 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 6 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 7 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 8 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! 
mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 9 mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C add high word with carry to T1 addze r10, r16 C carry to CY std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <= C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3 C Pass for j = 10. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 adde r14, r9, r10 C add high word with carry + CY to T1 C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry! mulld r8, r0, r11 C U*m[j] low mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C add high word with carry to T1 std r8, 72(r1) C store tmp[len-2] addze r15, r16 C put carry in r15 (tmp[len] <= 1) std r13, 80(r1) C store tmp[len-1] C ######################################################################### C # i > 0 passes C ######################################################################### li r9, 10 C outer loop count mtctr r9 1: C Pass for j = 0. 
We need to fetch x[i], tmp[0] and tmp[1] from memory C and compute the new u ldu r12, 8(r4) C x[i] ld r0, 0(r5) C y[0] ld r13, 0(r1) C tmp[0] mulld r8, r0, r12 C x[i]*y[0] low half ld r14, 8(r1) C tmp[1] mulhdu r9, r0, r12 C x[i]*y[0] high half addc r13, r8, r13 C T0 ld r0, 0(r6) C m[0] mulld r11, r7, r13 C U = T0*invm mod 2^64 adde r14, r9, r14 C T1 mulld r8, r0, r11 C U*m[0] low addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[0] high ld r0, 8(r5) C y[1] addc r8, r8, r13 C result = 0 adde r13, r9, r14 C T0, carry pending C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 C Pass for j = 1 ld r14, 16(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 8(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 16(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 0(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 2 ld r14, 24(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 16(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 24(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 8(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 3 ld r14, 32(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1]
+ CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 24(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 32(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 16(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 4 ld r14, 40(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 32(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 40(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 24(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 5 ld r14, 48(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 40(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 48(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 32(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 6 ld r14, 56(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high 
half ld r0, 48(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 56(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 40(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 7 ld r14, 64(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 56(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 64(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 48(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 8 ld r14, 72(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 64(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 72(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 56(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 9 ld r14, 80(r1) C tmp[j+1] mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r14, r10 C tmp[j+1] + CY + pending carry addze r10, r16 C carry to CY mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 72(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C 
U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r10 C add carry to CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word ld r0, 80(r5) C y[j+1] adde r13, r9, r14 C T1, carry pending std r8, 64(r1) C store tmp[j-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3 C Pass for j = 10. Don't fetch new data from y[j+1]. mulld r8, r0, r12 C x[i]*y[j] low half adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry C since tmp[len] <= 1, T1 <= 3 and carry is zero mulhdu r9, r0, r12 C x[i]*y[j] high half ld r0, 80(r6) C m[j] addc r13, r8, r13 C add low word to T0 mulld r8, r0, r11 C U*m[j] low adde r14, r9, r14 C add high to T1 addze r10, r16 C CY mulhdu r9, r0, r11 C U*m[j] high addc r8, r8, r13 C add T0 and low word adde r13, r9, r14 C T1, carry pending std r8, 72(r1) C store tmp[len-2] addze r15, r10 C store tmp[len] <= 1 std r13, 80(r1) C store tmp[len-1] C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1) bdnz 1b C Copy result from tmp memory to z ld r8, 0(r1) ldu r9, 8(r1) std r8, 0(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) ldu r9, 8(r1) stdu r8, 8(r3) stdu r9, 8(r3) ldu r8, 8(r1) stdu r8, 8(r3) mr r3, r15 C return tmp(len) ldu r16, 8(r1) ldu r15, 8(r1) ldu r14, 8(r1) ldu r13, 8(r1) addi r1, r1, 8 blr .size .GSYM_PREFIX`'mulredc11, .-.GSYM_PREFIX`'mulredc11 ecm-6.4.4/powerpc64/mulredc7.asm0000644023561000001540000003621312113421641013270 00000000000000dnl ****************************************************************************** dnl Copyright 2009 Paul Zimmermann and Alexander Kruppa. dnl dnl This file is part of the ECM Library. 
dnl dnl The ECM Library is free software; you can redistribute it and/or modify dnl it under the terms of the GNU Lesser General Public License as published by dnl the Free Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl The ECM Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public dnl License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public License dnl along with the ECM Library; see the file COPYING.LIB. If not, write to dnl the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, dnl MA 02110-1301, USA. dnl ****************************************************************************** define(C, ` dnl') C mp_limb_t mulredc7(mp_limb_t * z, const mp_limb_t * x, const mp_limb_t * y, C const mp_limb_t *m, mp_limb_t inv_m); C C arguments: C r3 = ptr to result z least significant limb C r4 = ptr to input x least significant limb C r5 = ptr to input y least significant limb C r6 = ptr to modulus m least significant limb C r7 = -1/m mod 2^64 C C final carry returned in r3 include(`config.m4') GLOBL GSYM_PREFIX`'mulredc7 GLOBL .GSYM_PREFIX`'mulredc7 .section ".opd", "aw" .align 3 GSYM_PREFIX`'mulredc7: .quad .GSYM_PREFIX`'mulredc7, .TOC.@tocbase, 0 .size GSYM_PREFIX`'mulredc7, 24 C Implements multiplication and REDC for two input numbers of 7 words C The algorithm: C (Notation: a:b:c == a * 2^128 + b * 2^64 + c) C C T1:T0 = x[i]*y[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + T1:T0) / 2^64 ; /* cy:T1 <= 2*2^64 - 4 (see note 1) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + cy:T1 ; C /* for all j result cy:T1 <= 2*2^64 - 3 (see note 2) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 (see note 2) */ C for (i = 1; i < len; i++) C { C cy:T1:T0 = 
x[i]*y[0] + tmp[1]:tmp[0] ; C u = (T0*invm) % 2^64 ; C cy:T1 = (m[0]*u + cy:T1:T0) / 2^64 ; /* cy:T1 <= 3*2^64 - 4 (see note 3) */ C for (j = 1; j < len; j++) C { C cy:T1:T0 = x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 ; C /* for all j < (len-1), result cy:T1 <= 3*2^64 - 3 C for j = (len-1), result cy:T1 <= 2*2^64 - 1 (see note 4) */ C tmp[j-1] = T0; C } C tmp[len-1] = T1 ; C tmp[len] = cy ; /* cy <= 1 for all i (see note 4) */ C } C z[0 ... len-1] = tmp[0 ... len-1] ; C return (tmp[len]) ; C C notes: C C 1: m[0]*u + T1:T0 <= 2*(2^64 - 1)^2 <= 2*2^128 - 4*2^64 + 2, C so cy:T1 <= 2*2^64 - 4. C 2: For j = 1, x[i]*y[j] + m[j]*u + cy:T1 <= 2*(2^64 - 1)^2 + 2*2^64 - 4 C <= 2*2^128 - 2*2^64 - 2 = 1:(2^64-3):(2^64-2), C so cy:T1 <= 2*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + cy:T1 <= 2*2^128 - 2*2^64 - 1 = 1:(2^64-3):(2^64-1), C so cy:T1 <= 2*2^64 - 3 = 1:(2^64-3) holds for all j. C 3: m[0]*u + cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1, C so cy:T1 <= 3*2^64 - 4 = 2:(2^64-4) C 4: For j = 1, x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 4) + (2^64-1)*2^64 C <= 3*2^128 - 2*2^64 - 2 = 2:(2^64-3):(2^64-2), C so cy:T1 <= 3*2^64 - 3. For j > 1, C x[i]*y[j] + m[j]*u + (tmp[j+1] + cy):T1 <= 2:(2^64-3):(2^64-1), C so cy:T1 <= 3*2^64 - 3 = 2:(2^64-3) holds for all j < len - 1. C For j = len - 1, we know from note 2 that tmp(len) <= 1 for i = 0. C Assume this is true for index i-1, Then C x[i]*y[len-1] + m[len-1]*u + (tmp[len] + cy):T1 C <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64 C <= 2*2^128 - 1 = 1:(2^64-1):(2^64-1), C so cy:T1 <= 1:(2^64-1) and tmp[len] <= 1 for all i by induction. C C Register vars: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11 C YP = r5, MP = r6, TP = r1 (stack ptr) C C local variables: tmp[0 ... 7] array, having 7+1 8-byte words C The tmp array needs 7+1 entries, but tmp[7] is stored in C r15, so only 7 entries are used in the stack. 
C mulredc7: Montgomery multiplication with reduction (REDC) for 7-limb
C operands, fully unrolled over the inner index j.
C   r3 = z (result), r4 = x, r5 = y, r6 = m, r7 = invm = -1/m mod 2^64.
C   On return z[0..6] holds tmp[0..6] and r3 holds tmp[7] (0 or 1).
C Register roles: T0 = r13, T1 = r14, CY = r10, XI = r12, U = r11,
C   r15 = tmp[7], r16 = constant 0 (so "addze rX, r16" turns the carry flag
C   into the value 0 or 1), r0/r8/r9 = scratch.
C r13-r16 are pushed below r1 on entry and popped again before blr.
C NOTE(review): r1 is moved with stdu/subi instead of building an ABI stack
C frame - confirm this is acceptable for every target this file is built for.
	TEXT
.align 5 C powerPC 32 byte alignment
	TYPE(.GSYM_PREFIX`'mulredc`'7,`@function')
.GSYM_PREFIX`'mulredc7:

C ########################################################################
C # i = 0 pass
C #########################################################################

C Pass for j = 0. We need to fetch x[i] from memory and compute the new u
	ld r12, 0(r4) C XI = x[0]
	ld r0, 0(r5) C y[0]
	stdu r13, -8(r1) C save r13
	mulld r8, r0, r12 C x[0]*y[0] low half
	stdu r14, -8(r1) C save r14
	mulhdu r9, r0, r12 C x[0]*y[0] high half
	ld r0, 0(r6) C m[0]
	mulld r11, r7, r8 C U = T0*invm mod 2^64
	stdu r15, -8(r1) C save r15
	mulld r13, r0, r11 C T0 = U*m[0] low
	stdu r16, -8(r1) C save r16
	li r16, 0 C set r16 to zero for carry propagation
	subi r1, r1, 56 C set tmp stack space (7 limbs of 8 bytes)
	mulhdu r14, r0, r11 C T1 = U*m[0] high
	ld r0, 8(r5) C y[1]
	addc r8, r8, r13 C result = 0 by choice of U, only the carry matters
	adde r13, r9, r14 C T0 = initial tmp(0)
	addze r10, r16 C carry to CY
C CY:T1:T0 <= 2*(2^64-1)^2 = 2*2^128 - 4*2^64 + 2, hence
C CY:T1 <= 2*2^64 - 4

C Pass for j = 1
	mulld r8, r0, r12 C x[i]*y[j] low half
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 8(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	adde r14, r9, r10 C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
	mulld r8, r0, r11 C U*m[j] low
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 16(r5) C y[j+1]
	adde r13, r9, r14 C add high word with carry to T1
	addze r10, r16 C carry to CY
	std r8, 0(r1) C store tmp[j-1]
C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 2
	mulld r8, r0, r12 C x[i]*y[j] low half
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 16(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	adde r14, r9, r10 C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
	mulld r8, r0, r11 C U*m[j] low
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 24(r5) C y[j+1]
	adde r13, r9, r14 C add high word with carry to T1
	addze r10, r16 C carry to CY
	std r8, 8(r1) C store tmp[j-1]
C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 3
	mulld r8, r0, r12 C x[i]*y[j] low half
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 24(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	adde r14, r9, r10 C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
	mulld r8, r0, r11 C U*m[j] low
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 32(r5) C y[j+1]
	adde r13, r9, r14 C add high word with carry to T1
	addze r10, r16 C carry to CY
	std r8, 16(r1) C store tmp[j-1]
C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 4
	mulld r8, r0, r12 C x[i]*y[j] low half
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 32(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	adde r14, r9, r10 C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
	mulld r8, r0, r11 C U*m[j] low
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 40(r5) C y[j+1]
	adde r13, r9, r14 C add high word with carry to T1
	addze r10, r16 C carry to CY
	std r8, 24(r1) C store tmp[j-1]
C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 5
	mulld r8, r0, r12 C x[i]*y[j] low half
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 40(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	adde r14, r9, r10 C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
	mulld r8, r0, r11 C U*m[j] low
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 48(r5) C y[j+1]
	adde r13, r9, r14 C add high word with carry to T1
	addze r10, r16 C carry to CY
	std r8, 32(r1) C store tmp[j-1]
C CY:T1:T0 <= 2^128 - 2 + 2^128 - 2*2^64 + 1 <=
C 2 * 2^128 - 2*2^64 - 1 ==> CY:T1 <= 2 * 2^64 - 3

C Pass for j = 6. Don't fetch new data from y[j+1].
	mulld r8, r0, r12 C x[i]*y[j] low half
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 48(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	adde r14, r9, r10 C add high word with carry + CY to T1
C T1:T0 <= 2^128 - 2*2^64 + 1 + 2*2^64 - 3 <= 2^128 - 2, no carry!
	mulld r8, r0, r11 C U*m[j] low
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	adde r13, r9, r14 C add high word with carry to T1
	std r8, 40(r1) C store tmp[len-2]
	addze r15, r16 C put carry in r15 (tmp[len] <= 1)
	std r13, 48(r1) C store tmp[len-1]

C #########################################################################
C # i > 0 passes
C #########################################################################

	li r9, 6 C outer loop count (the remaining i = 1 ... 6)
	mtctr r9

1:

C Pass for j = 0. We need to fetch x[i], tmp[0] and tmp[1] from memory
C and compute the new u
	ldu r12, 8(r4) C x[i] (r4 is advanced by one limb each iteration)
	ld r0, 0(r5) C y[0]
	ld r13, 0(r1) C tmp[0]
	mulld r8, r0, r12 C x[i]*y[0] low half
	ld r14, 8(r1) C tmp[1]
	mulhdu r9, r0, r12 C x[i]*y[0] high half
	addc r13, r8, r13 C T0
	ld r0, 0(r6) C m[0]
	mulld r11, r7, r13 C U = T0*invm mod 2^64
	adde r14, r9, r14 C T1
	mulld r8, r0, r11 C U*m[0] low
	addze r10, r16 C CY
	mulhdu r9, r0, r11 C U*m[0] high
	ld r0, 8(r5) C y[1]
	addc r8, r8, r13 C result = 0
	adde r13, r9, r14 C T0, carry pending
C cy:T1:T0 <= 2*(2^64 - 1)^2 + 2^128 - 1 = 3*2^128 - 4*2^64 + 1,
C so cy:T1 <= 3*2^64 - 4

C Pass for j = 1
	ld r14, 16(r1) C tmp[j+1]
	mulld r8, r0, r12 C x[i]*y[j] low half
	adde r14, r14, r10 C tmp[j+1] + CY + pending carry
	addze r10, r16 C carry to CY
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 8(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	mulld r8, r0, r11 C U*m[j] low
	adde r14, r9, r14 C add high to T1
	addze r10, r10 C add carry to CY
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 16(r5) C y[j+1]
	adde r13, r9, r14 C T1, carry pending
	std r8, 0(r1) C store tmp[j-1]
C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 2
	ld r14, 24(r1) C tmp[j+1]
	mulld r8, r0, r12 C x[i]*y[j] low half
	adde r14, r14, r10 C tmp[j+1] + CY + pending carry
	addze r10, r16 C carry to CY
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 16(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	mulld r8, r0, r11 C U*m[j] low
	adde r14, r9, r14 C add high to T1
	addze r10, r10 C add carry to CY
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 24(r5) C y[j+1]
	adde r13, r9, r14 C T1, carry pending
	std r8, 8(r1) C store tmp[j-1]
C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 3
	ld r14, 32(r1) C tmp[j+1]
	mulld r8, r0, r12 C x[i]*y[j] low half
	adde r14, r14, r10 C tmp[j+1] + CY + pending carry
	addze r10, r16 C carry to CY
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 24(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	mulld r8, r0, r11 C U*m[j] low
	adde r14, r9, r14 C add high to T1
	addze r10, r10 C add carry to CY
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 32(r5) C y[j+1]
	adde r13, r9, r14 C T1, carry pending
	std r8, 16(r1) C store tmp[j-1]
C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 4
	ld r14, 40(r1) C tmp[j+1]
	mulld r8, r0, r12 C x[i]*y[j] low half
	adde r14, r14, r10 C tmp[j+1] + CY + pending carry
	addze r10, r16 C carry to CY
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 32(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	mulld r8, r0, r11 C U*m[j] low
	adde r14, r9, r14 C add high to T1
	addze r10, r10 C add carry to CY
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 40(r5) C y[j+1]
	adde r13, r9, r14 C T1, carry pending
	std r8, 24(r1) C store tmp[j-1]
C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 5
	ld r14, 48(r1) C tmp[j+1]
	mulld r8, r0, r12 C x[i]*y[j] low half
	adde r14, r14, r10 C tmp[j+1] + CY + pending carry
	addze r10, r16 C carry to CY
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 40(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	mulld r8, r0, r11 C U*m[j] low
	adde r14, r9, r14 C add high to T1
	addze r10, r10 C add carry to CY
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	ld r0, 48(r5) C y[j+1]
	adde r13, r9, r14 C T1, carry pending
	std r8, 32(r1) C store tmp[j-1]
C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + (2^64-1)*2^64
C <= 3*2^128 - 2*2^64 - 1 ==> CY:T1 <= 3*2^64 - 3

C Pass for j = 6. Don't fetch new data from y[j+1].
	mulld r8, r0, r12 C x[i]*y[j] low half
	adde r14, r15, r10 C T1 = tmp[len] + CY + pending carry
C since tmp[len] <= 1, T1 <= 3 and carry is zero
	mulhdu r9, r0, r12 C x[i]*y[j] high half
	ld r0, 48(r6) C m[j]
	addc r13, r8, r13 C add low word to T0
	mulld r8, r0, r11 C U*m[j] low
	adde r14, r9, r14 C add high to T1
	addze r10, r16 C CY
	mulhdu r9, r0, r11 C U*m[j] high
	addc r8, r8, r13 C add T0 and low word
	adde r13, r9, r14 C T1, carry pending
	std r8, 40(r1) C store tmp[len-2]
	addze r15, r10 C store tmp[len] <= 1
	std r13, 48(r1) C store tmp[len-1]
C CY:T1:T0 <= 2*(2^64 - 1)^2 + (3*2^64 - 3) + 2^64
C <= 2*2^128 - 1 ==> CY:T1 <= 2*2^64 - 1 = 1:(2^64-1)

	bdnz 1b

C Copy result from tmp memory to z
C (the ldu/stdu forms advance r1 and r3 by one limb per transfer, so r1 ends
C up on tmp[6], directly below the saved r16)
	ld r8, 0(r1)
	ldu r9, 8(r1)
	std r8, 0(r3)
	stdu r9, 8(r3)
	ldu r8, 8(r1)
	ldu r9, 8(r1)
	stdu r8, 8(r3)
	stdu r9, 8(r3)
	ldu r8, 8(r1)
	ldu r9, 8(r1)
	stdu r8, 8(r3)
	stdu r9, 8(r3)
	ldu r8, 8(r1)
	stdu r8, 8(r3)

	mr r3, r15 C return tmp(len)
	ldu r16, 8(r1) C restore r16 ... r13 in reverse order of the saves
	ldu r15, 8(r1)
	ldu r14, 8(r1)
	ldu r13, 8(r1)
	addi r1, r1, 8 C r1 is back at its value on entry
	blr

	.size .GSYM_PREFIX`'mulredc7, .-.GSYM_PREFIX`'mulredc7
ecm-6.4.4/auxlib.c0000644023561000001540000001465412106741273010660 00000000000000/* Auxiliary routines for the ecm library. Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2011 Paul Zimmermann, Alexander Kruppa. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB.
If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ /* need stdio.h and stdarg.h for gmp.h to declare gmp_vfprintf */ #include #include #include #include "ecm-impl.h" #if TIME_WITH_SYS_TIME # include # include #else # if HAVE_SYS_TIME_H # include # else # include # endif #endif #ifdef HAVE_LIMITS_H # include #else # ifndef ULONG_MAX # define LONG_MAX (__GMP_ULONG_MAX / 2) # endif #endif #ifdef HAVE_STDINT #include #else /* size_t is an unsigned integer so this ought to work */ #ifndef SIZE_MAX #define SIZE_MAX (~((size_t) 0)) #endif #endif #define VERBOSE __ECM(verbose) static int VERBOSE = OUTPUT_NORMAL; void mpz_add_si (mpz_t r, mpz_t s, long i) { if (i >= 0) mpz_add_ui (r, s, (unsigned long) i); else mpz_sub_ui (r, s, (unsigned long) (-i)); } void mpz_sub_si (mpz_t r, mpz_t s, long i) { if (i >= 0) mpz_sub_ui (r, s, (unsigned long) i); else mpz_add_ui (r, s, (unsigned long) (-i)); } /* Divide RS by 3 */ void mpz_divby3_1op (mpz_t RS) { mp_size_t abssize = mpz_size (RS); if (abssize == 0) return; mpn_divexact_by3 (RS->_mp_d, RS->_mp_d, abssize); if (RS->_mp_d[abssize - 1] == 0) RS->_mp_size -= mpz_sgn (RS); } /* Convert a double d to a size_t. If d < 0., returns 0. If d > MAX_SIZE, returns MAX_SIZE. */ size_t double_to_size (double d) { if (d < 0.) 
return (size_t) 0; if (d > (double) SIZE_MAX) return SIZE_MAX; return (size_t) d; } /* cputime () gives the elapsed time in milliseconds */ #if defined (_WIN32) /* First case - GetProcessTimes () is the only known way of getting process * time (as opposed to calendar time) under mingw32 */ #include long cputime () { FILETIME lpCreationTime, lpExitTime, lpKernelTime, lpUserTime; ULARGE_INTEGER n; HANDLE hProcess = GetCurrentProcess(); GetProcessTimes (hProcess, &lpCreationTime, &lpExitTime, &lpKernelTime, &lpUserTime); /* copy FILETIME to a ULARGE_INTEGER as recommended by MSDN docs */ n.u.LowPart = lpUserTime.dwLowDateTime; n.u.HighPart = lpUserTime.dwHighDateTime; /* lpUserTime is in units of 100 ns. Return time in milliseconds */ return (long) (n.QuadPart / 10000); } #elif defined (HAVE_GETRUSAGE) /* Next case: getrusage () has higher resolution than clock () and so is preferred. */ #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_RESOURCE_H # include #endif long cputime () { struct rusage rus; getrusage (RUSAGE_SELF, &rus); /* This overflows a 32 bit signed int after 2147483s = 24.85 days */ return rus.ru_utime.tv_sec * 1000L + rus.ru_utime.tv_usec / 1000L; } #else /* Resort to clock (), which on some systems may return calendar time. */ long cputime () { /* Return time in milliseconds */ return (long) (clock () * (1000. / (double) CLOCKS_PER_SEC)); } #endif /* defining cputime () */ /* ellapsed time (in milliseconds) between st0 and st1 (values of cputime) */ long elltime (long st0, long st1) { if (st1 >= st0) return st1 - st0; else { /* A wrap around can only really happen on a system where long int is 32 bit and where we use clock(). So we assume that there was exactly one wrap-around which "swallowed" LONG_MAX * (1000. / (double) CLOCKS_PER_SEC) milliseconds. */ return st1 - st0 + (long)(LONG_MAX * (1000. 
/ (double) CLOCKS_PER_SEC)); } } /* Get real (wall-clock) time in milliseconds */ long realtime () { #ifdef HAVE_GETTIMEOFDAY struct timeval tv; if (gettimeofday(&tv, NULL) != 0) return 0L; return (long) tv.tv_sec * 1000L + (long) tv.tv_usec / 1000L; #else return 0L; #endif } int get_verbose () { return VERBOSE; } /* Tests if loglevel gets printed with the current verbose setting */ int test_verbose (int loglevel) { return (loglevel <= VERBOSE); } void set_verbose (int v) { VERBOSE = v; } int inc_verbose () { VERBOSE ++; return VERBOSE; } int outputf (int loglevel, char *format, ...) { va_list ap; int n = 0; va_start (ap, format); MEMORY_TAG; /* For gmp_*printf's temp allocs */ if (loglevel != OUTPUT_ERROR && loglevel <= VERBOSE) { n = gmp_vfprintf (ECM_STDOUT, format, ap); fflush (ECM_STDOUT); } else if (loglevel == OUTPUT_ERROR) n = gmp_vfprintf (ECM_STDERR, format, ap); MEMORY_UNTAG; va_end (ap); return n; } void writechkfile (char *chkfilename, int method, double p, mpmod_t modulus, mpres_t A, mpres_t x, mpres_t z) { FILE *chkfile; char *methodname; mpz_t t; outputf (OUTPUT_DEVVERBOSE, "Writing checkpoint to %s at p = %.0f\n", chkfilename, p); switch (method) { case ECM_ECM : methodname = "ECM"; break; case ECM_PM1 : methodname = "P-1"; break; case ECM_PP1 : methodname = "P+1"; break; default: outputf (OUTPUT_ERROR, "writechkfile: Invalid method\n"); return; } chkfile = fopen (chkfilename, "w"); if (chkfile == NULL) { outputf (OUTPUT_ERROR, "Error opening checkpoint file %s\n", chkfilename); return; } mpz_init (t); gmp_fprintf (chkfile, "METHOD=%s; B1=%.0f; N=%Zd;", methodname, p, modulus->orig_modulus); mpres_get_z (t, x, modulus); gmp_fprintf (chkfile, " X=0x%Zx;", t); if (method == ECM_ECM) { mpres_get_z (t, z, modulus); gmp_fprintf (chkfile, " Z=0x%Zx;", t); mpres_get_z (t, A, modulus); gmp_fprintf (chkfile, " A=0x%Zx;", t); } fprintf (chkfile, "\n"); mpz_clear (t); fflush (chkfile); fclose (chkfile); } 
ecm-6.4.4/random.c0000644023561000001540000001131412106741273010642 00000000000000/* Random initialization for P-1 and P+1. Copyright 2005, 2006, 2008 Paul Zimmermann, Alexander Kruppa, Dave Newman. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #ifdef OUTSIDE_LIBECM # include "ecm-ecm.h" #else # include "ecm-impl.h" #endif #ifdef HAVE_UNISTD_H # include /* getpid */ #endif #ifdef TIME_WITH_SYS_TIME # include # include #else # if HAVE_SYS_TIME_H # include # else # include # endif #endif #if defined (_MSC_VER) || defined (__MINGW32__) # include # include #endif #if 0 /* dirty hack until outputf gets fixed */ #ifdef outputf # undef outputf # define outputf(x,y) printf(y) #endif #endif /* put in 'a' a valid random seed for P-1, i.e. gcd(a,n)=1 and a <> {-1,1} */ void pm1_random_seed (mpz_t a, mpz_t n, gmp_randstate_t randstate) { mpz_t q; mpz_init (q); do { mpz_urandomb (a, randstate, 32); mpz_gcd (q, a, n); } while (mpz_cmp_ui (q, 1) != 0 || mpz_cmp_ui (a, 1) == 0 || mpz_cmp_si (a, -1) == 0); mpz_clear (q); } /* put in seed a valid random seed for P+1 */ void pp1_random_seed (mpz_t seed, mpz_t n, gmp_randstate_t randstate) { mpz_t q; /* need gcd(p^2-4, n) = 1. 
*/ mpz_init (q); do { mpz_urandomb (q, randstate, 32); mpz_add_ui (q, q, 1); mpz_set (seed, q); mpz_mul (q, q, q); mpz_sub_ui (q, q, 4); mpz_gcd (q, q, n); } while (mpz_cmp_ui (q, 1) != 0); mpz_clear (q); } /* Produces a random unsigned long value */ #if defined (_MSC_VER) || defined (__MINGW32__) unsigned long get_random_ul (void) { SYSTEMTIME tv; HCRYPTPROV Prov; if (CryptAcquireContext (&Prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) { int r; unsigned long rnd; r = CryptGenRandom (Prov, sizeof (unsigned long), (void *) &rnd); CryptReleaseContext (Prov, 0); if (r) { /* warning: outputf is not exported from libecm */ #if !defined (OUTSIDE_LIBECM) && !defined(GPUECM) outputf (OUTPUT_DEVVERBOSE, "Got seed for RNG from CryptGenRandom\n"); #endif return rnd; } } /* warning: outputf is not exported from libecm */ #if !defined (OUTSIDE_LIBECM) && !defined(GPUECM) outputf (OUTPUT_DEVVERBOSE, "Got seed for RNG from GetSystemTime\n"); #endif GetSystemTime (&tv); /* This gets us 27 bits of somewhat "random" data based on the time clock. 
It would probably do the program justice if a better random mixing was done in the non-MinGW get_random_ul if /dev/random does not exist */ return ((tv.wHour<<22)+(tv.wMinute<<16)+(tv.wSecond<<10)+tv.wMilliseconds) ^ ((tv.wMilliseconds<<17)+(tv.wMinute<<11)+(tv.wHour<<6)+tv.wSecond); } #else unsigned long get_random_ul (void) { FILE *rndfd; struct timeval tv; unsigned long t; /* Try /dev/urandom */ rndfd = fopen ("/dev/urandom", "r"); if (rndfd != NULL) { if (fread (&t, sizeof (unsigned long), 1, rndfd) == 1) { /* warning: outputf is not exported from libecm */ #if !defined (OUTSIDE_LIBECM) && !defined(GPUECM) outputf (OUTPUT_DEVVERBOSE, "Got seed for RNG from /dev/urandom\n"); #endif fclose (rndfd); return t; } fclose (rndfd); } #ifdef HAVE_GETTIMEOFDAY if (gettimeofday (&tv, NULL) == 0) { /* warning: outputf is not exported from libecm */ #if !defined (OUTSIDE_LIBECM) && !defined(GPUECM) outputf (OUTPUT_DEVVERBOSE, "Got seed for RNG from gettimeofday()\n"); #endif return (unsigned long) tv.tv_sec + (unsigned long) tv.tv_usec * 2147483629UL; } #endif /* warning: outputf is not exported from libecm */ #if !defined (OUTSIDE_LIBECM) && !defined(GPUECM) outputf (OUTPUT_DEVVERBOSE, "Got seed for RNG from time()+getpid()\n"); #endif /* Multiply one value by a large prime to get a bit of avalance effect */ return (unsigned long) time (NULL) + (unsigned long) getpid () * 2147483629UL; } #endif ecm-6.4.4/TODO0000644023561000001540000001444412106741273007715 00000000000000ToDo's (see also TODO.sp): Table of contents: 1) efficiency/memory 2) interface 3) documentation 4) installation 5) bugs 6) others 1) efficiency/memory - use a random sigma value of 64 bits by default - try the mpn/generic/{sb,dc,mu}_bdiv_qr.c functions in GMP >= 4.3.0 for REDC - the conversion from NTT primes to mpz_t in function mpzspv_to_mpzv() (file mpzspv.c) is quadratic. A faster conversion is possible with a product tree (already done for the mpz_t -> NTT conversion). 
- even worse, mpzspm_init seems to be cubic in the input size (because the CRT algorithm used is quadratic in sp_num). We should use a subquadratic CRT. - the "Reducing G * H" step is faster in NTT than with KS. This is probably due to the fact that some transforms are cached in the NTT mode. - the "Reducing G * H" step can be improved as follows: first compute D = GH*I mod (x^d+1) where d = deg(F), and I = 1/F mod (x^d+1); then compute E = D*F mod (x^d-1); finally compute T = (GH-E)/2 mod (x^d+1). T equals the Montgomery product GH/(x^d+1) mod F. See the paper "Fast convolution meets Montgomery" by Preda Mihailescu (Mathematics of Computation). - slowdown in stage 1 with REDC between a 58672-digit number and a 58688-digit number [reported by Christophe.CLAVIER@gemalto.com, 29 Aug 2007] (((2003663613*2^195000-2)/(2*23*173*3863))/1954173900202379)/3612632846010637 ((2003663613*2^195000-2)/(2*23*173*3863))/1954173900202379 with B1=1000 on an Opteron (44.2s for c58672, 67.5s for c58688). The culprit seems to be the REDC routine in mpmod.c: indeed, in case the modulus has n limbs, but the most significant one has only a few bits, the product (called x in REDC) has only 2n-1 limbs, and we never call Mulders's short product in ecm_redc_n (however the else-code using full products seem faster in that case). For c58672, if one replaces if (xn == 2 * n) in mpmod.c/REDC by if (xn >= 2 * n - 1), the time of stage 1 grows from 44s to 64s, whereas ecm_redc_n should be faster... This problem is still present in 6.2, ecm_redc_n should be better tuned, in particular the choice k=0.75*n in ecm_mul_lo_n() is far from optimal. - in Brent-Suyama's extension, the evaluation of a polynomial of degree k over N consecutive values is currently done using a O(k N) algorithm [table of differences]. 
One can do O(N/k M(k)), cf Section 3 from "Linear recurrences with polynomial coefficients and application to integer factorization and Cartier-Manin operator", by Alin Bostan, Pierrick Gaudry and Eric Schost, SIAM journal on computing, vol. 36, no. 6, pp. 1777 - 1806, 2007. It is not clear if this result also applies to ECM, but at least it should work for P-1 and P+1. - why restrict the use of mpn_mul_fft to Fermat numbers? We could use it for any cofactor of 2^(n*BITS_PER_MP_LIMB)+1, as long as mpn_fft_next_size (n, mpn_fft_best_k (n, S1 == S2)) == n. - use mpres in step 2 (Target: 7.0) - write a mpn version of add3 and duplicate - rewrite entire mpmod.c to be based on mpn_* functions, not mpz_* - take relative speed of multiplying/squaring into account in PRAC (DN: couldn't get any significant speed increase) - use/implement a mpn_mul_hi_n routine for use in mpn_REDC - use mpn_addmul_2, mpn_addmul_4 in the basecase REDC [for machines where they exist]. ASM code should perhaps be moved into GMP. - try McLaughlin's algorithm for Montgomery's modular multiplication (http://www.ams.org/mcom/0000-000-00/S0025-5718-03-01543-6/home.html) - consider Colin Percival's generalized DWT for multiplication modulo k*a^n+b, where k*a*b is highly composite. May belong to GMP rather than GMP-ECM. - implement assembly code (redc.asm) for other architectures - allow composite d2, or better use the S1+S2 idea from the P+-1 algorithm of Montgomery and Kruppa. - init mpz_t's with correct amount of memory allocated to avoid reallocs. Check for reallocs with GMP's memory interface routines. (Partly done.) - try sliding window multiplication for ECM stage 1 (Target: 7.0) - choose Brent/Suyama polynomial according to B2/k and not B2! - Adjust estimated memory to take into account -treefile and NTT (done but improvement possible) - when GWNUM is used, lower the default B2 (James Wanless, 17 Mar 2006, james at grok.ltd.uk) - implement enhanced standard continuation?
With graph cover algorithm? - parallel/distributed stage 2? - add curve selection for torsion group of order 8 or 16, see Montgomery's thesis (request of Peter Lawrence Montgomery) - Torbj"orn Granlund suggested faster code for mpn_mod_1(), used extensively in NTT. See http://lists.gforge.inria.fr/pipermail/ecm-discuss/2008-May/003365.html 2) interface - from Mark Rodenkirch 08 April 2011: print messages like "Step 1: 1500000/100000000" with a command-line option (or with -v) http://lists.gforge.inria.fr/pipermail/ecm-discuss/2011-April/004088.html - with -resume, print %time for THIS RUN instead of total run? [suggested by SleepHound ] Add CPUTIME=... in the save file, to take into account the total cpu time spent so far (in seconds). George Woltman agrees with that change. It won't hurt prime95/mprime -> will be added for his next version. - when resuming, print the *initial* x0 for P-1/P+1? - [from Jakub Pawlewicz ] add an option -stage1time t to tell the step 1 time, when done by another program. PZ: or better have it in resume file? (Target: 6.1. Command line option done) 3) documentation 4) installation - check for __builtin_constant_p and __builtin_expect at configure time - [suggested by Peter Montgomery] add the possibility to compile a "fat" binary, which automatically selects the best mulredc assembly code depending on the cpuid [see TODO.fat] - [suggested by Thomas Kunz, who did port GMP-ECM to the PS3, i.e., to the Cell architecture]: several changes to make it easier to port GMP-ECM to specific architectures. Cf TODO.kunz. 5) bugs 6) others - add primality proving of factors/cofactors? Maybe link Pari for this? - add point counting algorithm? SEA implementation exists for Pari/GP, use that? - let user specify previous factoring work, compute distribution of candidate factors, compute probability of/est. time to finding a factor with given parameters. - re-write in C++? Lots of work, but would make parts of the code much cleaner.
ecm-6.4.4/README0000644023561000001540000010357612106741273010112 00000000000000This is the README file for GMP-ECM. (See INSTALL-ecm for installing GMP-ECM and the ecm library, and README.lib for using the ecm library.) Table of contents of this file: 1. Basic usage. 2. How to use P-1, P+1, and ECM efficiently? 3. Extra factors and Brent-Suyama's extension. 4. Memory usage. 5. Expression syntax reference for GMP-ECM's syntax parser. 6. Options -save, -resume and -chkpnt. 7. Working with very large numbers (the -prp* options). 8. How to get the best of GMP-ECM? 9. Record factors. 10. Known problems. ############################################################################## 1. Basic usage GMP-ECM reads the numbers to be factored from stdin (one number on each line) and requires a numerical parameter, the stage 1 bound B1. A reasonable stage 2 bound B2 for the given B1 is chosen by default, but can be overridden by a second numerical parameter. By default, GMP-ECM uses the ECM factoring algorithm. Example: To run one curve of ECM with B1=1000000 on each number in the file "composites", run ecm 1000000 < composites To use a B2 value of ~5*10^8 instead of the default value of ~10^9, run ecm 1000000 5e8 < composites Scientific notation is accepted for B1 and B2 values. The actual B2 value used may be larger than the specified value to let parameters satisfy some conditions imposed by the stage 2 algorithm. To run one curve with B1=11e7 on M1061, simply do: echo "2^1061-1" | ecm 11e7 To run more than one ECM curve on each input number, use the -c parameter. Example: to run 100 curves with B1=1000000 and default B2 on each number in "composites", run ecm -c 100 1000000 < composites To use the P-1 or P+1 factoring methods, use the -pm1 or -pp1 parameter, respectively. 
Example: to use the P-1 method with B1=10^9 on all numbers in the file "composites", run ecm -pm1 1e9 < composites Note that, unlike for ECM, using the same B1,B2 bounds repeatedly on one number is quite useless for P-1, and of limited use for P+1. See "2. How to use P-1, P+1, and ECM efficiently?" ############################################################################## 2. How to use P-1, P+1, and ECM efficiently? The P-1 method works well when the input number has a prime factor P such that P-1 is "smooth", i.e., has all its prime factors less than or equal to the step 1 bound B1, except one which may be less than or equal to the second step bound B2. For P=67872792749091946529, we have P-1 = 2^5 * 11 * 17 * 19 * 43 * 149 * 8467 * 11004397, so this factor will be found as long as B1 >= 8467 and B2 >= 11004397: $ echo 67872792749091946529 | ./ecm -pm1 -x0 2809890345 8467 11004397 GMP-ECM ... [powered by GMP ...] [P-1] Input number is 67872792749091946529 (20 digits) Using B1=8467, B2=6710-19370830, x0=2809890345 Step 1 took 3ms Step 2 took 14ms ********** Factor found in step 2: 67872792749091946529 Found input number N There is no need to run P-1 several times with the same B1 and B2 as there is for ECM, since a factor found with one seed will (almost always) be found by another one. The P+1 method works well when the input number has a prime factor P such that P+1 is "smooth". For P=4190453151940208656715582382315221647, we have P+1 = 2^4 * 283 * 2423 * 21881 * 39839 * 1414261 * 2337233 * 132554351, so this factor will be found as long as B1 >= 2337233 and B2 >= 132554351: $ echo 4190453151940208656715582382315221647 | ./ecm -pp1 -x0 7 2337233 132554351 GMP-ECM ... [powered by GMP ...]
[P+1] Input number is 4190453151940208656715582382315221647 (37 digits) Using B1=2337233, B2=2324738-343958122, x0=7 Step 1 took 750ms Step 2 took 120ms ********** Factor found in step 2: 4190453151940208656715582382315221647 Found input number N However not all seeds will succeed: only half of the seeds 'x0' work for P+1 (namely those where the Jacobi symbol of x0^2-4 and P is -1.) Unfortunately, since P is usually not known in advance, there is no way to ensure that this holds. However, if the seed is chosen randomly, there is a probability of about 1/2 that it will give a Jacobi symbol of -1 (i.e., the factor P will be found if P+1 is smooth enough). A rule of thumb is to run 3 times P+1 with different random seeds. The seeds 2/7 and 6/5 have a slightly higher chance of success than average as they lead to a group order divisible by 6 or 4, respectively. When factoring Fibonacci numbers F_n or Lucas numbers L_n, using the seed 23/11 ensures that the group order is divisible by 2n, making other P+1 (and probably P-1) work unnecessary. As of version 6.2, a new stage 2 for the P-1 and P+1 algorithms is implemented. It uses less memory and is faster than the previous code, thus allowing larger B2 values. If GMP-ECM is configured with the "--enable-openmp" flag and is compiled with a compiler that implements OpenMP, it uses multi-threading for computation of polynomial roots and NTT multiplication. When not using the NTT, it benefits from multi-threading only in the computation of roots phase. The number of threads to use can be controlled with the OMP_NUM_THREADS environment variable. Unlike the previous generic stage 2, the new stage 2 cannot use the Brent-Suyama extension (-power and -dickson parameters). Specifying these options on the command line forces use of the generic stage 2. Note: the notation of the parameters follows that in the paper, the number of multi-point evaluations (similar to "blocks") is given by s_2. 
You can specify a lower limit for s_2 by the -k command line parameter. The ECM method is a probabilistic method, and can be viewed in some sense as a generalization of the P-1 and P+1 method, where we only require that P+t+1 is smooth, where t depends on the curve we use and satisfies |t| <= 2*P^(1/2) (Hasse's theorem). The optimal B1 and B2 bounds have to be chosen according to the (usually unknown) size of P. The following table gives a set of nearly optimal B1 and B2 pairs, with the corresponding expected number of curves to find a factor of given size (column "-power 1" does not take into account the extra factors found by Brent-Suyama's exten- sion, whereas column "default poly" takes them into account, with the poly- nomial used by default: D(n) means Dickson's polynomial of degree n): digits D optimal B1 default B2 expected curves N(B1,B2,D) -power 1 default poly 20 11e3 1.9e6 74 74 [x^1] 25 5e4 1.3e7 221 214 [x^2] 30 25e4 1.3e8 453 430 [D(3)] 35 1e6 1.0e9 984 904 [D(6)] 40 3e6 5.7e9 2541 2350 [D(6)] 45 11e6 3.5e10 4949 4480 [D(12)] 50 43e6 2.4e11 8266 7553 [D(12)] 55 11e7 7.8e11 20158 17769 [D(30)] 60 26e7 3.2e12 47173 42017 [D(30)] 65 85e7 1.6e13 77666 69408 [D(30)] Table 1: optimal B1 and expected number of curves to find a factor of D digits with GMP-ECM. After performing the expected number of curves from Table 1, the probability that a factor of D digits was missed is exp(-1), i.e., about 37%. After twice the expected number of curves, it is exp(-2), i.e., about 14%, and so on. Example: after performing 8266 curves with B1=43e6 and B2=2.4e11 (or 7553 curves with -dickson 12), the probability to miss a 50-digit factor is about 37%. From version 6.0 on, GMP-ECM prints the expected number of curves and expected time to find factors of different sizes in verbose mode (option -v). This makes it easy to further optimize parameters for a certain factor size if desired: simply try to minimize the expected time. 
(lengthy NOTE: The order of an elliptic curve with Montgomery parameteriza- tion, as used by GMP-ECM, is known to be divisible by 12. Therefore one can assume that the probability that the order is B1,B2 smooth should be about as great as for a random integer 1/12th in value. However, Montgomery observed that the order behaves even nicer than that: heuristically, it seems that the order is as likely to be smooth as a random integer about 1/23.4 in value. This is the value we use in GMP-ECM and the computed probabilities match those observed in experiments very well. This however means that the so computed values for the expected number of curves for given B1,B2 values and factor sizes do not match those published in the literature where a factor of only 1/12 was used. The factor GMP-ECM uses is defined as ECM_EXTRA_SMOOTHNESS in rho.c, you can change it to 12.0 if you want to reproduce the more pessimistic values found in the literature.) In summary, we advise the following method: 0 - choose a target factor size of D digits 1 - choose optimal B1 and B2 values to find factors of D digits (cf Table 1) 2 - run once P-1 with 10*B1, and the default B2 chosen by GMP-ECM 3 - (optional) run 3 times P+1 with 5*B1, and the default B2 4 - run N(B1,B2,D) times ECM with those B1 and B2, where N(B1,B2,D) is the expected number of ECM curves with step 1 bound B1, step 2 bound B2, to find a factor of D digits (cf above table). 5 - if no factor is found, either increase D by 5 digits and go to 0, or use another factorization method (MPQS, NFS) Note: if a factor is found in steps 2, 3 or 4, simply continue the current step with the remaining cofactor (if composite). There is no need to start again from 0, since the cofactor was already tested, too. ############################################################################## 3. Extra factors and Brent-Suyama's extension. 
GMP-ECM may sometimes find some "extra" factors, such that one factor of P-1, P+1 or P+t+1 exceeds the step 2 bound B2, thanks to Brent-Suyama's extension. Let's explain how it works for P-1, since it's simpler. The classical step 2 (without Brent-Suyama's extension) considers s^(j*d) mod N and s^i mod N, where N is the number to factor, and s is the residue computed in stage 1. Here, d is fixed, and the integers i and j vary in two sets so that j*d-i covers all primes in [B1, B2]. Now consider a polynomial f(x), and compute s^f(j*d) and s^f(i) instead of s^(j*d) and s^i [thus the classical step 2 corresponds to f(x)=x^1]. Then P will be found whenever all but one of the factors of P-1 are <= B1, and one factor divides some f(j*d) - f(i): $ echo 1207946164033269799036708918081 | ./ecm -pm1 -k 3 -power 12 286493 25e6 GMP-ECM ... [powered by GMP ...] [P-1] Input number is 1207946164033269799036708918081 (31 digits) Using B1=286493, B2=30806172, polynomial x^12, x0=1548711558 Step 1 took 320ms Step 2 took 564ms ********** Factor found in step 2: 1207946164033269799036708918081 Found input number N Here the largest factor of P-1 is 83957197, which is 3.35 times larger than B2. Warning: these "extra" factorizations may not be reproducible from one version of GMP-ECM to another one, since they depend on some internal parameters that may change. For P-1 with the generic stage 2, the degree of the Brent-Suyama polynomial should be even. Since i^2k - (j*d)^2k = (i^k - (j*d)^k)(i^k + (j*d)^k), this allows testing two values symmetric around a multiple of d simultaneously, halving the amount of computation required in stage 2. P+1 with the generic stage 2 and ECM do this inherently. The new fast stage 2 for P-1 and P+1 does not support the Brent-Suyama extension. By default, the fast stage 2 is used for P-1 and P+1; giving a -power or -dickson parameter on the command line forces use of the previous, generic stage 2. 
It is recommended to use the new stage 2 (from version 6.2) for P-1 and P+1, which is the default: it is so much faster that it largely compensates the few extra factors that are not found because Brent-Suyama's extension is not available. The default polynomial used for ECM with a given B2 should be near optimal, i.e., give only a marginal overhead in step 2, while enabling extra factors. ############################################################################## 4. Memory usage. Step 1 does not require much memory: O(n) for an input number of n digits. Step 2 may be quite memory expensive, especially for large B2, since its efficient algorithms use some large tables. To reduce the memory usage of step 2, you may increase the 'k' parameter, which controls the number of "blocks" performed in step 2. Multiplying the default value of k by 4 will decrease the memory usage by a factor of 2. For example with B2=1e10 and a 155-digit number, step 2 requires about 55MB with the default k=4, but only 27MB with k=16. Increasing k does, however, slightly increase the time required for step 2 (see section "How to get the best of GMP-ECM?"). An estimation of the memory usage is given at the start of stage 2: $ ecm -v -k 4 10 1e10 < c155 ... Estimated memory usage: 55M ... Step 2 took 18649ms $ ecm -v -k 16 10 1e10 < c155 ... Estimated memory usage: 27M ... Step 2 took 26972ms Another way is to use the -treefile parameter, which causes some of the tables to be stored on disk instead of in memory. Using the option "-treefile /var/tmp/ecmtree" will create the files "/var/tmp/ecmtree.1", "/var/tmp/ecmtree.2" etc. The files are deleted upon completion of stage 2: $ ecm -v -treefile /tmp/ecmtree -k 4 10 1e10 < c155 ... Estimated memory usage: 36M ... Step 2 took 18648ms Due to time consuming disk I/O, this will cause stage 2 to take somewhat longer. How much memory is saved depends on stage 2 parameters, but a typical value is that memory use is reduced by a factor of about 1.5. 
Increasing the number of blocks with -k also reduces the amount of data that needs to get written to disk, thus reducing disk I/O time. Combining these parameters is a very effective way of reducing memory use. From version 6.1 on, there is still another (better) possibility, with the -maxmem option. The command-line -maxmem nnn option tells GMP-ECM to use at most nnn MB in stage 2. It is better than -k because it takes into account the size of the number to be factored, and automatically adjusts the number of blocks to use: $ ./ecm -v -maxmem 40 10 1e10 < c155 ... dF=8192, k=15, d=79170, d2=11, i0=-10 ... Estimated memory usage: 27M ... Step 2 took 25456ms NOTE that in -b "breadth-first" mode, GMP-ECM reads all candidate numbers in the input stream and keeps them in memory, so if there are many large numbers to be tested, the memory requirement will increase noticeably. ############################################################################## 5. Expression syntax reference for GMP-ECM's syntax parser. GMP-ECM can handle several kinds of expressions as input numbers. Here is the syntax that is handled: 1. Raw decimal numbers like 123456789 2. Comments can be placed in the file. The C++ "one line comment" // is used. Everything after the // on a line (including the //) is ignored. Warning: no input number should appear on such a comment line. 3. Line continuation. If a line ends with a backslash character '\', it is considered to continue on the next line (ignoring the '\'). 4. Any white space (space, tab, end of line) is ignored. However, the "end of line" is used to end the expression (unless of course there is a '\' character before the end of line). For example, processing this: 1 2 3 4 5 6 7 8 9 would be the same as processing 123456789 5. "common" arithmetic expressions (* / + - %), the period '.' may be used in place of * for multiply, and - can be unary minus (e.g., -55555551). Example: echo "3*5+2" | ./ecm 100 6.
Grouping ( [ { for start of group (which symbol is used does not matter) and ) ] } to end a group (again all 3 symbols mean the SAME thing). 7. Exponentiation with the ^ character (i.e., 2^24 is the same as 16777216). Example: echo "2^24+1" | ./ecm 100 8. Simple factorial using the exclamation point ! character. Example is 53! == 1*2*3*4...*52*53. Example: echo '53!+1' | ./ecm 1e2 9. Multi-factorial as in: n!m with an example: 15!3 == 15.12.9.6.3. 10. Simple Primorial using the # character with example of 11# == 2*3*5*7*11 11. Reduced Primorial n#m with example of 17#5 == 5.7.11.13.17 12. Functions are possible with the expression parser. Currently, the only available function is Phi(x,n), however other functions should be easy to add in the future. Note: Expressions are maintained as much as possible (even if the expression becomes longer than the decimal expansion). Expressions are output as cofactors (if the input was an expression), and are stored into save/resume files (again if and only if the original input was an expression, and not an expanded decimal number). When a factor is found, the cofactor expression is of the form (original_expression)/factor_found (see however option -cofdec): $ echo "3*2^210+1" | ./ecm -sigma 4007218240 2500 GMP-ECM ... [powered by GMP ...] [ECM] Input number is 3*2^210+1 (64 digits) Using B1=2500, B2=186156, polynomial x^1, sigma=4007218240 Step 1 took 16ms Step 2 took 16ms ********** Factor found in step 2: 1358437 Found probable prime factor of 7 digits: 1358437 Probable prime cofactor (3*2^210+1)/1358437 has 58 digits ############################################################################## 6. Options -save, -resume and -chkpnt. These -save and -resume options are useful to save the current state of the computation after step 1, or to exchange data with other software. It allows to perform step 1 with GMP-ECM, and step 2 with another software (or vice-versa). 
Note: the residue from the end of stage 1 gets written to the file only after stage 2, if stage 2 is performed in the same program run. This way, if a factor is found, the save file entry will contain the new cofactor (if composite) or will be omitted (if cofactor is a probable prime). For periodic saving during stage 1 for crash recovery, use -chkpnt, described below. Here is an example how to reuse some P-1 computation: $ cat c71 13155161912808540373988986448257115022677318870175067553764004308210487 $ ./ecm -save toto -pm1 -mpzmod -x0 2 5000000 < c71 GMP-ECM ... [powered by GMP ...] [P-1] Input number is 13155161912808540373988986448257115022677318870175067553764004308210487 (71 digits) Using B1=5000000, B2=352526802, polynomial x^24, x0=2 Step 1 took 3116ms Step 2 took 2316ms The file "toto" now contains some information about the method used, the step 1 bound, the number to factor, the value X at the end of step 1 (in hexa- decimal), and a checksum to verify that no data was corrupted: $ cat toto METHOD=P-1; B1=5000000; N=13155161912808540373988986448257115022677318870175067553764004308210487; X=0x12530157ae22ae14d54d6a5bc404ae9458e54032c1bb2ab269837d1519f; CHECKSUM=2287710189; PROGRAM=GMP-ECM 6.2; X0=0x2; WHO=zimmerma@clafoutis.loria.fr; TIME=Sat Apr 12 13:41:01 2008; Then one can resume the computation with larger B1 and/or B2 as follows: $ ./ecm -resume toto 1e7 GMP-ECM ... [powered by GMP ...] 
[ECM] Resuming P-1 residue saved by zimmerma@clafoutis.loria.fr with GMP-ECM 6.2 on Sat Apr 12 13:41:01 2008 Input number is 13155161912808540373988986448257115022677318870175067553764004308210487 (71 digits) Using B1=5000000-10000000, B2=9946848-1326917772, Step 1 took 3076ms Step 2 took 4304ms ********** Factor found in step 2: 1448595612076564044790098185437 Found probable prime factor of 31 digits: 1448595612076564044790098185437 Probable prime cofactor 9081321110693270343633073697474256143651 has 40 digits The second run only considered the primes in [5e6-10e6] in step 1, which saved half the time of step 1. The format used is the following: - each line corresponds to a composite (expressions ARE saved in the save file) - a line contains assignments <id>=<value> separated by semi-colons ';' - possible values for <id> are - METHOD (value = ECM or P-1 or P+1) - SIGMA (value = ECM sigma parameter) [ECM only] - B1 (first step bound) - N (composite number to factor) - X (value at the end of step 1) - A (A-parameter of the elliptic curve) [ECM only] - CHECKSUM (internal value to check correctness of the format) - PROGRAM (program used to perform step 1, useful for factor credits) - X0 (initial point for ECM, or initial residue for P-1/P+1) [optional] - WHO (who performed step 1) - TIME (date and time of first step) SIGMA and X0 would be optional, and would mainly be used in case a factor is found, to be able to reproduce the factorization. For ECM, one of the SIGMA or A values must be present, so that the computation can be continued on the correct curve. The B1 and X values satisfy the condition that X is a lcm(1,2,...,B1)-th power in the (multiplicatively written) group. If consecutive lines in a save file being resumed contain the same number to be factored, say when many ECM curves on one number have been saved, factors discovered by GMP-ECM are carried over from one attempt to the next so that factors will be reported only once.
If the cofactor is a probable prime, or if the -one option was given and a factor was found, the remaining consecutive lines for that number will be skipped. Note: it is allowed to have both -save f1 and -resume f2 for the same run, however the files f1 and f2 should be different. Remark: you should not perform in parallel several -resume runs on the same input with the same B1/B2 values, since those runs will do the same computations. Options -save/-resume are useful in the following cases: (a) somebody did a previous step 1 computation with another software which is faster than GMP-ECM, and wants to perform Step 2 with GMP-ECM which is faster for that task. (b) somebody did a previous step 1 for P-1 or P+1 up to a given bound B1, and you want to extend that computation with B1' > B1, without restarting from scratch. Note: this does not apply to ECM, where the smoothness property depends on the (random) curve chosen, not only on the input number. (c) you did a huge step 1 P-1 or P+1 computation on a given machine, and you want to perform a huge step 2 in parallel on several machines. For example machine 1 tests the range B2_1-B2_2, machine 2 tests B2_2-B2_3, ... This also decreases the memory usage for each machine, which is function of the range width B2max-B2min. For the same reason as (b), this does not apply to ECM. The -chkpnt option causes GMP-ECM to write the current residue periodically during the stage 1 computation. This is useful as a safeguard in case the GMP-ECM process is terminated, or the computer loses power, etc. The checkpoint is written every ten minutes, when a signal (SIGINT, SIGTERM) is received, and at the end of stage 1. The format of the checkpoint file is very similar to that of regular save files, and checkpoints can be resumed with the -resume option. 
For example: $ ecm -chkpnt pm1chkpoint -pm1 1e10 1 < largenumber.txt [Computer crashes during computation] $ ecm -resume pm1chkpoint 1e10 1 Note: if an existing file is specified as the checkpoint file, it will be silently overwritten! Note 2: When a checkpoint file is resumed, additional small primes may be processed in stage 1, so the end-of-stage 1 residues of an uninterrupted run and a checkpointed run may not match. The extra primes do not reduce the probability of finding factors, however. ############################################################################## 7. Executing shell commands You can tell GMP-ECM to execute shell commands when a factor is found or to run an external program for PRP testing. This feature is not compiled in by default, it must be enabled by the parameter --enable-shellcmd when running "configure". If you specify -faccmd <cmd> on the command line, <cmd> will be executed whenever a factor is found by P-1, P+1 or ECM (not by trial division). The original number, the factor found and the cofactor are passed to <cmd> via stdin, each number on a line. You may use this for example to have factors automatically sent to you by email: ecm -faccmd 'mail -s "$HOSTNAME found a factor" myemail@myplace.org' \ -c 900 1e6 < candidates.txt The parameter -prpcmd <cmd> lets you specify a program to perform a probable primality test instead of the GMP built-in function. The number to test is passed on one line to <cmd> via stdin. The result of the test is expected as the exit code of <cmd>, where exit code 0 (true) means "is probably prime" and a non-zero code (false) means "is composite". Example: ecm -prpcmd "pfgw --" -c 900 1e6 . The parameter -idlecmd <cmd> will make GMP-ECM run <cmd> before each ECM, P-1 or P+1 attempt on a number. If the exit status of <cmd> is non-zero, GMP-ECM terminates immediately, otherwise it continues normally.
GMP-ECM resumes only after <cmd> has terminated, so this is a way for letting GMP-ECM sleep while the system is busy - just let <cmd> sleep until the system is idle again. ############################################################################## 8. How to get the best of GMP-ECM? [this part has to be rewritten, beginning of new part] After configuring GMP-ECM, type "make bench_mulredc" (or "make bench_mulredc.exe" under Windows) and type: $ ./bench_mulredc # or bench_mulredc.exe under Windows This will output at the end two lines as follows: /* 0:mulredc 1:mul+redc_1 2:mul+redc_2 3:mul+redc_n */ #define TUNE_MULREDC_TABLE {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} /* 0:mulredc 1:sqr+redc_1 2:sqr+redc_2 3:sqr+redc_n */ #define TUNE_SQRREDC_TABLE {0,0,0,0,0,0,0,0,1,1,3,1,1,3,1,1,1,1,2,1,2} Then put those lines in the ecm-params.h file (which is a symbolic link), or replace the corresponding values in that file if already present, and recompile GMP-ECM. [end of new part] Choice of modular multiplication. The ecm program may choose between 4 kinds of modular arithmetic: (1) Montgomery's REDC algorithm at the word level (option -modmuln). It is quite fast for small numbers, but has quadratic asymptotic complexity. (2) classical GMP arithmetic (option -mpzmod). Has some overhead with respect to (1) for small sizes, but wins over (1) for larger sizes since it has quasi-linear asymptotic complexity. (3) Montgomery's REDC algorithm at high level (option -redc). This essentially replaces each division by two multiplications. Slower than (1) and (2) for small inputs, but better for large or very large inputs. (4) base-2 arithmetic for numbers dividing 2^n+1 or 2^n-1. Each division takes only linear time, but the multiplications are more expensive since they are done on larger numbers. (5) If you have a 64 bit processor, use it. Both GMP and the NTT code in GMP-ECM perform MUCH better with 64 bit arithmetic than with 32 bits. This of course requires that you use a 64 bit OS.
Many Linux and BSD distributions let you choose between 32 bit and 64 bit at installation. The ecm program automatically selects what it thinks is the best arithmetic for the given input number. If that choice is not optimal, you may force the use of a certain arithmetic by trying options -modmuln, -mpzmod, -redc. (The best choice should depend on B1 and B2 only very little, so long as B1 is not too small, say >= 1000.) Number of step 2 blocks. The step 2 range [B1,B2] is divided into k "big blocks". The default value of k is chosen to be near to optimal. However, it may be that for a given (B1,B2) pair, another value of k may be better. To test this, give the option -k <n> to ecm, where <n> is 1, 2, 3, ... This will force ecm to divide step 2 in at least <n> blocks. Changing the value of the number of blocks will not modify the chance of finding a factor (except for extra factors, but some will be lost, and some will be won, so the balance should be nearly even). However it will change the time spent in Step 2 and modify the memory used by Step 2 (see the section "Memory usage"). Optimal thresholds. The thresholds for the algorithms used in ecm are defined in ecm-params.h. Several ecm-params.h.* files are included in the distribution and the configure script will select one matching your machine if it exists. If there is no ecm-params.h.* for your machine then you can either compile with default values (not recommended) or you can generate ecm-params.h first with "make ecm-params; make". Stage 2 now uses Number-Theoretic Transforms (NTT) for polynomial arithmetic by default for numbers of at most 30 machine words (NTT_SIZE_THRESHOLD in ecm-ecm.h). The NTT code forces dF to be a power of 2; it can be disabled by passing the command-line option -no-ntt and unconditionally enabled by -ntt. Performance of NTT is dependent on: - Architecture. NTT seems to give the greatest improvement on Athlons, and the least improvement on Pentiums without SSE2. - Thresholds.
It is vital to have ecm-params.h properly tuned for your machine. - C compiler. The SSE2 assembly code for 32 bit and the assembly code for 64 bit only work for x86 using gcc or Intel cc, so it is compiler dependent. Note on factoring Fermat numbers: GMP-ECM features Schönhage-Strassen multiplication for polynomials in stage 2 when factoring Fermat numbers (not in the new, fast stage 2 for P+1 and P-1. This is to be implemented.) This greatly reduces the number of modular multiplications required, thus improving speed. It does, however, restrict the length of the polynomials to powers of two, so that for a given number of blocks (-k parameter), the B2 value can only increase by factors of approximately 4. For the number of blocks, choices of 2, 3 or 4 usually give best performance. However, if the polynomial degree becomes too large, relatively expensive Karatsuba or Toom-Cook methods are required to split the polynomial before Schönhage-Strassen's method can handle them. That can make a larger number of blocks worthwhile. When factoring the m-th Fermat number F_m = 2^(2^m)+1, degrees up to dF=2^(m+1) can be handled directly. If your B2 choice requires a degree much larger than this (dF is printed with the -v parameter), try increasing the number of blocks with -k and see if performance improves. The Brent-Suyama extension should not be used when factoring Fermat numbers, it is more efficient to simply increase B2. Therefore, -power 1 for P+1 and ECM, and -power 2 for P-1 are the default for Fermat numbers. (Larger degrees for Brent-Suyama may possibly become worthwhile for P-1 runs on smaller Fermat numbers and extremely large B2, when Karatsuba and Toom-Cook are used extensively.) Factoring Fermat numbers uses a lot of memory, depending on the size of the Fermat number and on dF. For dF=65536 and F_12, the memory used is about 1700MB.
If your system does not have enough memory, you will have to use a larger number of blocks to reach the desired B2 value with a smaller poly degree dF, which sacrifices some performance. Additionally, you may use the -treefile option (see 4. Memory usage) k=1 k=2 k=3 k=4 dF=256 582132 1222002 1864182 2504052 dF=512 2443992 5008092 7572192 10131672 dF=1024 10016172 20263332 30519732 40766892 dF=2048 42634420 85689250 128744080 171798910 dF=4096 173259252 347500242 521780502 696021492 dF=8192 711738310 1425139180 2138540050 2851940920 dF=16384 2850278350 5703881830 8557643650 11411247130 dF=32768 11702792020 23412731170 35122670320 46832609470 dF=65536 48071333326 96165459406 144259585486 192353711566 dF=131072 194020810630 388069884940 582118959250 776168033560 Table 2: Stage 2 interval length B2-B2min, for dF a power of 2 and small values of k. For example, if you'd like to run stage 2 on F_12 with B2 ~= 40G, try parameters "-k 1 48e9", "-k 3 35e9" or "-k 4 46e9". ############################################################################## 9. Record factors. If you find a very large factor, the program will print a message like: Report your potential champion to (see ) This means that your factor might be a champion, i.e., one of the top-ten largest factors ever found by the corresponding method (P-1, P+1 or ECM). Cf the following URLs: ECM: http://wwwmaths.anu.edu.au/~brent/ftp/champs.txt P-1: http://www.loria.fr/~zimmerma/records/Pminus1.html P+1: http://www.loria.fr/~zimmerma/records/Pplus1.html ############################################################################## 10. Known problems. On some machines, GMP-ECM uses the clock() function to measure the cpu time used by step 1 and 2. Since that function returns a 32-bit integer, there is a possible wrap-around effect when the clock() value goes back from 2^32-1 to 0, which may produce negative timings. 
The NTT code uses primes that fit in one machine word and that are congruent to 1 (mod l), where l is the largest transform length required for the desired stage 2 parameters. For very large B2 on 32-bit machines, there may not be enough suitable primes, which may limit the possible transform length to less than what available memory would permit. This problem occurs mostly in the fast stage 2 for P-1 and P+1, as the generic stage 2 uses far more memory for a given polynomial degree, so that memory on a 32-bit machine will be exhausted long before suitable NTT primes are. The maximal transform length depends on the size of the input number. For a transform length l on a 32 bit machine, N must satisfy: l=2^11:N<2^756200, l=2^12:N<2^379353, l=2^13:N<2^190044, l=2^14:N<2^94870, l=2^15:N<2^47414, l=2^16:N<2^23322, l=2^17:N<2^11620, l=2^18:N<2^5891, l=2^19:N<2^2910, l=2^20:N<2^1340, l=2^21:N<2^578, l=2^22:N<2^228. Since log(N)*l is approximately constant, this limits the amount of memory that can be used to about 600MB for P-1, and 1200MB for P+1. ecm-6.4.4/config.guess0000755023561000001540000012743212106744312011544 00000000000000#! /bin/sh # Attempt to guess a canonical system name. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # 2011, 2012 Free Software Foundation, Inc. timestamp='2012-02-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, see . 
# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Originally written by Per Bothner. Please send patches (context # diff format) to and include a ChangeLog # entry. # # This script attempts to guess a canonical system name similar to # config.sub. If it succeeds, it prints the system name on stdout, and # exits with 0. Otherwise, it exits with 1. # # You can get the latest version of this script from: # http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] Output the configuration name of the system \`$me' is run on. Operation modes: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit Report bugs and patches to ." version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" Try \`$me --help' for more information." # Parse command line while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) echo "$timestamp" ; exit ;; --version | -v ) echo "$version" ; exit ;; --help | --h* | -h ) echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. break ;; -* ) echo "$me: invalid option $1$help" >&2 exit 1 ;; * ) break ;; esac done if test $# != 0; then echo "$me: too many arguments$help" >&2 exit 1 fi trap 'exit 1' 1 2 15 # CC_FOR_BUILD -- compiler used by this script. 
Note that the use of a # compiler to aid in system detection is discouraged as it requires # temporary files to be created and, as you can see below, it is a # headache to deal with in a portable fashion. # Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still # use `HOST_CC' if defined, but it is deprecated. # Portable tmp directory creation inspired by the Autoconf team. set_cc_for_build=' trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; : ${TMPDIR=/tmp} ; { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; dummy=$tmp/dummy ; tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; case $CC_FOR_BUILD,$HOST_CC,$CC in ,,) echo "int x;" > $dummy.c ; for c in cc gcc c89 c99 ; do if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then CC_FOR_BUILD="$c"; break ; fi ; done ; if test x"$CC_FOR_BUILD" = x ; then CC_FOR_BUILD=no_compiler_found ; fi ;; ,,*) CC_FOR_BUILD=$CC ;; ,*,*) CC_FOR_BUILD=$HOST_CC ;; esac ; set_cc_for_build= ;' # This is needed to find uname on a Pyramid OSx when run in the BSD universe. # (ghazi@noc.rutgers.edu 1994-08-24) if (test -f /.attbin/uname) >/dev/null 2>&1 ; then PATH=$PATH:/.attbin ; export PATH fi UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown # Note: order is significant - the case branches are not exclusive. 
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward # compatibility and a consistent mechanism for selecting the # object file format. # # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ /usr/sbin/$sysctl 2>/dev/null || echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched # to ELF recently, or will in the future. case "${UNAME_MACHINE_ARCH}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? os=netbsd else os=netbsdelf fi ;; *) os=netbsd ;; esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. case "${UNAME_VERSION}" in Debian*) release='-gnu' ;; *) release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
echo "${machine}-${os}${release}" exit ;; *:OpenBSD:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; *:SolidBSD:*:*) echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} exit ;; macppc:MirBSD:*:*) echo powerpc-unknown-mirbsd${UNAME_RELEASE} exit ;; *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") UNAME_MACHINE="alpha" ;; "EV4.5 (21064)") UNAME_MACHINE="alpha" ;; "LCA4 (21066/21068)") UNAME_MACHINE="alpha" ;; "EV5 (21164)") UNAME_MACHINE="alphaev5" ;; "EV5.6 (21164A)") UNAME_MACHINE="alphaev56" ;; "EV5.6 (21164PC)") UNAME_MACHINE="alphapca56" ;; "EV5.7 (21164PC)") UNAME_MACHINE="alphapca57" ;; "EV6 (21264)") UNAME_MACHINE="alphaev6" ;; "EV6.7 (21264A)") UNAME_MACHINE="alphaev67" ;; "EV6.8CB (21264C)") UNAME_MACHINE="alphaev68" ;; "EV6.8AL (21264B)") UNAME_MACHINE="alphaev68" ;; "EV6.8CX (21264D)") UNAME_MACHINE="alphaev68" ;; "EV6.9A (21264/EV69A)") UNAME_MACHINE="alphaev69" ;; "EV7 (21364)") UNAME_MACHINE="alphaev7" ;; "EV7.9 (21364A)") UNAME_MACHINE="alphaev79" ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. 
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 exit $exitcode ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # Should we change UNAME_MACHINE based on the output of uname instead # of the specific Alpha model? echo alpha-pc-interix exit ;; 21064:Windows_NT:50:3) echo alpha-dec-winnt3.5 exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 exit ;; *:[Aa]miga[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-amigaos exit ;; *:[Mm]orph[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-morphos exit ;; *:OS/390:*:*) echo i370-ibm-openedition exit ;; *:z/VM:*:*) echo s390-ibm-zvmoe exit ;; *:OS400:*:*) echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} exit ;; arm:riscos:*:*|arm:RISCOS:*:*) echo arm-unknown-riscos exit ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) echo hppa1.1-hitachi-hiuxmpp exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
if test "`(/bin/universe) 2>/dev/null`" = att ; then echo pyramid-pyramid-sysv3 else echo pyramid-pyramid-bsd fi exit ;; NILE*:*:*:dcosx) echo pyramid-pyramid-svr4 exit ;; DRS?6000:unix:4.0:6*) echo sparc-icl-nx6 exit ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) case `/usr/bin/uname -p` in sparc) echo sparc-icl-nx7; exit ;; esac ;; s390x:SunOS:*:*) echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4H:SunOS:5.*:*) echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) echo i386-pc-auroraux${UNAME_RELEASE} exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build SUN_ARCH="i386" # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then SUN_ARCH="x86_64" fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:*:*) case "`/usr/bin/arch -k`" in Series*|S4*) UNAME_RELEASE=`uname -v` ;; esac # Japanese Language versions have a version number like `4.1.3-JL'. 
echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` exit ;; sun3*:SunOS:*:*) echo m68k-sun-sunos${UNAME_RELEASE} exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} ;; sun4) echo sparc-sun-sunos${UNAME_RELEASE} ;; esac exit ;; aushp:SunOS:*:*) echo sparc-auspex-sunos${UNAME_RELEASE} exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor # > m68000). The system name ranges from "MiNT" over "FreeMiNT" # to the lowercase version "mint" (or "freemint"). Finally # the system name "TOS" denotes a system which is actually not # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) echo m68k-milan-mint${UNAME_RELEASE} exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) echo m68k-hades-mint${UNAME_RELEASE} exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) echo m68k-unknown-mint${UNAME_RELEASE} exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} exit ;; powerpc:machten:*:*) echo powerpc-apple-machten${UNAME_RELEASE} exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 exit ;; RISC*:ULTRIX:*:*) echo mips-dec-ultrix${UNAME_RELEASE} exit ;; VAX*:ULTRIX*:*:*) echo vax-dec-ultrix${UNAME_RELEASE} exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) echo clipper-intergraph-clix${UNAME_RELEASE} exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) eval $set_cc_for_build sed 's/^ //' << EOF 
>$dummy.c #ifdef __cplusplus #include /* for printf() prototype */ int main (int argc, char *argv[]) { #else int main (argc, argv) int argc; char *argv[]; { #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_SVR4) printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); #endif #endif exit (-1); } EOF $CC_FOR_BUILD -o $dummy $dummy.c && dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && SYSTEM_NAME=`$dummy $dummyarg` && { echo "$SYSTEM_NAME"; exit; } echo mips-mips-riscos${UNAME_RELEASE} exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax exit ;; Motorola:*:4.3:PL8-*) echo powerpc-harris-powermax exit ;; Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) echo powerpc-harris-powermax exit ;; Night_Hawk:Power_UNIX:*:*) echo powerpc-harris-powerunix exit ;; m88k:CX/UX:7*:*) echo m88k-harris-cxux7 exit ;; m88k:*:4*:R4*) echo m88k-motorola-sysv4 exit ;; m88k:*:3*:R3*) echo m88k-motorola-sysv3 exit ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures UNAME_PROCESSOR=`/usr/bin/uname -p` if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] then if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ [ ${TARGET_BINARY_INTERFACE}x = x ] then echo m88k-dg-dgux${UNAME_RELEASE} else echo m88k-dg-dguxbcs${UNAME_RELEASE} fi else echo i586-dg-dgux${UNAME_RELEASE} fi exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 exit ;; M88*:*:R3*:*) # Delta 88k system running SVR3 echo m88k-motorola-sysv3 exit ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) echo m88k-tektronix-sysv3 exit ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) echo m68k-tektronix-bsd exit ;; *:IRIX*:*:*) echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` exit ;; 
????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix exit ;; ia64:AIX:*:*) if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #include main() { if (!__power_pc()) exit(1); puts("powerpc-ibm-aix3.2.5"); exit(0); } EOF if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` then echo "$SYSTEM_NAME" else echo rs6000-ibm-aix3.2.5 fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then echo rs6000-ibm-aix3.2.4 else echo rs6000-ibm-aix3.2 fi exit ;; *:AIX:*:[4567]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${IBM_ARCH}-ibm-aix${IBM_REV} exit ;; *:AIX:*:*) echo rs6000-ibm-aix exit ;; ibmrt:4.4BSD:*|romp-ibm:BSD:*) echo romp-ibm-bsd4.4 exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx exit ;; DPX/2?00:B.O.S.:*:*) echo m68k-bull-sysv3 exit ;; 9000/[34]??:4.3bsd:1.*:*) echo m68k-hp-bsd exit ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) echo m68k-hp-bsd4.4 exit ;; 9000/[34678]??:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` case "${UNAME_MACHINE}" in 9000/31? ) HP_ARCH=m68000 ;; 9000/[34]?? 
) HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in 32) HP_ARCH="hppa2.0n" ;; 64) HP_ARCH="hppa2.0w" ;; '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 esac ;; esac fi if [ "${HP_ARCH}" = "" ]; then eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #define _HPUX_SOURCE #include #include int main () { #if defined(_SC_KERNEL_BITS) long bits = sysconf(_SC_KERNEL_BITS); #endif long cpu = sysconf (_SC_CPU_VERSION); switch (cpu) { case CPU_PA_RISC1_0: puts ("hppa1.0"); break; case CPU_PA_RISC1_1: puts ("hppa1.1"); break; case CPU_PA_RISC2_0: #if defined(_SC_KERNEL_BITS) switch (bits) { case 64: puts ("hppa2.0w"); break; case 32: puts ("hppa2.0n"); break; default: puts ("hppa2.0"); break; } break; #else /* !defined(_SC_KERNEL_BITS) */ puts ("hppa2.0"); break; #endif default: puts ("hppa1.0"); break; } exit (0); } EOF (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac if [ ${HP_ARCH} = "hppa2.0w" ] then eval $set_cc_for_build # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler # generating 64-bit code. 
GNU and HP use different nomenclature: # # $ CC_FOR_BUILD=cc ./config.guess # => hppa2.0w-hp-hpux11.23 # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then HP_ARCH="hppa2.0w" else HP_ARCH="hppa64" fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} exit ;; ia64:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` echo ia64-hp-hpux${HPUX_REV} exit ;; 3050*:HI-UX:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #include int main () { long cpu = sysconf (_SC_CPU_VERSION); /* The order matters, because CPU_IS_HP_MC68K erroneously returns true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct results, however. */ if (CPU_IS_PA_RISC (cpu)) { switch (cpu) { case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; default: puts ("hppa-hitachi-hiuxwe2"); break; } } else if (CPU_IS_HP_MC68K (cpu)) puts ("m68k-hitachi-hiuxwe2"); else puts ("unknown-hitachi-hiuxwe2"); exit (0); } EOF $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 exit ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) echo hppa1.1-hp-bsd exit ;; 9000/8??:4.3bsd:*:*) echo hppa1.0-hp-bsd exit ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix exit ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) echo hppa1.1-hp-osf exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf exit ;; i*86:OSF1:*:*) if [ -x /usr/sbin/sysversion ] ; then echo ${UNAME_MACHINE}-unknown-osf1mk else echo ${UNAME_MACHINE}-unknown-osf1 fi exit ;; parisc*:Lites*:*:*) echo hppa1.1-hp-lites exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) echo 
c34-convex-bsd exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*[A-Z]90:*:*:*) echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*T3E:*:*:*) echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*SV1:*:*:*) echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; *:UNICOS/mp:*:*) echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} exit ;; sparc*:BSD/OS:*:*) echo sparc-unknown-bsdi${UNAME_RELEASE} exit ;; *:BSD/OS:*:*) echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} exit ;; *:FreeBSD:*:*) UNAME_PROCESSOR=`/usr/bin/uname -p` case ${UNAME_PROCESSOR} in amd64) echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; *) echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; esac exit ;; 
i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin exit ;; *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; i*:MSYS*:*) echo ${UNAME_MACHINE}-pc-msys exit ;; i*:windows32*:*) # uname -m includes "-pc" on this system. echo ${UNAME_MACHINE}-mingw32 exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 exit ;; *:Interix*:*) case ${UNAME_MACHINE} in x86) echo i586-pc-interix${UNAME_RELEASE} exit ;; authenticamd | genuineintel | EM64T) echo x86_64-unknown-interix${UNAME_RELEASE} exit ;; IA64) echo ia64-unknown-interix${UNAME_RELEASE} exit ;; esac ;; [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks exit ;; 8664:Windows_NT:*) echo x86_64-pc-mks exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we # UNAME_MACHINE based on the output of uname instead of i386? echo i586-pc-interix exit ;; i*:UWIN*:*) echo ${UNAME_MACHINE}-pc-uwin exit ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) echo x86_64-unknown-cygwin exit ;; p*:CYGWIN*:*) echo powerpcle-unknown-cygwin exit ;; prep*:SunOS:5.*:*) echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; *:GNU:*:*) # the GNU system echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit ;; aarch64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; PCA57) 
UNAME_MACHINE=alphapca56 ;; EV6) UNAME_MACHINE=alphaev6 ;; EV67) UNAME_MACHINE=alphaev67 ;; EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} exit ;; arm*:Linux:*:*) eval $set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then echo ${UNAME_MACHINE}-unknown-linux-gnu else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then echo ${UNAME_MACHINE}-unknown-linux-gnueabi else echo ${UNAME_MACHINE}-unknown-linux-gnueabihf fi fi exit ;; avr32*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; cris:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; crisv32:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-gnu exit ;; frv:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; hexagon:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; i*86:Linux:*:*) LIBC=gnu eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #ifdef __dietlibc__ LIBC=dietlibc #endif EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'` echo "${UNAME_MACHINE}-pc-linux-${LIBC}" exit ;; ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; m68*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; mips:Linux:*:* | mips64:Linux:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #undef CPU #undef ${UNAME_MACHINE} #undef ${UNAME_MACHINE}el #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) CPU=${UNAME_MACHINE}el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) CPU=${UNAME_MACHINE} #else CPU= #endif #endif EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ;; or32:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; 
padre:Linux:*:*) echo sparc-unknown-linux-gnu exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) echo hppa64-unknown-linux-gnu exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in PA7*) echo hppa1.1-unknown-linux-gnu ;; PA8*) echo hppa2.0-unknown-linux-gnu ;; *) echo hppa-unknown-linux-gnu ;; esac exit ;; ppc64:Linux:*:*) echo powerpc64-unknown-linux-gnu exit ;; ppc:Linux:*:*) echo powerpc-unknown-linux-gnu exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit ;; sh64*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; tile*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; vax:Linux:*:*) echo ${UNAME_MACHINE}-dec-linux-gnu exit ;; x86_64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; xtensa*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. echo i386-sequent-sysv4 exit ;; i*86:UNIX_SV:4.2MP:2.*) # Unixware is an offshoot of SVR4, but it has its own version # number series starting with 2... # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. 
echo ${UNAME_MACHINE}-pc-os2-emx exit ;; i*86:XTS-300:*:STOP) echo ${UNAME_MACHINE}-unknown-stop exit ;; i*86:atheos:*:*) echo ${UNAME_MACHINE}-unknown-atheos exit ;; i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable exit ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) echo i386-unknown-lynxos${UNAME_RELEASE} exit ;; i*86:*DOS:*:*) echo ${UNAME_MACHINE}-pc-msdosdjgpp exit ;; i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} else echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} fi exit ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 echo ${UNAME_MACHINE}-pc-sco$UNAME_REL else echo ${UNAME_MACHINE}-pc-sysv32 fi exit ;; pc:*:*:*) # Left here for compatibility: # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub # prints for the "djgpp" host, or else GDB configury will decide that # this is a cross-build. 
echo i586-pc-msdosdjgpp exit ;; Intel:Mach:3*:*) echo i386-pc-mach3 exit ;; paragon:*:*:*) echo i860-intel-osf1 exit ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 fi exit ;; mini*:CTIX:SYS*5:*) # "miniframe" echo m68010-convergent-sysv exit ;; mc68k:UNIX:SYSTEM5:3.51m) echo m68k-convergent-sysv exit ;; M680?0:D-NIX:5.3:*) echo m68k-diab-dnix exit ;; M68*:*:R3V[5678]*:*) test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) echo m68k-unknown-lynxos${UNAME_RELEASE} exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 exit ;; TSUNAMI:LynxOS:2.*:*) echo sparc-unknown-lynxos${UNAME_RELEASE} exit ;; rs6000:LynxOS:2.*:*) echo 
rs6000-unknown-lynxos${UNAME_RELEASE} exit ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} exit ;; SM[BE]S:UNIX_SV:*:*) echo mips-dde-sysv${UNAME_RELEASE} exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 exit ;; RM*:SINIX-*:*:*) echo mips-sni-sysv4 exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` echo ${UNAME_MACHINE}-sni-sysv4 else echo ns32k-sni-sysv fi exit ;; PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort # says echo i586-unisys-sysv4 exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm echo hppa1.1-stratus-sysv4 exit ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. echo i860-stratus-sysv4 exit ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. echo ${UNAME_MACHINE}-stratus-vos exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos exit ;; mc68*:A/UX:*:*) echo m68k-apple-aux${UNAME_RELEASE} exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then echo mips-nec-sysv${UNAME_RELEASE} else echo mips-unknown-sysv${UNAME_RELEASE} fi exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. echo powerpc-be-beos exit ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. echo powerpc-apple-beos exit ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. echo i586-pc-beos exit ;; BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
echo i586-pc-haiku exit ;; SX-4:SUPER-UX:*:*) echo sx4-nec-superux${UNAME_RELEASE} exit ;; SX-5:SUPER-UX:*:*) echo sx5-nec-superux${UNAME_RELEASE} exit ;; SX-6:SUPER-UX:*:*) echo sx6-nec-superux${UNAME_RELEASE} exit ;; SX-7:SUPER-UX:*:*) echo sx7-nec-superux${UNAME_RELEASE} exit ;; SX-8:SUPER-UX:*:*) echo sx8-nec-superux${UNAME_RELEASE} exit ;; SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; *:Rhapsody:*:*) echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} exit ;; *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown case $UNAME_PROCESSOR in i386) eval $set_cc_for_build if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then UNAME_PROCESSOR="x86_64" fi fi ;; unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = "x86"; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} exit ;; *:QNX:*:4*) echo i386-pc-qnx exit ;; NEO-?:NONSTOP_KERNEL:*:*) echo neo-tandem-nsk${UNAME_RELEASE} exit ;; NSE-?:NONSTOP_KERNEL:*:*) echo nse-tandem-nsk${UNAME_RELEASE} exit ;; NSR-?:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux exit ;; BS2000:POSIX*:*:*) echo bs2000-siemens-sysv exit ;; DS/*:UNIX_System_V:*:*) echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. 
if test "$cputype" = "386"; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" fi echo ${UNAME_MACHINE}-unknown-plan9 exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 exit ;; *:TENEX:*:*) echo pdp10-unknown-tenex exit ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) echo pdp10-dec-tops20 exit ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) echo pdp10-xkl-tops20 exit ;; *:TOPS-20:*:*) echo pdp10-unknown-tops20 exit ;; *:ITS:*:*) echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) echo mips-sei-seiux${UNAME_RELEASE} exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit ;; *:*VMS:*:*) UNAME_MACHINE=`(uname -p) 2>/dev/null` case "${UNAME_MACHINE}" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; V*) echo vax-dec-vms ; exit ;; esac ;; *:XENIX:*:SysV) echo i386-pc-xenix exit ;; i*86:skyos:*:*) echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos exit ;; i*86:AROS:*:*) echo ${UNAME_MACHINE}-pc-aros exit ;; x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 #echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 eval $set_cc_for_build cat >$dummy.c < # include #endif main () { #if defined (sony) #if defined (MIPSEB) /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, I don't know.... 
*/ printf ("mips-sony-bsd\n"); exit (0); #else #include printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 "4" #else "" #endif ); exit (0); #endif #endif #if defined (__arm) && defined (__acorn) && defined (__unix) printf ("arm-acorn-riscix\n"); exit (0); #endif #if defined (hp300) && !defined (hpux) printf ("m68k-hp-bsd\n"); exit (0); #endif #if defined (NeXT) #if !defined (__ARCHITECTURE__) #define __ARCHITECTURE__ "m68k" #endif int version; version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; if (version < 4) printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); else printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); exit (0); #endif #if defined (MULTIMAX) || defined (n16) #if defined (UMAXV) printf ("ns32k-encore-sysv\n"); exit (0); #else #if defined (CMU) printf ("ns32k-encore-mach\n"); exit (0); #else printf ("ns32k-encore-bsd\n"); exit (0); #endif #endif #endif #if defined (__386BSD__) printf ("i386-pc-bsd\n"); exit (0); #endif #if defined (sequent) #if defined (i386) printf ("i386-sequent-dynix\n"); exit (0); #endif #if defined (ns32000) printf ("ns32k-sequent-dynix\n"); exit (0); #endif #endif #if defined (_SEQUENT_) struct utsname un; uname(&un); if (strncmp(un.version, "V2", 2) == 0) { printf ("i386-sequent-ptx2\n"); exit (0); } if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ printf ("i386-sequent-ptx1\n"); exit (0); } printf ("i386-sequent-ptx\n"); exit (0); #endif #if defined (vax) # if !defined (ultrix) # include # if defined (BSD) # if BSD == 43 printf ("vax-dec-bsd4.3\n"); exit (0); # else # if BSD == 199006 printf ("vax-dec-bsd4.3reno\n"); exit (0); # else printf ("vax-dec-bsd\n"); exit (0); # endif # endif # else printf ("vax-dec-bsd\n"); exit (0); # endif # else printf ("vax-dec-ultrix\n"); exit (0); # endif #endif #if defined (alliant) && defined (i860) printf ("i860-alliant-bsd\n"); exit (0); #endif exit (1); } EOF $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } # Convex versions that predate uname can use getsysinfo(1) if [ -x /usr/convex/getsysinfo ] then case `getsysinfo -f cpu_type` in c1*) echo c1-convex-bsd exit ;; c2*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi exit ;; c34*) echo c34-convex-bsd exit ;; c38*) echo c38-convex-bsd exit ;; c4*) echo c4-convex-bsd exit ;; esac fi cat >&2 < in order to provide the needed information to handle your system. 
config.guess timestamp = $timestamp uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` /bin/uname -X = `(/bin/uname -X) 2>/dev/null` hostinfo = `(hostinfo) 2>/dev/null` /bin/universe = `(/bin/universe) 2>/dev/null` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` /bin/arch = `(/bin/arch) 2>/dev/null` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` UNAME_MACHINE = ${UNAME_MACHINE} UNAME_RELEASE = ${UNAME_RELEASE} UNAME_SYSTEM = ${UNAME_SYSTEM} UNAME_VERSION = ${UNAME_VERSION} EOF exit 1 # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" # End: ecm-6.4.4/factor.c0000644023561000001540000001003412106741273010636 00000000000000/* factor.c - public interface for libecm. Copyright 2005, 2006, 2007, 2009, 2011 Paul Zimmermann, Alexander Kruppa, David Cleaver, Cyril Bouvier. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ #include #include #include "ecm-impl.h" void ecm_init (ecm_params q) { q->method = ECM_ECM; /* default method */ MEMORY_TAG; mpz_init_set_ui (q->x, 0); mpz_init_set_ui (q->sigma, 0); q->sigma_is_A = 0; mpz_init_set_ui (q->go, 1); q->B1done = ECM_DEFAULT_B1_DONE + 1. / 1048576.; mpz_init_set_si (q->B2min, -1.0); /* default: B2min will be set to B1 */ mpz_init_set_si (q->B2, ECM_DEFAULT_B2); q->k = ECM_DEFAULT_K; q->S = ECM_DEFAULT_S; /* automatic choice of polynomial */ q->repr = ECM_MOD_DEFAULT; /* automatic choice of representation */ q->nobase2step2 = 0; /* continue special base 2 code in ecm step 2, if used */ q->verbose = 0; /* no output (default in library mode) */ q->os = stdout; /* standard output */ q->es = stderr; /* error output */ q->chkfilename = NULL; q->TreeFilename = NULL; q->maxmem = 0.0; q->stage1time = 0.0; MEMORY_TAG; gmp_randinit_default (q->rng); MEMORY_TAG; gmp_randseed_ui (q->rng, get_random_ul ()); MEMORY_UNTAG; q->use_ntt = 1; q->stop_asap = NULL; q->batch = 0; /* no batch mode by default in library mode */ q->batch_B1 = 1.0; mpz_init_set_ui(q->batch_s, 1); q->gw_k = 0.0; q->gw_b = 0; q->gw_n = 0; q->gw_c = 0; } void ecm_clear (ecm_params q) { mpz_clear (q->x); mpz_clear (q->sigma); mpz_clear (q->go); mpz_clear (q->B2min); mpz_clear (q->B2); gmp_randclear (q->rng); mpz_clear (q->batch_s); } /* returns ECM_FACTOR_FOUND, ECM_NO_FACTOR_FOUND, or ECM_ERROR */ int ecm_factor (mpz_t f, mpz_t n, double B1, ecm_params p) { int res; /* return value */ int p_is_null; ecm_params q; double B1done, B2scale; if ((p_is_null = (p == NULL))) { p = q; ecm_init (q); } /* Ugly hack to pass B2scale to the library somehow. It gets piggy-backed onto B1done. The next major release will have to allow for variable length parameter structs. 
*/ B1done = floor (p->B1done); B2scale = (p->B1done - B1done) * 1048576.; p->B1done = B1done; if (p->method == ECM_ECM) res = ecm (f, p->x, p->sigma, n, p->go, &(p->B1done), B1, p->B2min, p->B2, B2scale, p->k, p->S, p->verbose, p->repr, p->nobase2step2, p->use_ntt, p->sigma_is_A, p->os, p->es, p->chkfilename, p->TreeFilename, p->maxmem, p->stage1time, p->rng, p->stop_asap, p->batch, p->batch_s, p->gw_k, p->gw_b, p->gw_n, p->gw_c); else if (p->method == ECM_PM1) res = pm1 (f, p->x, n, p->go, &(p->B1done), B1, p->B2min, p->B2, B2scale, p->k, p->S, p->verbose, p->repr, p->use_ntt, p->os, p->es, p->chkfilename, p->TreeFilename, p->maxmem, p->rng, p->stop_asap); else if (p->method == ECM_PP1) res = pp1 (f, p->x, n, p->go, &(p->B1done), B1, p->B2min, p->B2, B2scale, p->k, p->S, p->verbose, p->repr, p->use_ntt, p->os, p->es, p->chkfilename, p->TreeFilename, p->maxmem, p->rng, p->stop_asap); else { fprintf (p->es, "Error, unknown method: %d\n", p->method); res = ECM_ERROR; } if (p_is_null) ecm_clear (q); return res; } ecm-6.4.4/test.pm10000755023561000001540000000736312106741274010631 00000000000000#!/bin/sh # test file for P-1 method # # Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 # Paul Zimmermann, Alexander Kruppa, Dave Newman, Jim Fougeron. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License # along with this program; see the file COPYING. 
If not, see # http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., # 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. PM1="$1 -pm1" # Call with "checkcode $? n" to check that return code is n # the return code is (see ecm-ecm.h): # 0: no factor found # 1: error (for example out of memory) # 2: composite factor found with composite cofactor # 6: prime factor found with composite cofactor # 8: input number found # 10: composite factor found with prime cofactor # 14: prime factor found with prime cofactor checkcode () { if [ $1 != $2 ] then echo "############### ERROR ###############" echo "Expected return code $2 but got $1" exit 1 fi } ### bug in ecm-5.0 found by Jay Berg (overflow in i0*d) echo 441995541378330835457 | $PM1 -x0 3 157080 7e9-72e8; checkcode $? 8 ### stage 2 less than 10^9. Input is prime factor of 2^731-1 ### echo 335203548019575991076297 | $PM1 -x0 2 23 31; checkcode $? 8 ### stage 2 of length 1 ### echo 335203548019575991076297 | $PM1 -x0 3 31 58766400424189339249-58766400424189339249; checkcode $? 8 # try primes < d in stage 2 echo 2050449353925555290706354283 | $PM1 -k 1 20 0-1e6; checkcode $? 14 # This factor was missed by an early development version of stage 2 echo 67872792749091946529 | $PM1 -x0 3 8467 11004397; checkcode $? 8 echo 5735039483399104015346944564789 | $PM1 1277209 9247741; checkcode $? 8 echo 620224739362954187513 | $PM1 -x0 3 668093 65087177; checkcode $? 8 echo 1405929742229533753 | $PM1 1123483 75240667; checkcode $? 8 echo 16811052664235873 | $PM1 -x0 3 19110 178253039; checkcode $? 8 echo 9110965748024759967611 | $PM1 1193119 316014211; checkcode $? 8 echo 563796628294674772855559264041716715663 | $PM1 4031563 14334623; checkcode $? 8 echo 188879386195169498836498369376071664143 | $PM1 3026227 99836987; checkcode $? 8 # factor of 909*9^909+1 found by Paul Leyland on 15 Nov 2002 echo 474476178924594486566271953891 | $PM1 9594209 519569569; checkcode $? 
8 ### stage 2 less than 10^10 ### echo 2124306045220073929294177 | $PM1 290021 1193749003; checkcode $? 8 ### Try saving and resuming echo 25591172394760497166702530699464321 | $PM1 -save test.pm1.save 100000 checkcode $? 0 $PM1 -resume test.pm1.save 120557 2007301 C=$? /bin/rm -f test.pm1.save checkcode $C 8 # bug in ecm-5.0 (overflow in fin_diff_coeff) echo 504403158265489337 | $PM1 -k 4 8 9007199254740700-9007199254740900; checkcode $? 8 # check that primes near B2min are covered echo 6857 | $PM1 840 857; checkcode $? 8 # A test with a larger input number to test modular arithmetic routines not # in mulredc*.asm. This input has 1363 bits so it has 22 64-bit words # (43 32-bit words) and cannot use mulredc which handles only up to 20 limbs echo "10090030271*10^400+696212088699" | $PM1 2e3 2e6; checkcode $? 14 # check bug fixed in revision 1378 echo "2^(64*2)-1" | $PM1 -redc -x0 -1 2 1; checkcode $? 8 # check bug fixed in revision 2068 echo "234^997+997^234" | $PM1 -ntt 100 324; checkcode $? 0 echo "All P-1 tests are ok." ecm-6.4.4/memory.c0000644023561000001540000001553612106741273010704 00000000000000/* Memory allocation used during tests. Copyright 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc. This file was copied from the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library; see the file COPYING.LIB. 
If not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config.h" #include #include /* for abort */ #include #include "ecm.h" void *__gmp_default_allocate (size_t); void *__gmp_default_reallocate (void *, size_t, size_t); void __gmp_default_free (void *, size_t); /* Each block allocated is a separate malloc, for the benefit of a redzoning malloc debugger during development or when bug hunting. Sizes passed when reallocating or freeing are checked (the default routines don't care about these). Memory leaks are checked by requiring that all blocks have been freed when tests_memory_end() is called. Test programs must be sure to have "clear"s for all temporary variables used. */ #define NAME_LEN 8 struct header { void *ptr; size_t size; char name[NAME_LEN]; unsigned int line; struct header *next; }; struct header *tests_memory_list = NULL; static unsigned long nr_realloc = 0, nr_realloc_move = 0; static char cur_name[NAME_LEN]; static unsigned int cur_line; static unsigned long cur_mem, peak_mem; /* Return a pointer to a pointer to the found block (so it can be updated when unlinking). 
*/ static struct header ** tests_memory_find (void *ptr) { struct header **hp; for (hp = &tests_memory_list; *hp != NULL; hp = &((*hp)->next)) if ((*hp)->ptr == ptr) return hp; return NULL; } #if 0 static int tests_memory_valid (void *ptr) { return (tests_memory_find (ptr) != NULL); } #endif static void * tests_allocate (size_t size) { struct header *h; int i; if (size == 0) { printf ("tests_allocate(): attempt to allocate 0 bytes\n"); abort (); } if (cur_name[0] == 0) cur_name[1] = 0; /* Set breakpoint here to find untagged allocs */ h = (struct header *) __gmp_default_allocate (sizeof (*h)); h->next = tests_memory_list; tests_memory_list = h; h->size = size; h->ptr = (struct header*) __gmp_default_allocate (size); for (i = 0; i < NAME_LEN; i++) h->name[i] = cur_name[i]; h->line = cur_line; cur_mem += size; if (cur_mem > peak_mem) peak_mem = cur_mem; return h->ptr; } static void * tests_reallocate (void *ptr, size_t old_size, size_t new_size) { struct header **hp, *h; if (new_size == 0) { printf ("tests_reallocate(): attempt to reallocate 0x%lX to 0 bytes\n", (unsigned long) ptr); abort (); } hp = tests_memory_find (ptr); if (hp == NULL) { printf ("tests_reallocate(): attempt to reallocate bad pointer 0x%lX\n", (unsigned long) ptr); abort (); } h = *hp; if (h->size != old_size) { printf ("tests_reallocate(): bad old size %zd, should be %zd\n", old_size, h->size); abort (); } if (h->size > cur_mem) { printf ("tests_reallocate(): h->size = %zd but cur_mem = %lu\n", h->size, cur_mem); abort(); } cur_mem = cur_mem - h->size + new_size; if (cur_mem > peak_mem) peak_mem = cur_mem; #if 0 printf ("Reallocating %p, first allocated in %s, line %d, from %d to %d\n", ptr, h->name, h->line, h->size, new_size); if (new_size <= h->size) printf ("Unnecessary realloc!\n"); #endif nr_realloc++; h->size = new_size; h->ptr = (struct header*) __gmp_default_reallocate (ptr, old_size, new_size); if (h->ptr != ptr) nr_realloc_move++; return h->ptr; } static struct header ** 
tests_free_find (void *ptr) { struct header **hp = tests_memory_find (ptr); if (hp == NULL) { printf ("tests_free(): attempt to free bad pointer 0x%lX\n", (unsigned long) ptr); abort (); } return hp; } static void tests_free_nosize (void *ptr) { struct header **hp = tests_free_find (ptr); struct header *h = *hp; if (h->size > cur_mem) { printf ("tests_free_nosize(): h->size = %zd but cur_mem = %lu\n", h->size, cur_mem); abort(); } cur_mem -= h->size; *hp = h->next; /* unlink */ __gmp_default_free (ptr, h->size); __gmp_default_free (h, sizeof (*h)); } void tests_free (void *ptr, size_t size) { struct header **hp = tests_free_find (ptr); struct header *h = *hp; if (h->size != size) { printf ("tests_free(): bad size %zd, should be %zd\n", size, h->size); abort (); } tests_free_nosize (ptr); } void tests_memory_start (void) { mp_set_memory_functions (tests_allocate, tests_reallocate, tests_free); cur_name[0] = 0; cur_line = 0; cur_mem = 0L; peak_mem = 0L; } void tests_memory_reset (void) { mp_set_memory_functions (__gmp_default_allocate, __gmp_default_reallocate, __gmp_default_free); } void tests_memory_end (void) { if (tests_memory_list != NULL) { struct header *h; unsigned count; printf ("tests_memory_end(): not all memory freed\n"); count = 0; for (h = tests_memory_list; h != NULL; h = h->next) { count++; printf ("Memory at %p, allocated by %s, line %d\n", h->ptr, h->name, h->line); } printf (" %u block(s) remaining\n", count); abort (); } if (cur_mem != 0) { printf ("tests_memory_end(): cur_mem = %lu but list of allocated " "memory empty\n", cur_mem); abort (); } printf ("%lu reallocates, %lu reallocates with move, peak_mem = %lu\n", nr_realloc, nr_realloc_move, peak_mem); } void tests_memory_status (void) { unsigned count = 0, size = 0; if (tests_memory_list != NULL) { struct header *h; for (h = tests_memory_list; h != NULL; h = h->next) { count++; size += h->size; } } if (size != cur_mem) { printf ("tests_memory_status(): size = %d but cur_mem = %lu", size, 
cur_mem); abort(); } printf (" %u blocks remaining, total size %u\n", count, size); } void tests_memory_set_location (char *name, unsigned int line) { unsigned int i; for (i = 0; i < NAME_LEN; i++) cur_name[i] = name[i]; cur_line = line; } ecm-6.4.4/auxarith.c0000644023561000001540000000520112106741273011205 00000000000000/* Auxiliary arithmetic routines on unsigned long ints for the ecm library. Copyright 2001, 2002, 2003, 2004, 2005, 2007, 2008 Paul Zimmermann and Alexander Kruppa. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, see http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "config.h" #include "ecm-impl.h" /* Returns the gcd of a and b */ unsigned long gcd (unsigned long a, unsigned long b) { unsigned long t; while (b != 0UL) { t = a % b; a = b; b = t; } return a; } /* returns Euler's totient phi function */ unsigned long eulerphi (unsigned long n) { unsigned long phi = 1UL, p; for (p = 2UL; p * p <= n; p += 2UL) { if (n % p == 0UL) { phi *= p - 1UL; n /= p; while (n % p == 0UL) { phi *= p; n /= p; } } if (p == 2UL) p--; } /* now n is prime or 1 */ return (n == 1UL) ? 
phi : phi * (n - 1UL);
}

/* returns ceil(log(n)/log(2)), i.e. the smallest k with 2^k >= n.
   Requires n > 0. */
unsigned int
ceil_log2 (unsigned long n)
{
  unsigned int k = 0;

  ASSERT (n > 0UL);

  /* Count the bits of n - 1. */
  n--;
  while (n)
    {
      k++;
      n >>= 1;
    }

  return k;
}

/* Simple, slow methods for testing / finding primes */

/* Returns 1 if n is prime and 0 otherwise, by trial division with odd
   divisors.  The bound is written i <= n / i instead of i*i <= n so that
   it cannot wrap for n close to ULONG_MAX. */
int
is_prime (const unsigned long n)
{
  unsigned long i;

  if (n < 2UL)
    return 0;

  if (n % 2UL == 0UL)
    return n == 2UL;

  for (i = 3UL; i <= n / i; i += 2UL)
    if (n % i == 0UL)
      return 0;

  return 1;
}

/* Returns the smallest prime larger than n.
   The search starts at the first odd number above n, which is n + 1 for
   even n and n + 2 for odd n.  Starting at n + 2 unconditionally would only
   ever test even candidates when n is even and greater than 2. */
unsigned long
next_prime (const unsigned long n)
{
  unsigned long m;

  if (n < 2UL)
    return 2UL;

  m = (n + 1UL) | 1UL;
  while (! is_prime (m))
    m += 2UL;

  return m;
}

/* Returns the smallest prime factor of N. If N == 1, return 1.
   N must not be 0.  The trial division bound is written i <= N / i so that
   it cannot wrap for N close to ULONG_MAX. */
unsigned long
find_factor (const unsigned long N)
{
  unsigned long i;

  ASSERT_ALWAYS (N != 0UL);

  if (N == 1UL)
    return 1UL;

  if (N % 2UL == 0UL)
    return 2UL;

  for (i = 3UL; i <= N / i; i += 2UL)
    if (N % i == 0UL)
      return i;

  return N;
}