flint-1.011/0000755017361200017500000000000011035402623012513 5ustar tabbotttabbottflint-1.011/NTL-interface.cpp0000644017361200017500000000725411025357254015632 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** NTL-interface.cpp: Functions for conversion between NTL and FLINT format Copyright (C) 2007, William Hart *****************************************************************************/ #include #include #include #include #include #include #include #include "flint.h" #include "fmpz.h" #include "fmpz_poly.h" #include "NTL-interface.h" #define SIZE(p) (((long *) (p))[1]) #define DATA(p) ((mp_limb_t *) (((long *) (p)) + 2)) NTL_CLIENT unsigned long ZZ_limbs(const ZZ& z) { if (z.rep) return FLINT_ABS(SIZE(z.rep)); else return 0; } unsigned long ZZX_maxlimbs(const ZZX& z) { unsigned long length = deg(z)+1; unsigned long maxlimbs = 0; unsigned long newlimbs, i; const ZZ *ap; if (length == 0) return 0; for (i = 0, ap = z.rep.elts(); i < length; ap++, i++) { newlimbs = ZZ_limbs(*ap); if (newlimbs > maxlimbs) maxlimbs = newlimbs; } return maxlimbs; } void ZZ_to_fmpz(fmpz_t output, const ZZ& z) { 
_ntl_gbigint x = z.rep; if (!x) { output[0] = 0L; return; } unsigned long lw = ZZ_limbs(z); mp_limb_t *xp = DATA(x); F_mpn_copy(output + 1, xp, lw); if (z < 0L) output[0] = -lw; else output[0] = lw; } void fmpz_to_ZZ(ZZ& output, const fmpz_t z) { mp_limb_t *xp; _ntl_gbigint *x = &output.rep; long lw = FLINT_ABS(z[0]);; if (lw == 0) { if (*x) SIZE(*x) = 0; return; } _ntl_gsetlength(x, lw); xp = DATA(*x); F_mpn_copy(xp, z + 1, lw); if ((long) z[0] < 0L) SIZE(*x) = -lw; else SIZE(*x) = lw; } void fmpz_poly_to_ZZX(ZZX& output, const fmpz_poly_t poly) { unsigned long length = poly->length; unsigned long i; fmpz_t coeff; ZZ *ap; if (length == 0) { output = 0; return; } output.rep.SetLength(length); for (i = 0, ap = output.rep.elts(); i < length; ap++, i++) { coeff = fmpz_poly_get_coeff_ptr(poly, i); fmpz_to_ZZ(*ap, coeff); } } void ZZX_to_fmpz_poly(fmpz_poly_t output, const ZZX& poly) { unsigned long length = deg(poly) + 1; unsigned long limbs = ZZX_maxlimbs(poly); unsigned long i; fmpz_t coeff_f; const ZZ *ap; if (length == 0) { fmpz_poly_zero(output); return; } fmpz_poly_fit_length(output, length); fmpz_poly_fit_limbs(output, limbs); output->length = length; for (i = 0, ap = poly.rep.elts(); i < length; ap++, i++) { coeff_f = fmpz_poly_get_coeff_ptr(output, i); ZZ_to_fmpz(coeff_f, *ap); } } flint-1.011/fmpz_poly-test.c0000644017361200017500000142760311025357254015700 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
/*
   Deterministic pseudo-random integer in [0, randsup).

   A linear congruential generator whose state lives in a function-local
   static, so successive calls walk one fixed sequence. randsup == 0
   returns 0 and leaves the generator state untouched.
*/
unsigned long randint(unsigned long randsup)
{
   static unsigned long state = 4035456057U;
   const unsigned long multiplier = 1025416097U;
   const unsigned long increment = 286824428U;
   const unsigned long modulus = 4294967291U;

   if (randsup != 0)
   {
      state = (state * multiplier + increment) % modulus;
      return state % randsup;
   }

   return 0;
}
mpz_set_ui(temp,0); else { mpz_rrandomb(temp, randstate, bits); } mpz_poly_set_coeff(pol, i, temp); } mpz_clear(temp); } #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? "ok\n" : "FAIL!\n"); int test__fmpz_poly_convert() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(5) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(20); #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_realloc(test_poly2, length); randpoly(test_poly, length, bits); #if DEBUG mpz_poly_print_pretty(test_poly, "x"); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zd, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test__fmpz_poly_truncate() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); trunc = 
random_ulong(length+1); #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_realloc(test_poly2, length); randpoly(test_poly, length, bits); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zd, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_truncate(test_fmpz_poly, trunc); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); mpz_poly_truncate(test_poly, test_poly, trunc); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zd, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test__fmpz_poly_max_bits() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; long next_bits, mpz_bits, fmpz_bits, sign; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zd, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_bits = 0; sign = 1L; for (unsigned long i = 0; i < test_poly->length; i++) { next_bits = mpz_sizeinbase(test_poly->coeffs[i], 2); if (next_bits > mpz_bits) mpz_bits = next_bits; if (mpz_sgn(test_poly->coeffs[i]) < 0L) sign = -1L; } mpz_bits = sign*mpz_bits; mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_bits = _fmpz_poly_max_bits(test_fmpz_poly); result = (mpz_bits == fmpz_bits); } fmpz_poly_clear(test_fmpz_poly); } 
mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_max_bits1() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; long next_bits, mpz_bits, fmpz_bits, sign; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(FLINT_BITS)+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zd, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_bits = 0; sign = 1L; for (unsigned long i = 0; i < test_poly->length; i++) { next_bits = mpz_sizeinbase(test_poly->coeffs[i], 2); if (next_bits > mpz_bits) mpz_bits = next_bits; if (mpz_sgn(test_poly->coeffs[i]) < 0L) sign = -1L; } mpz_bits = mpz_bits*sign; mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_bits = _fmpz_poly_max_bits1(test_fmpz_poly); result = (mpz_bits == fmpz_bits); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_max_limbs() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length, next_limbs, mpz_limbs, fmpz_limbs; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zd, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_limbs = 0; for (unsigned 
long i = 0; i < test_poly->length; i++) { next_limbs = mpz_size(test_poly->coeffs[i]); if (next_limbs > mpz_limbs) mpz_limbs = next_limbs; } mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_limbs = _fmpz_poly_max_limbs(test_fmpz_poly); result = (mpz_limbs == fmpz_limbs); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_attach() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_realloc(test_poly2, length); randpoly(test_poly, length, bits); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zd, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_attach(test_fmpz_poly2, test_fmpz_poly); _fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zd, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test__fmpz_poly_attach_shift() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length, shift; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = 
random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); shift = random_ulong(length+1); #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_realloc(test_poly2, length); randpoly(test_poly, length, bits); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zd, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_attach_shift(test_fmpz_poly2, test_fmpz_poly, shift); _fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); _fmpz_poly_right_shift(test_fmpz_poly, test_fmpz_poly, shift); fmpz_poly_to_mpz_poly(test_poly, test_fmpz_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zd, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test__fmpz_poly_attach_truncate() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); trunc = random_ulong(length+1); #if DEBUG printf("%ld, %ld, %ld\n",length, bits, trunc); #endif fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_realloc(test_poly2, length); randpoly(test_poly, length, bits); if (trunc > test_poly->length) trunc = test_poly->length; #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) 
gmp_printf("%Zd, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_attach_truncate(test_fmpz_poly2, test_fmpz_poly, trunc); _fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly, trunc); fmpz_poly_to_mpz_poly(test_poly, test_fmpz_poly); result = mpz_poly_equal(test_poly, test_poly2); #if DEBUG2 if (!result) { mpz_poly_print(test_poly); printf("\n"); mpz_poly_print(test_poly2); printf("\n"); } #endif } fmpz_poly_clear(test_fmpz_poly); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test__fmpz_poly_getset_ui() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; unsigned long coeff, coeff_bits, coeff_num; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_check_normalisation(test_fmpz_poly); for (unsigned long count3 = 1; (count3 < 1000) && result == 1; count3++) { coeff_bits = randint(FLINT_BITS); if (coeff_bits == 0) coeff = 0; else coeff = gmp_urandomb_ui(randstate, coeff_bits); coeff_num = randint(test_fmpz_poly->length); if (test_fmpz_poly->length) { _fmpz_poly_set_coeff_ui(test_fmpz_poly, coeff_num, coeff); fmpz_poly_check_normalisation(test_fmpz_poly); result = (_fmpz_poly_get_coeff_ui(test_fmpz_poly, coeff_num) == coeff); } #if DEBUG2 if (!result) printf("Length = %ld, index = %ld, bits = %ld, coeff = %ld\n", test_fmpz_poly->length, coeff_num, coeff_bits, coeff); #endif } } 
fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_getset_si() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; long coeff, sign; unsigned long coeff_bits, coeff_num; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); length = test_fmpz_poly->length; for (unsigned long count3 = 1; (count3 < 1000) && result == 1; count3++) { coeff_bits = randint(FLINT_BITS-1); if (coeff_bits == 0) coeff = 0; else coeff = gmp_urandomb_ui(randstate, coeff_bits); coeff_num = randint(test_fmpz_poly->length); #if DEBUG printf("Index = %ld, bits = %ld, coeff = %ld\n", coeff_num, coeff_bits, coeff); #endif if (randint(2)) sign = -1L; else sign = 1L; coeff = sign*coeff; if (test_fmpz_poly->length) { _fmpz_poly_set_coeff_si(test_fmpz_poly, coeff_num, coeff); fmpz_poly_check_normalisation(test_fmpz_poly); result = ((_fmpz_poly_get_coeff_si(test_fmpz_poly, coeff_num) == coeff) && (_fmpz_poly_get_coeff_ui(test_fmpz_poly, coeff_num) == sign*coeff)); } } } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_getset_ui() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; unsigned long coeff, coeff_bits, coeff_num; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { 
length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_check_normalisation(test_fmpz_poly); for (unsigned long count3 = 1; (count3 < 1000) && result == 1; count3++) { coeff_bits = randint(FLINT_BITS); if (coeff_bits == 0) coeff = 0; else coeff = gmp_urandomb_ui(randstate, coeff_bits); coeff_num = randint(test_fmpz_poly->length); if (test_fmpz_poly->length) { fmpz_poly_set_coeff_ui(test_fmpz_poly, coeff_num, coeff); fmpz_poly_check_normalisation(test_fmpz_poly); result = (fmpz_poly_get_coeff_ui(test_fmpz_poly, coeff_num) == coeff); } #if DEBUG2 if (!result) printf("Length = %ld, index = %ld, bits = %ld, coeff = %ld\n", test_fmpz_poly->length, coeff_num, coeff_bits, coeff); #endif } } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_getset_si() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; long coeff, sign; unsigned long coeff_bits, coeff_num; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); length = test_fmpz_poly->length; for (unsigned long count3 = 1; (count3 < 1000) && result == 1; count3++) { coeff_bits = randint(FLINT_BITS-1); if (coeff_bits == 0) coeff = 0; else coeff = gmp_urandomb_ui(randstate, coeff_bits); coeff_num = randint(test_fmpz_poly->length); #if DEBUG printf("Index = %ld, bits = %ld, coeff = %ld\n", coeff_num, coeff_bits, coeff); 
#endif if (randint(2)) sign = -1L; else sign = 1L; coeff = sign*coeff; if (test_fmpz_poly->length) { fmpz_poly_set_coeff_si(test_fmpz_poly, coeff_num, coeff); fmpz_poly_check_normalisation(test_fmpz_poly); result = ((fmpz_poly_get_coeff_si(test_fmpz_poly, coeff_num) == coeff) && (_fmpz_poly_get_coeff_ui(test_fmpz_poly, coeff_num) == sign*coeff)); } } } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_get_coeff_ptr() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; long coeff, sign; unsigned long coeff_bits, coeff_num; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long count3 = 1; (count3 < 1000) && result == 1; count3++) { coeff_bits = randint(FLINT_BITS-1); if (coeff_bits == 0) coeff = 0; else coeff = gmp_urandomb_ui(randstate, coeff_bits); coeff_num = randint(length); #if DEBUG printf("Index = %ld, bits = %ld, coeff = %ld\n", coeff_num, coeff_bits, coeff); #endif if (randint(2)) sign = -1L; else sign = 1L; coeff = sign*coeff; _fmpz_poly_set_coeff_si(test_fmpz_poly, coeff_num, coeff); fmpz_poly_check_normalisation(test_fmpz_poly); if (coeff == 0) sign = 0; result = (_fmpz_poly_get_coeff_ptr(test_fmpz_poly, coeff_num)[0] == sign); } } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_get_coeff_ptr() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; long coeff, sign; unsigned long coeff_bits, coeff_num; 
mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do { randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); } while (test_fmpz_poly->length == 0); for (unsigned long count3 = 1; (count3 < 1000) && result == 1; count3++) { coeff_bits = randint(FLINT_BITS-1); if (coeff_bits == 0) coeff = 0; else coeff = gmp_urandomb_ui(randstate, coeff_bits); coeff_num = randint(test_fmpz_poly->length); #if DEBUG printf("Length = %ld, index = %ld, bits = %ld, coeff = %ld\n", test_fmpz_poly->length, coeff_num, coeff_bits, coeff); #endif if (randint(2)) sign = -1L; else sign = 1L; coeff = sign*coeff; _fmpz_poly_set_coeff_si(test_fmpz_poly, coeff_num, coeff); fmpz_poly_check_normalisation(test_fmpz_poly); if (coeff == 0) sign = 0; fmpz_t coeff_ptr = fmpz_poly_get_coeff_ptr(test_fmpz_poly, coeff_num); if (coeff_ptr != NULL) result = (coeff_ptr[0] == sign); } if (test_fmpz_poly->length) { for (unsigned long count3 = 1; (count3 < 1000) && result == 1; count3++) { coeff_num = randint(test_fmpz_poly->length)+test_fmpz_poly->length; result = (fmpz_poly_get_coeff_ptr(test_fmpz_poly, coeff_num) == NULL); } } } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_normalise() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; int result = 1; unsigned long bits, length; unsigned long nz_coeff; long sign; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) 
{ length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); nz_coeff = randint(length+1)-1; if (randint(2)) sign = -1L; else sign = 1; if (nz_coeff != -1L) _fmpz_poly_set_coeff_si(test_fmpz_poly, nz_coeff, sign*1000); for (unsigned long i = nz_coeff+1; i < length; i++) _fmpz_poly_set_coeff_ui(test_fmpz_poly, i, 0); _fmpz_poly_normalise(test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); #if DEBUG printf("length = %ld, nonzero coefficient = %ld\n",_fmpz_poly_length(test_fmpz_poly), nz_coeff); #endif result = (_fmpz_poly_length(test_fmpz_poly) == nz_coeff+1); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_getset_coeff() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; unsigned long result = 1; unsigned long bits, length, rand_coeff; long sign, sign2; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do { randpoly(test_poly, length, bits); } while (test_poly->length != length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mp_limb_t * coeff1 = (mp_limb_t *) calloc(test_fmpz_poly->limbs, sizeof(mp_limb_t)); mp_limb_t * coeff2 = (mp_limb_t *) calloc(test_fmpz_poly->limbs, sizeof(mp_limb_t)); sign = _fmpz_poly_get_coeff(coeff1, test_fmpz_poly, randint(test_fmpz_poly->length)); rand_coeff = randint(test_fmpz_poly->length); if (test_fmpz_poly->length) { _fmpz_poly_set_coeff(test_fmpz_poly, rand_coeff, coeff1, sign, 
test_fmpz_poly->limbs); fmpz_poly_check_normalisation(test_fmpz_poly); sign2 = _fmpz_poly_get_coeff(coeff2, test_fmpz_poly, rand_coeff); for (unsigned long i = 0; (i < test_fmpz_poly->limbs) && (result == 1); i++) { result = (coeff1[i] == coeff2[i]); } if (sign != sign2) result = 0; } free(coeff1); free(coeff2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_getset_coeff() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; unsigned long result = 1; unsigned long bits, length, rand_coeff; long sign, sign2; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do { randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); } while (test_fmpz_poly->length != length); mp_limb_t * coeff1 = (mp_limb_t *) calloc(test_fmpz_poly->limbs, sizeof(mp_limb_t)); mp_limb_t * coeff2 = (mp_limb_t *) calloc(test_fmpz_poly->limbs, sizeof(mp_limb_t)); sign = fmpz_poly_get_coeff(coeff1, test_fmpz_poly, randint(test_fmpz_poly->length)); rand_coeff = randint(test_fmpz_poly->length); if (test_fmpz_poly->length) { fmpz_poly_set_coeff(test_fmpz_poly, rand_coeff, coeff1, sign, test_fmpz_poly->limbs); fmpz_poly_check_normalisation(test_fmpz_poly); sign2 = fmpz_poly_get_coeff(coeff2, test_fmpz_poly, rand_coeff); for (unsigned long i = 0; (i < test_fmpz_poly->limbs) && (result == 1); i++) { result = (coeff1[i] == coeff2[i]); } if (sign != sign2) result = 0; } free(coeff1); free(coeff2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_getset_coeff_fmpz() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; unsigned 
long result = 1; unsigned long bits, length, rand_coeff; long sign, sign2; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do { randpoly(test_poly, length, bits); } while (test_poly->length != length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_t coeff1 = fmpz_init(test_fmpz_poly->limbs); fmpz_t coeff2 = fmpz_init(test_fmpz_poly->limbs); _fmpz_poly_get_coeff_fmpz(coeff1, test_fmpz_poly, randint(test_fmpz_poly->length)); rand_coeff = randint(test_fmpz_poly->length); if (test_fmpz_poly->length) { _fmpz_poly_set_coeff_fmpz(test_fmpz_poly, rand_coeff, coeff1); fmpz_poly_check_normalisation(test_fmpz_poly); _fmpz_poly_get_coeff_fmpz(coeff2, test_fmpz_poly, rand_coeff); result = fmpz_equal(coeff1, coeff2); } fmpz_clear(coeff1); fmpz_clear(coeff2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_getset_coeff_mpz() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; unsigned long result = 1; unsigned long bits, length, rand_coeff; long sign, sign2; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do { randpoly(test_poly, length, bits); } while (test_poly->length != length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_t coeff1, coeff2; mpz_init(coeff1); mpz_init(coeff2); 
_fmpz_poly_get_coeff_mpz(coeff1, test_fmpz_poly, randint(test_fmpz_poly->length)); rand_coeff = randint(test_fmpz_poly->length); if (test_fmpz_poly->length) { _fmpz_poly_set_coeff_mpz(test_fmpz_poly, rand_coeff, coeff1); fmpz_poly_check_normalisation(test_fmpz_poly); _fmpz_poly_get_coeff_mpz(coeff2, test_fmpz_poly, rand_coeff); result = (mpz_cmp(coeff1, coeff2) == 0); } mpz_clear(coeff1); mpz_clear(coeff2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_getset_coeff_fmpz() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; unsigned long result = 1; unsigned long bits, length, rand_coeff; long sign, sign2; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do { randpoly(test_poly, length, bits); } while (test_poly->length != length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_t coeff1 = fmpz_init(test_fmpz_poly->limbs); fmpz_t coeff2 = fmpz_init(test_fmpz_poly->limbs); fmpz_poly_get_coeff_fmpz(coeff1, test_fmpz_poly, randint(test_fmpz_poly->length)); rand_coeff = randint(test_fmpz_poly->length); if (test_fmpz_poly->length) { fmpz_poly_set_coeff_fmpz(test_fmpz_poly, rand_coeff, coeff1); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_get_coeff_fmpz(coeff2, test_fmpz_poly, rand_coeff); result = fmpz_equal(coeff1, coeff2); } fmpz_clear(coeff1); fmpz_clear(coeff2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_getset_coeff_mpz() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; unsigned long result = 1; unsigned long bits, length, rand_coeff; long sign, sign2; mpz_poly_init(test_poly); 
for (unsigned long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do { randpoly(test_poly, length, bits); } while (test_poly->length != length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_t coeff1, coeff2; mpz_init(coeff1); mpz_init(coeff2); fmpz_poly_get_coeff_mpz(coeff1, test_fmpz_poly, randint(test_fmpz_poly->length)); rand_coeff = randint(test_fmpz_poly->length); if (test_fmpz_poly->length) { fmpz_poly_set_coeff_mpz(test_fmpz_poly, rand_coeff, coeff1); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_get_coeff_mpz(coeff2, test_fmpz_poly, rand_coeff); result = (mpz_cmp(coeff1, coeff2) == 0); } mpz_clear(coeff1); mpz_clear(coeff2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } /* Test for fmpz_poly_get_coeff_mpz_read_only: coeff1 is filled by the read-only getter and is never passed to mpz_init or mpz_clear in this test; only coeff2 is initialised and cleared. The value is then written to another index with fmpz_poly_set_coeff_mpz and read back with fmpz_poly_get_coeff_mpz. The write and comparison only run when the polynomial has at least two coefficients and the two random indices differ. NOTE(review): presumably coeff1 aliases the coefficient storage, which would explain both the missing init and the index check; confirm against the getter. Returns 1 if every iteration passed, 0 otherwise. */ int test_fmpz_poly_get_coeff_mpz_read_only() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly; unsigned long result = 1; unsigned long bits, length, rand_coeff; long sign, sign2; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); do { randpoly(test_poly, length, bits); } while (test_poly->length != length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_t coeff1, coeff2; mpz_init(coeff2); unsigned long rand_coeff2 = randint(test_fmpz_poly->length); fmpz_poly_get_coeff_mpz_read_only(coeff1, test_fmpz_poly, rand_coeff2); rand_coeff = 
randint(test_fmpz_poly->length); if ((test_fmpz_poly->length >= 2) && (rand_coeff != rand_coeff2)) { fmpz_poly_set_coeff_mpz(test_fmpz_poly, rand_coeff, coeff1); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_get_coeff_mpz(coeff2, test_fmpz_poly, rand_coeff); result = (mpz_cmp(coeff1, coeff2) == 0); } mpz_clear(coeff2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); return result; } /* Test for _fmpz_poly_set and _fmpz_poly_equal, run as three inner loops per outer iteration: (1) a copy made with _fmpz_poly_set must compare equal to its source; (2) after the word at offset 1 of a random coefficient of the copy is incremented, the two must compare unequal; (3) after the word at offset 0 of a random coefficient of the copy is multiplied by -1, the two must compare unequal. In loops 2 and 3 a zero word at offset 0 is replaced by 1. The copy is allocated with randint(30) limbs more than the source. Returns 1 if every iteration passed, 0 otherwise. */ int test__fmpz_poly_setequal() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; unsigned long altered_coeff; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1+randint(30)); /* Loop 1: a fresh copy must compare equal. */ for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_set(test_fmpz_poly2, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } /* Loop 2: alter the word at offset 1 of one coefficient of the copy and expect inequality. */ for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); do { randpoly(test_poly, length, bits); } while (test_poly->length != length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_set(test_fmpz_poly2, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); altered_coeff = randint(length); 
test_fmpz_poly2->coeffs[altered_coeff*(test_fmpz_poly2->limbs+1)+1]++; if (test_fmpz_poly2->coeffs[altered_coeff*(test_fmpz_poly2->limbs+1)] == 0) test_fmpz_poly2->coeffs[altered_coeff*(test_fmpz_poly2->limbs+1)] = 1; result = !_fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } /* Loop 3: multiply the word at offset 0 of one coefficient of the copy by -1 and expect inequality. */ for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); do { randpoly(test_poly, length, bits); } while (test_poly->length != length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_set(test_fmpz_poly2, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); altered_coeff = randint(length); test_fmpz_poly2->coeffs[altered_coeff*(test_fmpz_poly2->limbs+1)]*=-1L; if (test_fmpz_poly2->coeffs[altered_coeff*(test_fmpz_poly2->limbs+1)] == 0) test_fmpz_poly2->coeffs[altered_coeff*(test_fmpz_poly2->limbs+1)] = 1; result = !_fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } #if TESTFILE /* Compiled only when TESTFILE is non-zero. Round-trip test for fmpz_poly_fprint and fmpz_poly_fread: each random polynomial is written to a file named testfile, read back into a second polynomial, and the two must satisfy _fmpz_poly_equal. The file is removed once per outer iteration. NOTE(review): the fopen results are used without a NULL check. Returns 1 if every iteration passed, 0 otherwise. */ int test_fmpz_poly_freadprint() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1+randint(30)); FILE * testfile; for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); 
mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); testfile = fopen("testfile", "w"); fmpz_poly_fprint(test_fmpz_poly, testfile); fclose(testfile); testfile = fopen("testfile", "r"); fmpz_poly_fread(test_fmpz_poly2, testfile); fclose(testfile); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } remove("testfile"); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } #endif /* Round-trip test for fmpz_poly_to_string and fmpz_poly_from_string: the string produced from a random polynomial is parsed into a second polynomial; the parse must return non-zero and the result must satisfy _fmpz_poly_equal. The string is released with free. NOTE(review): the local testfile is declared but never used in this function. Returns 1 if every iteration passed, 0 otherwise. */ int test_fmpz_poly_tofromstring() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1+randint(30)); FILE * testfile; for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); char * strbuf = fmpz_poly_to_string(test_fmpz_poly); int OK = fmpz_poly_from_string(test_fmpz_poly2, strbuf); free(strbuf); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly) && OK; } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } /* Test for _fmpz_poly_zero_coeffs: it is called on a copy of a random polynomial with a count (zeroes) drawn as randint(length + 1). If zeroes equals the source length the copy must end up with length 0; otherwise every coefficient below zeroes must satisfy fmpz_is_zero and every remaining coefficient must equal the matching source coefficient. Returns 1 if every check passed, 0 otherwise. */ int test__fmpz_poly_zero_coeffs() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length, zeroes; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); 
fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1+randint(30)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_set(test_fmpz_poly2, test_fmpz_poly); zeroes = randint(test_fmpz_poly->length + 1); _fmpz_poly_zero_coeffs(test_fmpz_poly2, zeroes); fmpz_poly_check_normalisation(test_fmpz_poly2); unsigned long i; fmpz_t coeff1, coeff2; /* coeff1 and coeff2 are only assigned from _fmpz_poly_get_coeff_ptr; they are never initialised or cleared here */ if (zeroes == test_fmpz_poly->length) { result = (test_fmpz_poly2->length == 0); } else { for (i = 0; i < zeroes; i++) { coeff1 = _fmpz_poly_get_coeff_ptr(test_fmpz_poly2, i); result &= fmpz_is_zero(coeff1); } for (i = zeroes; i < test_fmpz_poly->length; i++) { coeff1 = _fmpz_poly_get_coeff_ptr(test_fmpz_poly, i); coeff2 = _fmpz_poly_get_coeff_ptr(test_fmpz_poly2, i); result &= fmpz_equal(coeff1, coeff2); } } } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } /* Test for fmpz_poly_zero_coeffs: it is called on a copy of a random polynomial with a count (zeroes) drawn as randint(2*length). Whenever zeroes is greater than or equal to the source length the copy must end up with length 0; otherwise every coefficient below zeroes must satisfy fmpz_is_zero and every remaining coefficient must equal the matching source coefficient. Returns 1 if every check passed, 0 otherwise. */ int test_fmpz_poly_zero_coeffs() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length, zeroes; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1+randint(30)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_set(test_fmpz_poly2, test_fmpz_poly); 
zeroes = randint(2*test_fmpz_poly->length); fmpz_poly_zero_coeffs(test_fmpz_poly2, zeroes); fmpz_poly_check_normalisation(test_fmpz_poly2); unsigned long i; fmpz_t coeff1, coeff2; if (zeroes >= test_fmpz_poly->length) { result = (test_fmpz_poly2->length == 0); } else { for (i = 0; i < zeroes; i++) { coeff1 = _fmpz_poly_get_coeff_ptr(test_fmpz_poly2, i); result &= fmpz_is_zero(coeff1); } for (i = zeroes; i < test_fmpz_poly->length; i++) { coeff1 = _fmpz_poly_get_coeff_ptr(test_fmpz_poly, i); coeff2 = _fmpz_poly_get_coeff_ptr(test_fmpz_poly2, i); result &= fmpz_equal(coeff1, coeff2); } } } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } /* Test for fmpz_poly_swap: two random polynomials with independently drawn lengths are generated, the first is copied into test_fmpz_poly3 with _fmpz_poly_set, the first and second are swapped, and the second must then satisfy _fmpz_poly_equal against the saved copy. Returns 1 if every iteration passed, 0 otherwise. */ int test_fmpz_poly_swap() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, length, length2; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly3, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_fit_length(test_fmpz_poly2, length2); fmpz_poly_fit_length(test_fmpz_poly3, length); randpoly(test_poly, length2, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly); _fmpz_poly_set(test_fmpz_poly3, test_fmpz_poly); fmpz_poly_swap(test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly3); } fmpz_poly_clear(test_fmpz_poly); 
fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } mpz_poly_clear(test_poly); return result; } /* Test for _fmpz_poly_left_shift and _fmpz_poly_right_shift, run as two inner loops per outer iteration. Loop 1: shifting left and then right in place by the same randint(100) amount must reproduce the original. Loop 2: a right shift by randint(length) into test_fmpz_poly2 is compared against test_fmpz_poly3, which is never initialised or cleared in this function; its limbs, length and coeffs fields are assigned by hand so that it starts shift coefficients into test_fmpz_poly. Returns 1 if every iteration passed, 0 otherwise. */ int test__fmpz_poly_shift() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, length; unsigned long shift; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1); /* Loop 1: left shift followed by right shift, both in place. */ for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif shift = randint(100); fmpz_poly_fit_length(test_fmpz_poly, length+shift); fmpz_poly_fit_length(test_fmpz_poly2, length+shift); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_set(test_fmpz_poly2, test_fmpz_poly); _fmpz_poly_left_shift(test_fmpz_poly, test_fmpz_poly, shift); fmpz_poly_check_normalisation(test_fmpz_poly); _fmpz_poly_right_shift(test_fmpz_poly, test_fmpz_poly, shift); fmpz_poly_check_normalisation(test_fmpz_poly); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } /* Loop 2: right shift into a separate output, compared with a hand-built view of the source. */ for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif shift = randint(length); fmpz_poly_fit_length(test_fmpz_poly, length+shift); fmpz_poly_fit_length(test_fmpz_poly2, length+shift); do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); test_fmpz_poly3->limbs = test_fmpz_poly->limbs; test_fmpz_poly3->length = test_fmpz_poly->length-shift; test_fmpz_poly3->coeffs = test_fmpz_poly->coeffs+shift*(test_fmpz_poly->limbs+1); _fmpz_poly_right_shift(test_fmpz_poly2, test_fmpz_poly, shift); fmpz_poly_check_normalisation(test_fmpz_poly); 
fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly3, test_fmpz_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } /* Test for _fmpz_poly_neg: the source is negated into test_fmpz_poly2 and that is negated into test_fmpz_poly3; the result must satisfy _fmpz_poly_equal against the source, and the word at offset 0 of a randomly chosen coefficient of the source must equal the negative of the same word in test_fmpz_poly2. The three polynomials are allocated from bits, bits+extra_bits1 and bits+extra_bits1+extra_bits2 respectively. When length is 0 only the length of test_fmpz_poly2 is checked. Returns 1 if every iteration passed, 0 otherwise. */ int test__fmpz_poly_neg() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, length, check_coeff; unsigned long extra_bits1, extra_bits2; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; extra_bits1 = randint(200); extra_bits2 = randint(200); fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits+extra_bits1-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly3, 1, (bits+extra_bits1+extra_bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); fmpz_poly_fit_length(test_fmpz_poly3, length); do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); check_coeff = randint(length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_neg(test_fmpz_poly2, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); _fmpz_poly_neg(test_fmpz_poly3, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); if (length == 0) result = (test_fmpz_poly2->length == 0); else result = _fmpz_poly_equal(test_fmpz_poly, test_fmpz_poly3) && (test_fmpz_poly->coeffs[check_coeff*(test_fmpz_poly->limbs+1)] == -test_fmpz_poly2->coeffs[check_coeff*(test_fmpz_poly2->limbs+1)]); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } mpz_poly_clear(test_poly); return result; } /* Test for _fmpz_poly_add, checked against mpz_poly_add, in three outer loops: a separate output polynomial, the output aliased with the second operand, and a polynomial added to itself. Each result is converted back with fmpz_poly_to_mpz_poly and compared using mpz_poly_equal. Returns 1 if every iteration passed, 0 otherwise. */ int test__fmpz_poly_add() { 
mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, bits2, bits3, length, length2, max_length; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); /* Loop 1: separate output polynomial, allocated from bits3 which is larger than both bits and bits2. */ for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = bits+random_ulong(200); bits3 = bits2+random_ulong(200)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly3, 1, (bits3-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(10); length2 = random_ulong(10); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld\n",length, length2, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); max_length = (length > length2) ? length : length2; fmpz_poly_fit_length(test_fmpz_poly3, max_length); randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2); mpz_poly_add(test_poly3, test_poly, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); _fmpz_poly_add(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); _fmpz_poly_normalise(test_fmpz_poly3); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly3); /* with DEBUG2 set, a failing case prints both operands, the expected sum and the computed sum */ #if DEBUG2 if (!result) { mpz_poly_print(test_poly); printf("\n"); mpz_poly_print(test_poly2); printf("\n"); mpz_poly_print(test_poly3); printf("\n"); mpz_poly_print(test_poly4); printf("\n"); } #endif mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } /* Loop 2: the sum is written over the second operand. */ for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000) + 
1; bits2 = bits+random_ulong(200) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = length + random_ulong(1000); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n",length, length2, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); randpoly(test_poly, length, bits); /* the second operand is generated with bits2-1 bits although it is allocated from bits2 */ randpoly(test_poly2, length2, bits2-1); mpz_poly_add(test_poly3, test_poly, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); _fmpz_poly_add(test_fmpz_poly2, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly2); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG2 if (!result) { mpz_poly_print(test_poly); printf("\n"); mpz_poly_print(test_poly2); printf("\n"); mpz_poly_print(test_poly3); printf("\n"); mpz_poly_print(test_poly4); printf("\n"); } #endif mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } /* Loop 3: a polynomial is added to itself; the output is allocated with bits/FLINT_BITS+1 limbs. NOTE(review): bits2 is not assigned in this loop, so the DEBUG printf inside it prints the value left over from the previous loop. */ for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, bits/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_add(test_poly3, test_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_add(test_fmpz_poly2, test_fmpz_poly, test_fmpz_poly); 
fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly2); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG2 if (!result) { mpz_poly_print(test_poly); printf("\n"); mpz_poly_print(test_poly3); printf("\n"); mpz_poly_print(test_poly4); printf("\n"); } #endif mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } /* Test for _fmpz_poly_sub, checked against mpz_poly_sub, in three outer loops: a separate output polynomial, the output aliased with the first operand (test_fmpz_poly2 minus test_fmpz_poly), and a polynomial subtracted from itself. Each result is converted back with fmpz_poly_to_mpz_poly and compared using mpz_poly_equal. Returns 1 if every iteration passed, 0 otherwise. */ int test__fmpz_poly_sub() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, bits2, bits3, length, length2, max_length; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); /* Loop 1: separate output polynomial, allocated from bits3 which is larger than both bits and bits2. */ for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = bits+random_ulong(200); bits3 = bits2+random_ulong(200)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly3, 1, (bits3-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = random_ulong(1000); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld\n",length, length2, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); max_length = (length > length2) ? 
length : length2; fmpz_poly_fit_length(test_fmpz_poly3, max_length); randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2); mpz_poly_sub(test_poly3, test_poly, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); _fmpz_poly_sub(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); /* with DEBUG set, the expected and computed coefficient lists are printed */ #if DEBUG for (unsigned j = 0; j < test_poly3->length; j++) gmp_printf("%Zd, ",test_poly3->coeffs[j]); printf("\n\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zd, ",test_poly4->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } /* Loop 2: test_fmpz_poly2 minus test_fmpz_poly is written back into test_fmpz_poly2. */ for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; bits2 = bits+random_ulong(200) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = length + random_ulong(1000); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n",length, length2, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2-1); mpz_poly_sub(test_poly3, test_poly2, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); _fmpz_poly_sub(test_fmpz_poly2, test_fmpz_poly2, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly2); #if DEBUG for (unsigned j = 0; j < test_poly3->length; j++) 
gmp_printf("%Zd, ",test_poly3->coeffs[j]); printf("\n\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zd, ",test_poly4->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } /* Loop 3: a polynomial is subtracted from itself. NOTE(review): bits2 is not assigned in this loop, so the DEBUG printf inside it prints the value left over from the previous loop. */ for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_sub(test_poly3, test_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_sub(test_fmpz_poly2, test_fmpz_poly, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly2); #if DEBUG for (unsigned j = 0; j < test_poly3->length; j++) gmp_printf("%Zd, ",test_poly3->coeffs[j]); printf("\n\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zd, ",test_poly4->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } /* Test for fmpz_poly_add, checked against mpz_poly_add, in two outer loops: an output created with fmpz_poly_init on which this test never calls fmpz_poly_fit_length, and the output aliased with the second operand. Each result is converted back with fmpz_poly_to_mpz_poly and compared using mpz_poly_equal. NOTE(review): max_length is declared but unused, and bits3 is assigned but never read in this function. Returns 1 if every iteration passed, 0 otherwise. */ int test_fmpz_poly_add() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, bits2, bits3, length, length2, max_length; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 200) && 
(result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = bits+random_ulong(200); bits3 = bits2+random_ulong(200)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); /* the output is created with fmpz_poly_init, with no length or limb count given */ fmpz_poly_init(test_fmpz_poly3); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(10); length2 = random_ulong(10); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld\n",length, length2, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2); mpz_poly_add(test_poly3, test_poly, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); fmpz_poly_add(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG2 if (!result) { mpz_poly_print(test_poly); printf("\n"); mpz_poly_print(test_poly2); printf("\n"); mpz_poly_print(test_poly3); printf("\n"); mpz_poly_print(test_poly4); printf("\n"); } #endif mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } /* Second outer loop: the sum is written over the second operand. */ for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; bits2 = bits+random_ulong(200) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = length + random_ulong(1000); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n",length, length2, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, 
length2); randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2-1); mpz_poly_add(test_poly3, test_poly, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); fmpz_poly_add(test_fmpz_poly2, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly2); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG2 if (!result) { mpz_poly_print(test_poly); printf("\n"); mpz_poly_print(test_poly2); printf("\n"); mpz_poly_print(test_poly3); printf("\n"); mpz_poly_print(test_poly4); printf("\n"); } #endif mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } /* Test for fmpz_poly_sub, checked against mpz_poly_sub, in two outer loops: an output created with fmpz_poly_init on which this test never calls fmpz_poly_fit_length, and the output aliased with the first operand (test_fmpz_poly2 minus test_fmpz_poly). Each result is converted back with fmpz_poly_to_mpz_poly and compared using mpz_poly_equal. NOTE(review): max_length is declared but unused, and bits3 is assigned but never read in this function. Returns 1 if every iteration passed, 0 otherwise. */ int test_fmpz_poly_sub() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, bits2, bits3, length, length2, max_length; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = bits+random_ulong(200); bits3 = bits2+random_ulong(200)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); fmpz_poly_init(test_fmpz_poly3); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = random_ulong(1000); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld\n",length, length2, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2); mpz_poly_sub(test_poly3, test_poly, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, 
test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); fmpz_poly_sub(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); /* with DEBUG set, the expected and computed coefficient lists are printed */ #if DEBUG for (unsigned j = 0; j < test_poly3->length; j++) gmp_printf("%Zd, ",test_poly3->coeffs[j]); printf("\n\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zd, ",test_poly4->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } /* Second outer loop: test_fmpz_poly2 minus test_fmpz_poly is written back into test_fmpz_poly2. */ for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; bits2 = bits+random_ulong(200) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = length + random_ulong(1000); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n",length, length2, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2-1); mpz_poly_sub(test_poly3, test_poly2, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); fmpz_poly_sub(test_fmpz_poly2, test_fmpz_poly2, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly2); #if DEBUG for (unsigned j = 0; j < test_poly3->length; j++) gmp_printf("%Zd, ",test_poly3->coeffs[j]); printf("\n\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zd, ",test_poly4->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); 
mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } /* Test for _fmpz_poly_scalar_mul_ui: a random polynomial is multiplied by a scalar drawn as randint(34682739) into an output allocated with one more limb than the input, the product is converted back with fmpz_poly_to_mpz_poly, and each of its coefficients is compared with mpz_mul_ui applied to the matching input coefficient. Two outer loops are run: one with bits from random_ulong(1000)+1 and length from random_ulong(1000), and one with bits from random_ulong(150)+1 and length from random_ulong(40). NOTE(review): only the first test_poly->length coefficients are compared; the length of the product itself is not checked. Returns 1 if every comparison passed, 0 otherwise. */ int test__fmpz_poly_scalar_mul_ui() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; unsigned long mult; mpz_t temp; mpz_init(temp); mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739); _fmpz_poly_scalar_mul_ui(test_fmpz_poly2, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul_ui(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } /* Second outer loop: the same check with smaller bit sizes and lengths, over more outer iterations. */ for (unsigned long count1 = 1; (count1 < 8000) && (result == 1) ; count1++) { bits = random_ulong(150)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(40); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, 
length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739); _fmpz_poly_scalar_mul_ui(test_fmpz_poly2, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul_ui(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_clear(temp); return result; } int test_fmpz_poly_scalar_mul_ui() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; unsigned long mult; mpz_t temp; mpz_init(temp); mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739); fmpz_poly_scalar_mul_ui(test_fmpz_poly2, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul_ui(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } 
mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 8000) && (result == 1) ; count1++) { bits = random_ulong(150)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = random_ulong(40); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739); fmpz_poly_scalar_mul_ui(test_fmpz_poly2, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul_ui(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 8000) && (result == 1) ; count1++) { bits = random_ulong(150)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(40); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739); fmpz_poly_scalar_mul_ui(test_fmpz_poly, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < 
test_poly->length; i++) { mpz_mul_ui(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_clear(temp); return result; } int test__fmpz_poly_scalar_mul_si() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length, sign; long mult; mpz_t temp; mpz_init(temp); mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = (long) randint(34682739); sign = randint(2); if (sign) mult = -mult; _fmpz_poly_scalar_mul_si(test_fmpz_poly2, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul_si(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_clear(temp); return result; } int test_fmpz_poly_scalar_mul_si() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length, sign; long mult; mpz_t temp; mpz_init(temp); mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 
200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = (long) randint(34682739); sign = randint(2); if (sign) mult = -mult; fmpz_poly_scalar_mul_si(test_fmpz_poly2, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul_si(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 8000) && (result == 1) ; count1++) { bits = random_ulong(150)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = random_ulong(40); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = (long) randint(34682739); sign = randint(2); if (sign) mult = -mult; fmpz_poly_scalar_mul_si(test_fmpz_poly2, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); 
#endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul_si(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 8000) && (result == 1) ; count1++) { bits = random_ulong(150)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = (long) randint(34682739); sign = randint(2); if (sign) mult = -mult; fmpz_poly_scalar_mul_si(test_fmpz_poly, test_fmpz_poly, mult); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul_si(temp, test_poly->coeffs[i], mult); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_clear(temp); return result; } int test__fmpz_poly_scalar_div_exact_ui() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, length; unsigned long mult; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly3, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG 
printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); fmpz_poly_fit_length(test_fmpz_poly3, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739) + 1; _fmpz_poly_scalar_mul_ui(test_fmpz_poly2, test_fmpz_poly, mult); _fmpz_poly_scalar_div_exact_ui(test_fmpz_poly3, test_fmpz_poly2, mult); fmpz_poly_check_normalisation(test_fmpz_poly3); result = _fmpz_poly_equal(test_fmpz_poly3, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739) + 1; _fmpz_poly_scalar_mul_ui(test_fmpz_poly2, test_fmpz_poly, mult); _fmpz_poly_scalar_div_exact_ui(test_fmpz_poly2, test_fmpz_poly2, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly3, 1, (bits-1)/FLINT_BITS+3); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length 
= %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); fmpz_poly_fit_length(test_fmpz_poly3, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739) + 1; _fmpz_poly_scalar_mul_ui(test_fmpz_poly2, test_fmpz_poly, mult); _fmpz_poly_scalar_div_exact_ui(test_fmpz_poly3, test_fmpz_poly2, mult); fmpz_poly_check_normalisation(test_fmpz_poly3); result = _fmpz_poly_equal(test_fmpz_poly3, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739) + 1; _fmpz_poly_scalar_mul_ui(test_fmpz_poly2, test_fmpz_poly, mult); _fmpz_poly_scalar_div_exact_ui(test_fmpz_poly2, test_fmpz_poly2, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_scalar_div_exact_si() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, length; long mult; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, 
(bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly3, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); fmpz_poly_fit_length(test_fmpz_poly3, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739) + 1; if (randint(2)) mult = -mult; _fmpz_poly_scalar_mul_si(test_fmpz_poly2, test_fmpz_poly, mult); _fmpz_poly_scalar_div_exact_si(test_fmpz_poly3, test_fmpz_poly2, mult); fmpz_poly_check_normalisation(test_fmpz_poly3); result = _fmpz_poly_equal(test_fmpz_poly3, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("2:length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739) + 1; if (randint(2)) mult = -mult; _fmpz_poly_scalar_mul_si(test_fmpz_poly2, test_fmpz_poly, mult); _fmpz_poly_scalar_div_exact_si(test_fmpz_poly2, test_fmpz_poly2, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = 
random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly3, 1, (bits-1)/FLINT_BITS+3); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); fmpz_poly_fit_length(test_fmpz_poly3, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739) + 1; if (randint(2)) mult = -mult; _fmpz_poly_scalar_mul_si(test_fmpz_poly2, test_fmpz_poly, mult); _fmpz_poly_scalar_div_exact_si(test_fmpz_poly3, test_fmpz_poly2, mult); fmpz_poly_check_normalisation(test_fmpz_poly3); result = _fmpz_poly_equal(test_fmpz_poly3, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mult = randint(34682739) + 1; if (randint(2)) mult = -mult; _fmpz_poly_scalar_mul_si(test_fmpz_poly2, test_fmpz_poly, mult); _fmpz_poly_scalar_div_exact_si(test_fmpz_poly2, test_fmpz_poly2, mult); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return 
result; } int test__fmpz_poly_scalar_tdiv_ui() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; unsigned long div; mpz_t temp; mpz_init(temp); mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 20) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; //bits = 64*1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); //length = 10000; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); div = randint(34682739)+1; for (unsigned long i = 0; i < 100; i++) { _fmpz_poly_scalar_tdiv_ui(test_fmpz_poly2, test_fmpz_poly, div); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_fmpz_poly2->length; i++) { mpz_tdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } for (unsigned long i = test_fmpz_poly2->length; i < test_poly->length; i++) { mpz_tdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp_ui(temp, 0) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; //bits = 64*1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 50) && (result == 1); count2++) { length = random_ulong(1000); //length = 10000; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); 
#endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); div = randint(34682739)+1; _fmpz_poly_scalar_tdiv_ui(test_fmpz_poly, test_fmpz_poly, div); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_fmpz_poly->length; i++) { mpz_tdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } for (unsigned long i = test_fmpz_poly->length; i < test_poly->length; i++) { mpz_tdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp_ui(temp, 0) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_clear(temp); return result; } int test__fmpz_poly_scalar_div_ui() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; unsigned long div; mpz_t temp; mpz_init(temp); mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 20) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; //bits = 64*1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); //length = 10000; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); div = randint(34682739)+1; for (unsigned long i = 0; i < 100; i++) { _fmpz_poly_scalar_div_ui(test_fmpz_poly2, test_fmpz_poly, div); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if 
DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_fmpz_poly2->length; i++) { mpz_fdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } for (unsigned long i = test_fmpz_poly2->length; i < test_poly->length; i++) { mpz_fdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp_ui(temp, 0) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; //bits = 64*1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++) { length = random_ulong(1000); //length = 10000; #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); div = randint(34682739)+1; _fmpz_poly_scalar_div_ui(test_fmpz_poly, test_fmpz_poly, div); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_fmpz_poly->length; i++) { mpz_fdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } for (unsigned long i = test_fmpz_poly->length; i < test_poly->length; i++) { mpz_fdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp_ui(temp, 0) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_clear(temp); return result; } int test__fmpz_poly_scalar_tdiv_si() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; long div; mpz_t temp; mpz_init(temp); mpz_poly_init(test_poly); for (unsigned 
long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); div = randint(34682739)+1; if (randint(2)) div = -div; _fmpz_poly_scalar_tdiv_si(test_fmpz_poly2, test_fmpz_poly, div); fmpz_poly_check_normalisation(test_fmpz_poly2); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_fmpz_poly2->length; i++) { if (div < 0) { mpz_tdiv_q_ui(temp, test_poly->coeffs[i], -div); mpz_neg(temp, temp); } else mpz_tdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } for (unsigned long i = test_fmpz_poly2->length; i < test_poly->length; i++) { if (div < 0) mpz_tdiv_q_ui(temp, test_poly->coeffs[i], -div); else mpz_tdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp_ui(temp, 0) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 400) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("length = %ld, bits = %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); div = randint(34682739)+1; if (randint(2)) div = 
-div; _fmpz_poly_scalar_tdiv_si(test_fmpz_poly, test_fmpz_poly, div); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_fmpz_poly->length; i++) { if (div < 0) { mpz_tdiv_q_ui(temp, test_poly->coeffs[i], -div); mpz_neg(temp, temp); } else mpz_tdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp(temp, test_poly2->coeffs[i]) == 0); } for (unsigned long i = test_fmpz_poly->length; i < test_poly->length; i++) { if (div < 0) mpz_tdiv_q_ui(temp, test_poly->coeffs[i], -div); else mpz_tdiv_q_ui(temp, test_poly->coeffs[i], div); result &= (mpz_cmp_ui(temp, 0) == 0); } mpz_poly_clear(test_poly2); } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_clear(temp); return result; }

/*
   Returns 1 if quot is the coefficient-wise floor quotient of poly by the
   nonzero signed scalar div, and 0 otherwise. temp is scratch space
   supplied by the caller.

   A coefficient missing from either polynomial counts as zero, so neither
   coefficient array is read at or beyond its own length. With DEBUG2 set,
   the first mismatching coefficient is printed.
*/
static int scalar_div_si_quotient_ok(mpz_poly_t poly, mpz_poly_t quot,
                                     long div, mpz_t temp)
{
   unsigned long top = (poly->length > quot->length) ? poly->length : quot->length;
   int ok = 1;

   for (unsigned long i = 0; ok && (i < top); i++)
   {
      /* expected coefficient */
      if (i >= poly->length) mpz_set_ui(temp, 0);
      else if (div < 0L)
      {
         /* floor(a / div) == -ceil(a / -div) when div is negative */
         mpz_cdiv_q_ui(temp, poly->coeffs[i], (unsigned long) -div);
         mpz_neg(temp, temp);
      } else mpz_fdiv_q_ui(temp, poly->coeffs[i], (unsigned long) div);

      /* actual coefficient, or zero where quot has none */
      if (i < quot->length) ok = (mpz_cmp(temp, quot->coeffs[i]) == 0);
      else ok = (mpz_cmp_ui(temp, 0) == 0);

#if DEBUG2
      if (!ok && (i < poly->length))
      {
         if (i < quot->length)
            gmp_printf("%Zd, %ld, %Zd, %Zd\n", poly->coeffs[i], div, temp, quot->coeffs[i]);
         else
            gmp_printf("%Zd, %ld, %Zd\n", poly->coeffs[i], div, temp);
      }
#endif
   }

   return ok;
}

/*
   Tests _fmpz_poly_scalar_div_si (floor division by a signed scalar)
   against GMP.

   Two phases of 399 rounds x 10 trials each: first with the quotient
   written to a separate polynomial, then in place. The divisor is
   randint(34682739) + 1, negated when randint(2) is nonzero, so it is
   never zero.

   Returns 1 if all trials passed, 0 at the first failure.
*/
int test__fmpz_poly_scalar_div_si()
{
   mpz_poly_t test_poly, test_poly2;
   fmpz_poly_t test_fmpz_poly, test_fmpz_poly2;
   int result = 1;
   unsigned long bits, length;
   long div;
   mpz_t temp;

   mpz_init(temp);
   mpz_poly_init(test_poly);

   for (int in_place = 0; (in_place <= 1) && (result == 1); in_place++)
   {
      for (unsigned long count1 = 1; (count1 < 400) && (result == 1); count1++)
      {
         bits = random_ulong(1000) + 1;

         fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1);
         if (!in_place) fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+2);

         for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++)
         {
            length = random_ulong(1000);
#if DEBUG
            printf("length = %lu, bits = %lu\n", length, bits);
#endif
            fmpz_poly_fit_length(test_fmpz_poly, length);
            if (!in_place) fmpz_poly_fit_length(test_fmpz_poly2, length);
            randpoly(test_poly, length, bits);
            mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly);

            div = randint(34682739) + 1;
            if (randint(2)) div = -div;

            mpz_poly_init(test_poly2);
            if (in_place)
            {
               _fmpz_poly_scalar_div_si(test_fmpz_poly, test_fmpz_poly, div);
               fmpz_poly_check_normalisation(test_fmpz_poly);
               fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly);
            } else
            {
               _fmpz_poly_scalar_div_si(test_fmpz_poly2, test_fmpz_poly, div);
               fmpz_poly_check_normalisation(test_fmpz_poly2);
               fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2);
            }
#if DEBUG
            printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly));
#endif
            /* test_poly still holds the original dividend */
            result = scalar_div_si_quotient_ok(test_poly, test_poly2, div, temp);

            mpz_poly_clear(test_poly2);
         }

         fmpz_poly_clear(test_fmpz_poly);
         if (!in_place) fmpz_poly_clear(test_fmpz_poly2);
      }
   }

   mpz_poly_clear(test_poly);
   mpz_clear(temp);

   return result;
}

int test__fmpz_poly_mul_classical() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100)+1; length2 = random_ulong(100); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_mul_naive(test_poly3, test_poly, test_poly2); mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); 
fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); length = random_ulong(35); unsigned long log_length = 0; while ((1<limbs+1); //(output_bits-1)/FLINT_BITS+1); else fmpz_poly_init(test_fmpz_poly3); for (unsigned long count2 = 0; (count2 < 1) && (result == 1); count2++) { _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); #if DEBUG mpz_poly_print(test_poly3);printf("\n"); mpz_poly_print(test_poly4);printf("\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); } mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100)+1; length2 = random_ulong(100); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_mul_naive(test_poly3, test_poly, test_poly2); mpz_poly_init(test_poly4); 
fmpz_poly_fit_length(test_fmpz_poly, length+length2-1); fmpz_poly_fit_limbs(test_fmpz_poly, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100)+1; length2 = random_ulong(100); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_mul_naive(test_poly3, test_poly, test_poly2); mpz_poly_init(test_poly4); fmpz_poly_fit_length(test_fmpz_poly2, length+length2-1); fmpz_poly_fit_limbs(test_fmpz_poly2, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly2, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly2); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul_classical_trunc() { mpz_poly_t 
test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100)+1; length2 = random_ulong(100); if (length+length2 == 0) trunc = 0; else trunc = random_ulong(length+length2); #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly4, trunc, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_mul_classical_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } 
for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100)+1; if (length == 0) trunc = 0; else trunc = random_ulong(2*length); #if DEBUG printf("length = %ld, trunc = %ld, bits = %ld\n", length, trunc, bits); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, 2*length-1, (2*bits-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly4, trunc, (2*bits-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_mul_classical_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100)+1; length2 = random_ulong(100); if (length+length2 == 0) trunc = 0; else trunc = random_ulong(length+length2); #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do 
randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); fmpz_poly_fit_length(test_fmpz_poly, trunc); fmpz_poly_fit_limbs(test_fmpz_poly, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_mul_classical_trunc(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul_classical_trunc_left() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); length2 = random_ulong(100); if (length+length2 < 2) trunc = 0; else trunc = random_ulong(length+length2-1)+1; #if DEBUG printf("length = %ld, length2 = %ld, trunc 
= %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2 < 2) { fmpz_poly_init2(test_fmpz_poly3, 1, (bits+bits2-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly4, 1, (bits+bits2-1)/FLINT_BITS+2); } else { fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly4, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); } _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_zero_coeffs(test_fmpz_poly3, FLINT_MIN(trunc, test_fmpz_poly3->length)); _fmpz_poly_mul_classical_trunc_left(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); if (length < 1) trunc = 0; else trunc = random_ulong(2*length-1)+1; #if DEBUG printf("length = %ld, trunc = %ld, bits = %ld\n", length, trunc, bits); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); fmpz_poly_fit_length(test_fmpz_poly, length); 
mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_init(test_poly4); if (length < 1) { fmpz_poly_init2(test_fmpz_poly3, 1, (2*bits-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly4, 1, (2*bits-1)/FLINT_BITS+2); } else { fmpz_poly_init2(test_fmpz_poly3, 2*length-1, (2*bits-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly4, 2*length-1, (2*bits-1)/FLINT_BITS+2); } _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); _fmpz_poly_zero_coeffs(test_fmpz_poly3, FLINT_MIN(trunc, test_fmpz_poly3->length)); _fmpz_poly_mul_classical_trunc_left(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); length2 = random_ulong(100); if (length+length2 < 2) trunc = 0; else trunc = random_ulong(length+length2-1)+1; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2 < 2) { 
fmpz_poly_init2(test_fmpz_poly3, 1, (bits+bits2-1)/FLINT_BITS+2); fmpz_poly_fit_length(test_fmpz_poly, 1); fmpz_poly_fit_limbs(test_fmpz_poly, (bits+bits2-1)/FLINT_BITS+2); } else { fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); fmpz_poly_fit_length(test_fmpz_poly, length+length2-1); fmpz_poly_fit_limbs(test_fmpz_poly, (bits+bits2-1)/FLINT_BITS+2); } _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_zero_coeffs(test_fmpz_poly3, FLINT_MIN(trunc, test_fmpz_poly3->length)); _fmpz_poly_mul_classical_trunc_left(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul_karatsuba() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, bits2, length, length2, max_length, log_length, output_bits; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; bits2 = random_ulong(2000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(35); length = random_ulong(35); max_length = FLINT_MIN(length, length2); log_length = 0; while ((1<limbs+test_fmpz_poly2->limbs+1); //(output_bits-1)/FLINT_BITS+1); else fmpz_poly_init(test_fmpz_poly3); for (unsigned long count2 = 0; (count2 < 1) && (result == 1); count2++) { 
_fmpz_poly_mul_karatsuba(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); #if DEBUG mpz_poly_print(test_poly3);printf("\n"); mpz_poly_print(test_poly4);printf("\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); } mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); length = random_ulong(35); log_length = 0; while ((1<limbs+1); //(output_bits-1)/FLINT_BITS+1); else fmpz_poly_init(test_fmpz_poly3); for (unsigned long count2 = 0; (count2 < 1) && (result == 1); count2++) { _fmpz_poly_mul_karatsuba(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); #if DEBUG mpz_poly_print(test_poly3);printf("\n"); mpz_poly_print(test_poly4);printf("\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); } mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; bits2 = random_ulong(2000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(35); length = random_ulong(35); max_length = FLINT_MIN(length, length2); log_length = 0; while ((1<limbs+test_fmpz_poly2->limbs+1); //(output_bits-1)/FLINT_BITS+1); } _fmpz_poly_mul_karatsuba(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); #if DEBUG mpz_poly_print(test_poly3);printf("\n"); mpz_poly_print(test_poly4);printf("\n"); #endif result = 
mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; bits2 = random_ulong(2000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(35); length = random_ulong(35); max_length = FLINT_MIN(length, length2); log_length = 0; while ((1<limbs+test_fmpz_poly2->limbs+1); //(output_bits-1)/FLINT_BITS+1); } _fmpz_poly_mul_karatsuba(test_fmpz_poly2, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly3);printf("\n"); mpz_poly_print(test_poly4);printf("\n"); #endif result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul_karatsuba_trunc() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; bits2 = random_ulong(2000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length2 = random_ulong(35); length = random_ulong(35); if (length+length2 == 0) trunc = 0; else trunc = random_ulong(length+length2); #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, 
bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init2(test_fmpz_poly4, trunc, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_mul_karatsuba_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); result = _fmpz_poly_equal(test_fmpz_poly4, test_fmpz_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(35); if (length == 0) trunc = 0; else trunc = random_ulong(length*2); #if DEBUG printf("length = %ld, trunc = %ld, bits = %ld\n", length, trunc, bits); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); if (length) fmpz_poly_init2(test_fmpz_poly3, 2*length-1, (2*bits-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init2(test_fmpz_poly4, trunc, (2*bits-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly3, 
test_fmpz_poly, test_fmpz_poly); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_mul_karatsuba_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); result = _fmpz_poly_equal(test_fmpz_poly4, test_fmpz_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; bits2 = random_ulong(2000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length2 = random_ulong(35); length = random_ulong(35); if (length+length2 == 0) trunc = 0; else trunc = random_ulong(length+length2); #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_fit_length(test_fmpz_poly, trunc); fmpz_poly_fit_limbs(test_fmpz_poly, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_mul_karatsuba_trunc(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly); result = 
_fmpz_poly_equal(test_fmpz_poly, test_fmpz_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif fmpz_poly_clear(test_fmpz_poly3); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test__fmpz_poly_mul_karatsuba_trunc_left() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); length2 = random_ulong(100); if (length+length2 < 2) trunc = 0; else trunc = random_ulong(length+length2-1)+1; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) { fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly4, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); } else { fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); } _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, 
test_fmpz_poly2); _fmpz_poly_zero_coeffs(test_fmpz_poly3, FLINT_MIN(trunc, test_fmpz_poly3->length)); _fmpz_poly_mul_karatsuba_trunc_left(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); if (length < 1) trunc = 0; else trunc = random_ulong(2*length-1)+1; #if DEBUG printf("length = %ld, trunc = %ld, bits = %ld\n", length, trunc, bits); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_init(test_poly4); if (length) { fmpz_poly_init2(test_fmpz_poly3, 2*length-1, (2*bits-1)/FLINT_BITS+2); fmpz_poly_init2(test_fmpz_poly4, 2*length-1, (2*bits-1)/FLINT_BITS+2); } else { fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); } _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); _fmpz_poly_zero_coeffs(test_fmpz_poly3, FLINT_MIN(trunc, test_fmpz_poly3->length)); _fmpz_poly_mul_karatsuba_trunc_left(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } 
fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(100); length2 = random_ulong(100); if (length+length2 < 2) trunc = 0; else trunc = random_ulong(length+length2-1)+1; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) { fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); fmpz_poly_fit_length(test_fmpz_poly, length+length2-1); fmpz_poly_fit_limbs(test_fmpz_poly, (bits+bits2-1)/FLINT_BITS+2); } else { fmpz_poly_init(test_fmpz_poly3); } _fmpz_poly_mul_classical(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_zero_coeffs(test_fmpz_poly3, FLINT_MIN(trunc, test_fmpz_poly3->length)); _fmpz_poly_mul_karatsuba_trunc_left(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int 
test__fmpz_poly_mul_KS() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; ZmodF_poly_t test_modF_poly; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); mpz_poly_init(test_poly4); for (unsigned long count1 = 0; count1 < 60; count1++) { bits = random_ulong(100) + 1; bits2 = random_ulong(100) + 1; //bits = 4; //bits2 = 4; length2 = random_ulong(100); length = random_ulong(100); //length = 32000; //length2 = 32000; _fmpz_poly_stack_init(test_fmpz_poly, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(test_fmpz_poly2, length2, (bits2-1)/FLINT_BITS+1); if (length + length2) _fmpz_poly_stack_init(test_fmpz_poly3, length + length2 - 1, test_fmpz_poly->limbs + test_fmpz_poly2->limbs + 1); else fmpz_poly_init(test_fmpz_poly3); #if DEBUG printf("%ld, %ld, %ld, %ld\n", length, length2, bits, bits2); #endif mpz_poly_realloc(test_poly, length); mpz_poly_realloc(test_poly2, length2); if (length + length2) { mpz_poly_realloc(test_poly3, length + length2 - 1); mpz_poly_realloc(test_poly4, length + length2 - 1); } for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++) { do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); #if DEBUG if (bits2 == 64) { printf("Input poly 1:\n"); for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); printf("Input poly 2:\n"); for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, ",test_poly2->coeffs[j]); printf("\n\n"); } #endif mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_mul_naive_KS(test_poly3, test_poly, test_poly2); for (unsigned long i = 0; i < 10; i++) _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, 
test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly3, test_poly4); #if DEBUG if (!result) { printf("Output poly correct\n"); for (unsigned j = 0; j < test_poly3->length; j++) gmp_printf("%Zx, ",test_poly3->coeffs[j]); printf("\n\n"); printf("Output poly incorrect\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zx, ",test_poly4->coeffs[j]); printf("\n\n"); } #endif } _fmpz_poly_stack_clear(test_fmpz_poly3); _fmpz_poly_stack_clear(test_fmpz_poly2); _fmpz_poly_stack_clear(test_fmpz_poly); } for (unsigned long count1 = 0; count1 < 60; count1++) { bits = random_ulong(100) + 1; length = random_ulong(100); _fmpz_poly_stack_init(test_fmpz_poly, length, (bits-1)/FLINT_BITS+1); if (length) _fmpz_poly_stack_init(test_fmpz_poly3, 2*length - 1, 2*test_fmpz_poly->limbs + 1); else fmpz_poly_init(test_fmpz_poly3); #if DEBUG printf("%ld, %ld\n", length, bits); #endif mpz_poly_realloc(test_poly, length); if (length) { mpz_poly_realloc(test_poly3, 2*length - 1); mpz_poly_realloc(test_poly4, 2*length - 1); } for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++) { do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); #if DEBUG if (bits2 == 64) { printf("Input poly 1:\n"); for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); } #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_mul_naive_KS(test_poly3, test_poly, test_poly); _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly3, test_poly4); #if DEBUG if (!result) { printf("Output poly correct\n"); for (unsigned j = 0; j < test_poly3->length; j++) gmp_printf("%Zx, ",test_poly3->coeffs[j]); printf("\n\n"); printf("Output poly incorrect\n"); for (unsigned j = 
0; j < test_poly4->length; j++) gmp_printf("%Zx, ",test_poly4->coeffs[j]); printf("\n\n"); } #endif } _fmpz_poly_stack_clear(test_fmpz_poly3); _fmpz_poly_stack_clear(test_fmpz_poly); } for (unsigned long count1 = 0; count1 < 60; count1++) { bits = random_ulong(100) + 1; bits2 = random_ulong(100) + 1; //bits = 4; //bits2 = 4; length2 = random_ulong(100); length = random_ulong(100); //length = 32000; //length2 = 32000; fmpz_poly_init2(test_fmpz_poly, length, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, length2, (bits2-1)/FLINT_BITS+1); if (length + length2) { fmpz_poly_fit_length(test_fmpz_poly, length + length2 - 1); fmpz_poly_fit_limbs(test_fmpz_poly, test_fmpz_poly->limbs + test_fmpz_poly2->limbs + 1); } #if DEBUG printf("%ld, %ld, %ld, %ld\n", length, length2, bits, bits2); #endif mpz_poly_realloc(test_poly, length); mpz_poly_realloc(test_poly2, length2); if (length + length2) { mpz_poly_realloc(test_poly3, length + length2 - 1); mpz_poly_realloc(test_poly4, length + length2 - 1); } for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++) { do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); #if DEBUG if (bits2 == 64) { printf("Input poly 1:\n"); for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); printf("Input poly 2:\n"); for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, ",test_poly2->coeffs[j]); printf("\n\n"); } #endif mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_mul_naive_KS(test_poly3, test_poly, test_poly2); _fmpz_poly_mul_KS(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly3, test_poly4); #if DEBUG if (!result) { printf("Output poly correct\n"); 
for (unsigned j = 0; j < test_poly3->length; j++) gmp_printf("%Zx, ",test_poly3->coeffs[j]); printf("\n\n"); printf("Output poly incorrect\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zx, ",test_poly4->coeffs[j]); printf("\n\n"); } #endif } fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); mpz_poly_clear(test_poly4); return result; } int test__fmpz_poly_mul_KS_trunc() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; bits2 = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(2000); length2 = random_ulong(2000); if (length+length2 < 2) trunc = 0; else trunc = random_ulong(length+length2-1)+1; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init2(test_fmpz_poly4, trunc, (bits+bits2-1)/FLINT_BITS+2); 
_fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); _fmpz_poly_mul_KS_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(2000); if (length < 1) trunc = 0; else trunc = random_ulong(2*length-1)+1; #if DEBUG printf("length = %ld, trunc = %ld, bits = %ld\n", length, trunc, bits); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_init(test_poly4); if (length) fmpz_poly_init2(test_fmpz_poly3, 2*length-1, (2*bits-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init2(test_fmpz_poly4, trunc, (2*bits-1)/FLINT_BITS+2); _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); _fmpz_poly_mul_KS_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG if (!result) { 
mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; bits2 = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(2000); length2 = random_ulong(2000); if (length+length2 < 2) trunc = 0; else trunc = random_ulong(length+length2-1)+1; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_fit_length(test_fmpz_poly, trunc); fmpz_poly_fit_limbs(test_fmpz_poly, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); _fmpz_poly_mul_KS_trunc(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } 
#endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } #if 1 for (unsigned long count1 = 1; (count1 < 10) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(500); length2 = random_ulong(500); if (length+length2 < 2) trunc = 0; else trunc = random_ulong(length+length2-1)+1; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init2(test_fmpz_poly4, trunc, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); _fmpz_poly_mul_KS_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } 
fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } #endif mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul_SS() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; //bits = bits2 = 1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(35); length = random_ulong(35); //length = length2 = 256; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_mul_naive_KS(test_poly3, test_poly, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); else fmpz_poly_init(test_fmpz_poly3); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { _fmpz_poly_mul_SS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); } fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly3); if (!result) { #if DEBUG 
mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); #endif } mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); } for (unsigned long count1 = 0; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); length = random_ulong(35); #if DEBUG printf("length = %ld, bits = %ld\n", length, bits); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); #endif mpz_poly_mul_naive_KS(test_poly3, test_poly, test_poly); mpz_poly_init(test_poly4); if (length) fmpz_poly_init2(test_fmpz_poly3, 2*length-1, 2*test_fmpz_poly->limbs+1); else fmpz_poly_init(test_fmpz_poly3); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { _fmpz_poly_mul_SS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); } fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly3); if (!result) { #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); #endif } mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); } for (unsigned long count1 = 0; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; //bits = bits2 = 1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(35); length = random_ulong(35); //length = length2 = 256; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do randpoly(test_poly, length, bits); while 
(mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_mul_naive_KS(test_poly3, test_poly, test_poly2); mpz_poly_init(test_poly4); if (length + length2) { fmpz_poly_fit_length(test_fmpz_poly, length+length2-1); fmpz_poly_fit_limbs(test_fmpz_poly, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); } _fmpz_poly_mul_SS(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly3); if (!result) { #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); #endif } mpz_poly_clear(test_poly4); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul_SS_trunc() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 30) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = random_ulong(1000); if (length+length2 == 0) trunc = 0; else trunc = 
random_ulong(length+length2); #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init2(test_fmpz_poly4, trunc, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_SS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); _fmpz_poly_mul_SS_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 1; (count1 < 30) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); if (length == 0) trunc = 0; else trunc = random_ulong(2*length); #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while 
(mpz_poly_length(test_poly) < length); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_init(test_poly4); if (length) fmpz_poly_init2(test_fmpz_poly3, 2*length-1, (2*bits-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init2(test_fmpz_poly4, trunc, (2*bits-1)/FLINT_BITS+2); _fmpz_poly_mul_SS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); _fmpz_poly_mul_SS_trunc(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 30) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = random_ulong(1000); if (length+length2 == 0) trunc = 0; else trunc = random_ulong(length+length2); #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); 
mpz_poly_init(test_poly4); if (length + length2) { fmpz_poly_fit_length(test_fmpz_poly, length+length2-1); fmpz_poly_fit_limbs(test_fmpz_poly, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); } else fmpz_poly_init(test_fmpz_poly3); _fmpz_poly_mul_SS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); _fmpz_poly_mul_SS_trunc(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly3); #if DEBUG if (!result) { mpz_poly_print(test_poly3); printf("\n\n"); mpz_poly_print(test_poly4); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul_trunc_n() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 25) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = length; trunc = length; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while 
(mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init2(test_fmpz_poly4, trunc, (bits+bits2-1)/FLINT_BITS+2); _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); _fmpz_poly_mul_trunc_n(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_mul_trunc_n() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 25) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = length; trunc = length; #if DEBUG 
printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_truncate(test_fmpz_poly3, trunc); _fmpz_poly_normalise(test_fmpz_poly3); fmpz_poly_mul_trunc_n(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul_trunc_left_n() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; (count1 < 25) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 
= 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = length; trunc = length; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); if (length + length2) fmpz_poly_init2(test_fmpz_poly4, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_zero_coeffs(test_fmpz_poly3, trunc); _fmpz_poly_mul_trunc_left_n(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); _fmpz_poly_zero_coeffs(test_fmpz_poly4, trunc); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_mul_trunc_left_n() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, trunc; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 1; 
(count1 < 25) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; bits2 = random_ulong(1000)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); length2 = length; trunc = length; #if DEBUG printf("length = %ld, length2 = %ld, trunc = %ld, bits = %ld, bits2 = %ld\n", length, length2, trunc, bits, bits2); #endif do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_init(test_poly4); if (length + length2) fmpz_poly_init2(test_fmpz_poly3, length+length2-1, (bits+bits2-1)/FLINT_BITS+2); else fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul_KS(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); _fmpz_poly_zero_coeffs(test_fmpz_poly3, trunc); fmpz_poly_mul_trunc_left_n(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2, trunc); fmpz_poly_check_normalisation(test_fmpz_poly4); _fmpz_poly_zero_coeffs(test_fmpz_poly4, trunc); fmpz_poly_to_mpz_poly(test_poly3, test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly3); mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test__fmpz_poly_mul() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; ZmodF_poly_t test_modF_poly; int result = 1; unsigned long bits, 
bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); mpz_poly_init(test_poly4); for (unsigned long count1 = 0; count1 < 20; count1++) { bits = random_ulong(1000) + 1; bits2 = random_ulong(1000) + 1; //bits = 4; //bits2 = 4; length2 = random_ulong(1000); length = random_ulong(1000); //length = 32000; //length2 = 32000; _fmpz_poly_stack_init(test_fmpz_poly, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(test_fmpz_poly2, length2, (bits2-1)/FLINT_BITS+1); if (length + length2) _fmpz_poly_stack_init(test_fmpz_poly3, length + length2 - 1, test_fmpz_poly->limbs + test_fmpz_poly2->limbs + 1); else fmpz_poly_init(test_fmpz_poly3); #if DEBUG printf("%ld, %ld, %ld, %ld\n", length, length2, bits, bits2); #endif mpz_poly_realloc(test_poly, length); mpz_poly_realloc(test_poly2, length2); if (length + length2) { mpz_poly_realloc(test_poly3, length + length2 - 1); mpz_poly_realloc(test_poly4, length + length2 - 1); } for (unsigned long count2 = 0; (count2 < 20) && (result == 1); count2++) { do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_mul_naive_KS(test_poly3, test_poly, test_poly2); for (unsigned long i = 0; i < 2; i++) _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly3, test_poly4); #if DEBUG if (!result) { printf("Output poly correct\n"); for (unsigned j = 0; j < test_poly3->length; j++) gmp_printf("%Zx, ",test_poly3->coeffs[j]); printf("\n\n"); printf("Output poly incorrect\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zx, ",test_poly4->coeffs[j]); printf("\n\n"); } #endif } _fmpz_poly_stack_clear(test_fmpz_poly3); 
_fmpz_poly_stack_clear(test_fmpz_poly2); _fmpz_poly_stack_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); mpz_poly_clear(test_poly4); return result; } int test_fmpz_poly_mul() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; ZmodF_poly_t test_modF_poly; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); mpz_poly_init(test_poly4); for (unsigned long count1 = 0; count1 < 20; count1++) { bits = random_ulong(1000) + 1; bits2 = random_ulong(1000) + 1; //bits = 4; //bits2 = 4; length2 = random_ulong(1000); length = random_ulong(1000); //length = 32000; //length2 = 32000; _fmpz_poly_stack_init(test_fmpz_poly, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(test_fmpz_poly2, length2, (bits2-1)/FLINT_BITS+1); fmpz_poly_init(test_fmpz_poly3); #if DEBUG printf("%ld, %ld, %ld, %ld\n", length, length2, bits, bits2); #endif mpz_poly_realloc(test_poly, length); mpz_poly_realloc(test_poly2, length2); if (length + length2) { mpz_poly_realloc(test_poly3, length + length2 - 1); mpz_poly_realloc(test_poly4, length + length2 - 1); } for (unsigned long count2 = 0; (count2 < 20) && (result == 1); count2++) { do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_mul_naive_KS(test_poly3, test_poly, test_poly2); for (unsigned long i = 0; i < 2; i++) fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly3, test_poly4); #if DEBUG if (!result) { printf("Output poly correct\n"); for (unsigned j = 0; j < 
test_poly3->length; j++) gmp_printf("%Zx, ",test_poly3->coeffs[j]); printf("\n\n"); printf("Output poly incorrect\n"); for (unsigned j = 0; j < test_poly4->length; j++) gmp_printf("%Zx, ",test_poly4->coeffs[j]); printf("\n\n"); } #endif } fmpz_poly_clear(test_fmpz_poly3); _fmpz_poly_stack_clear(test_fmpz_poly2); _fmpz_poly_stack_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); mpz_poly_clear(test_poly4); return result; } int test__fmpz_poly_scalar_mul_fmpz() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, bits2, limbs2, length; mpz_t temp, x_mpz; mpz_init(temp); mpz_init(x_mpz); mp_limb_t * x; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 7) && (result == 1) ; count1++) { bits = randint(100000) + 150000; bits2 = randint(10000) + 150000; limbs2 = (bits2-1)/FLINT_BITS+1; bits2 = limbs2*FLINT_BITS; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+(bits2-1)/FLINT_BITS+2); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 3) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 1; j++) { _fmpz_poly_scalar_mul_fmpz(test_fmpz_poly2, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, 
ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 100) && (result == 1) ; count1++) { bits = randint(1000) + 1500; bits2 = randint(1000) + 1500; limbs2 = (bits2-1)/FLINT_BITS+1; bits2 = limbs2*FLINT_BITS; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+(bits2-1)/FLINT_BITS+2); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 5; j++) { _fmpz_poly_scalar_mul_fmpz(test_fmpz_poly2, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for 
(unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_clear(temp); mpz_clear(x_mpz); return result; } int test__fmpz_poly_scalar_div_fmpz() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, bits2, limbs, limbs2, length; mpz_t temp, x_mpz; mpz_init(temp); mpz_init(x_mpz); mp_limb_t * x; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = randint(300)+1; bits2 = randint(300)+1; limbs = (bits-1)/FLINT_BITS+1; limbs2 = (bits2-1)/FLINT_BITS+1; bits2 = limbs2*FLINT_BITS; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); if (limbs >= limbs2) fmpz_poly_init2(test_fmpz_poly2, 1, limbs-limbs2+1); else fmpz_poly_init2(test_fmpz_poly2, 1, 1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); F_mpn_clear(x, limbs2+1); while (!x[limbs2]) mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 5; j++) { _fmpz_poly_scalar_div_fmpz(test_fmpz_poly2, 
test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (long i = 0; i < test_poly2->length; i++) { mpz_fdiv_q(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } for (long i = test_poly2->length; i < test_poly->length; i++) { mpz_fdiv_q(temp, test_poly->coeffs[i], x_mpz); result &= (mpz_cmp_ui(temp, 0L) == 0); if (!result) { printf("Coefficient %ld of %ld not zero\n", i, test_poly->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); gmp_printf("%Zd, %Zd, %Zd\n", temp, test_poly->coeffs[i], x_mpz); break; } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = randint(300)+1; bits2 = randint(300)+1; limbs = (bits-1)/FLINT_BITS+1; limbs2 = (bits2-1)/FLINT_BITS+1; bits2 = limbs2*FLINT_BITS; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); while (!x[limbs2]) 
mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_scalar_div_fmpz(test_fmpz_poly, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (long i = 0; i < test_poly2->length; i++) { mpz_fdiv_q(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } for (long i = test_poly2->length; i < test_poly->length; i++) { mpz_fdiv_q(temp, test_poly->coeffs[i], x_mpz); result &= (mpz_cmp_ui(temp, 0L) == 0); if (!result) { printf("Coefficient %ld of %ld not zero\n", i, test_poly->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); gmp_printf("%Zd, %Zd, %Zd\n", temp, test_poly->coeffs[i], x_mpz); break; } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_clear(temp); mpz_clear(x_mpz); return result; } int test_fmpz_poly_scalar_div_fmpz() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, bits2, limbs, limbs2, length; mpz_t temp, x_mpz; mpz_init(temp); mpz_init(x_mpz); mp_limb_t * x; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = 
randint(300)+1; bits2 = randint(300)+1; limbs = (bits-1)/FLINT_BITS+1; limbs2 = (bits2-1)/FLINT_BITS+1; bits2 = limbs2*FLINT_BITS; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init(test_fmpz_poly2); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); while (!x[limbs2]) mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 5; j++) { fmpz_poly_scalar_div_fmpz(test_fmpz_poly2, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (long i = 0; i < test_poly2->length; i++) { mpz_fdiv_q(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } for (long i = test_poly2->length; i < test_poly->length; i++) { mpz_fdiv_q(temp, test_poly->coeffs[i], x_mpz); result &= (mpz_cmp_ui(temp, 0L) == 0); if (!result) { printf("Coefficient %ld of %ld not zero\n", i, test_poly->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); gmp_printf("%Zd, %Zd, %Zd\n", temp, test_poly->coeffs[i], x_mpz); break; } } #if DEBUG if (!result) { 
mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = randint(300)+1; bits2 = randint(300)+1; limbs = (bits-1)/FLINT_BITS+1; limbs2 = (bits2-1)/FLINT_BITS+1; bits2 = limbs2*FLINT_BITS; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); while (!x[limbs2]) mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_scalar_div_fmpz(test_fmpz_poly, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (long i = 0; i < test_poly2->length; i++) { mpz_fdiv_q(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } for (long i = test_poly2->length; i < test_poly->length; i++) { mpz_fdiv_q(temp, test_poly->coeffs[i], x_mpz); result &= (mpz_cmp_ui(temp, 0L) == 0); if (!result) { printf("Coefficient %ld of %ld not zero\n", i, test_poly->length); printf("bits = %ld, bits2 = %ld, 
length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); gmp_printf("%Zd, %Zd, %Zd\n", temp, test_poly->coeffs[i], x_mpz); break; } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_clear(temp); mpz_clear(x_mpz); return result; } int test_fmpz_poly_scalar_div_mpz() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, bits2, limbs, limbs2, length; mpz_t temp, x_mpz; mpz_init(temp); mpz_init(x_mpz); mp_limb_t * x; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = randint(300)+1; bits2 = randint(300)+1; limbs = (bits-1)/FLINT_BITS+1; limbs2 = (bits2-1)/FLINT_BITS+1; bits2 = limbs2*FLINT_BITS; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init(test_fmpz_poly2); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); while (!x[limbs2]) mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 5; j++) { fmpz_poly_scalar_div_mpz(test_fmpz_poly2, test_fmpz_poly, x_mpz); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (long i = 0; i < test_poly2->length; i++) { 
mpz_fdiv_q(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } for (long i = test_poly2->length; i < test_poly->length; i++) { mpz_fdiv_q(temp, test_poly->coeffs[i], x_mpz); result &= (mpz_cmp_ui(temp, 0L) == 0); if (!result) { printf("Coefficient %ld of %ld not zero\n", i, test_poly->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); gmp_printf("%Zd, %Zd, %Zd\n", temp, test_poly->coeffs[i], x_mpz); break; } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_clear(temp); mpz_clear(x_mpz); return result; } int test_fmpz_poly_scalar_mul_fmpz() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, bits2, limbs2, length; mpz_t temp, x_mpz; mpz_init(temp); mpz_init(x_mpz); mp_limb_t * x; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 7) && (result == 1) ; count1++) { bits = randint(100000) + 150000; bits2 = randint(10000) + 150000; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 3) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = 
-limbs2; do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 1; j++) { fmpz_poly_scalar_mul_fmpz(test_fmpz_poly2, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 0; (count1 < 100) && (result == 1) ; count1++) { bits = randint(1000) + 1500; bits2 = randint(1000) + 1500; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 5; 
j++) { fmpz_poly_scalar_mul_fmpz(test_fmpz_poly2, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = randint(150) + 1; bits2 = randint(150) + 1; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = randint(40); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 2; j++) { fmpz_poly_scalar_mul_fmpz(test_fmpz_poly2, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, 
test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); printf("bits 2 actually equals %ld\n",mpz_sizeinbase(x_mpz,2)); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = randint(10) + 1; bits2 = randint(10) + 1; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_scalar_mul_fmpz(test_fmpz_poly, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (unsigned long i = 0; i < 
test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); printf("bits 2 actually equals %ld\n", mpz_sizeinbase(x_mpz, 2)); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 0; (count1 < 100) && (result == 1) ; count1++) { bits = randint(6400) + 1; bits2 = randint(6400) + 1; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_scalar_mul_fmpz(test_fmpz_poly, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, 
length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); printf("bits 2 actually equals %ld\n",mpz_sizeinbase(x_mpz,2)); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 0; (count1 < 20) && (result == 1) ; count1++) { bits = randint(128000) + 1; bits2 = randint(128000) + 1; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_scalar_mul_fmpz(test_fmpz_poly, test_fmpz_poly, x); fmpz_poly_check_normalisation(test_fmpz_poly); mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); printf("bits 2 actually equals %ld\n",mpz_sizeinbase(x_mpz,2)); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif 
mpz_poly_clear(test_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_clear(temp); mpz_clear(x_mpz); return result; } int test_fmpz_poly_scalar_mul_mpz() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, bits2, limbs2, length; mpz_t temp, x_mpz; mpz_init(temp); mpz_init(x_mpz); mp_limb_t * x; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 7) && (result == 1) ; count1++) { bits = randint(100000) + 150000; bits2 = randint(10000) + 150000; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 3) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); do randpoly(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 1; j++) { fmpz_poly_scalar_mul_mpz(test_fmpz_poly2, test_fmpz_poly, x_mpz); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, 
test_poly2->length); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 0; (count1 < 100) && (result == 1) ; count1++) { bits = randint(1000) + 1500; bits2 = randint(1000) + 1500; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = randint(100); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 5; j++) { fmpz_poly_scalar_mul_mpz(test_fmpz_poly2, test_fmpz_poly, x_mpz); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); 
fmpz_poly_clear(test_fmpz_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = randint(150) + 1; bits2 = randint(150) + 1; limbs2 = (bits2-1)/FLINT_BITS+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_poly_init(test_fmpz_poly2); length = randint(40); #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; mpz_import(x_mpz, ABS(x[0]), -1, sizeof(mp_limb_t), 0, 0, x+1); if ((long) x[0] < 0) mpz_neg(x_mpz, x_mpz); randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); for (unsigned long j = 0; j < 2; j++) { fmpz_poly_scalar_mul_mpz(test_fmpz_poly2, test_fmpz_poly, x_mpz); fmpz_poly_check_normalisation(test_fmpz_poly2); } mpz_poly_init(test_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); #if DEBUG printf("length = %ld\n",_fmpz_poly_length(test_fmpz_poly)); #endif for (unsigned long i = 0; i < test_poly->length; i++) { mpz_mul(test_poly->coeffs[i], test_poly->coeffs[i], x_mpz); result &= (mpz_cmp(test_poly->coeffs[i], test_poly2->coeffs[i]) == 0); if (!result) { printf("Coefficient %ld of %ld incorrect\n", i, test_poly2->length); printf("bits = %ld, bits2 = %ld, length1 = %ld, length2 = %ld\n", bits, bits2, test_poly->length, test_poly2->length); printf("bits 2 actually equals %ld\n",mpz_sizeinbase(x_mpz,2)); } } #if DEBUG if (!result) { mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); } #endif mpz_poly_clear(test_poly2); fmpz_poly_clear(test_fmpz_poly2); } free(x); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); 
mpz_clear(temp); mpz_clear(x_mpz); return result; } int test_fmpz_poly_div_classical() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 20000) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; //bits = bits2 = 1000000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(100); length = random_ulong(100)+1; //length = length2 = 20; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); randpoly(test_poly2, length2, bits2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 5; i++) { fmpz_poly_div_classical(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly4); } fmpz_poly_div_classical(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); 
_fmpz_poly_normalise(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_divrem_classical() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 20000) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; //bits = bits2 = 1000000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(100); length = random_ulong(100)+1; //length = length2 = 20; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); 
_fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 5; i++) { fmpz_poly_divrem_classical(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); } fmpz_poly_divrem_classical(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_div_divconquer_recursive() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 3000) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); 
mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 0; i < 10; i++) { fmpz_poly_div_divconquer_recursive(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); } fmpz_poly_div_divconquer_recursive(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_divrem_divconquer() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long 
bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 3000) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 0; i < 10; i++) { fmpz_poly_divrem_divconquer(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); } fmpz_poly_divrem_divconquer(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = 
mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_divrem() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 3000) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, 
test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 0; i < 10; i++) { fmpz_poly_divrem(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); } fmpz_poly_divrem(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } for (unsigned long count1 = 0; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(200)+ 2; bits2 = random_ulong(200)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif 
mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_divrem(test_fmpz_poly3, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly5); } for (unsigned long count1 = 0; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(200)+ 2; bits2 = random_ulong(200)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, 
test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_divrem(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } for (unsigned long count1 = 0; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(200)+ 2; bits2 = random_ulong(200)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, 
test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_divrem(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_div() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 3000) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG 
mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 0; i < 10; i++) { fmpz_poly_div(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly4); } fmpz_poly_div(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } for (unsigned long count1 = 0; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(200)+ 2; bits2 = random_ulong(200)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); 
mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_div(test_fmpz_poly3, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } for (unsigned long count1 = 0; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(200)+ 2; bits2 = random_ulong(200)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 12; //length2 = 5; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); 
fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_div(test_fmpz_poly, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_div_divconquer() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 100) && (result == 1) ; count1++) { bits = random_ulong(2000)+ 1; bits2 = random_ulong(2000)+ 1; //bits = bits2 = 100; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(256)+1; length = random_ulong(256)+1; //length = 1000; //length2 = 1000; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); randpoly(test_poly2, length2, bits2); fmpz_poly_fit_length(test_fmpz_poly2, length2); 
mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 10; i++) { fmpz_poly_div_divconquer(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); //fmpz_poly_clear(test_fmpz_poly4); //fmpz_poly_init(test_fmpz_poly4); } fmpz_poly_div_divconquer(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_div_mulders() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 10000) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; bits2 = random_ulong(100)+ 1; //bits = bits2 = 10000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); 
length = random_ulong(128)+1; //length = 1000; //length2 = 1000; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); randpoly(test_poly2, length2, bits2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 10; i++) { fmpz_poly_div_mulders(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly2); //fmpz_poly_clear(test_fmpz_poly4); //fmpz_poly_init(test_fmpz_poly4); } fmpz_poly_div_mulders(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_newton_invert_basecase() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned 
long bits, length, n; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 20000) && (result == 1) ; count1++) { bits = random_ulong(100)+ 2; //bits = 100000; fmpz_poly_init(test_fmpz_poly); fmpz_poly_init(test_fmpz_poly2); fmpz_poly_init(test_fmpz_poly3); length = random_ulong(64)+1; //length = 12; #if DEBUG printf("length = %ld, bits = %ld\n", length, bits); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); fmpz_poly_set_coeff_ui(test_fmpz_poly, test_fmpz_poly->length - 1, 1L); n = randint(test_fmpz_poly->length) + 1; #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); #endif fmpz_poly_newton_invert_basecase(test_fmpz_poly2, test_fmpz_poly, n); fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); for (unsigned long i = 0; i < n - 1; i++) { result &= (test_fmpz_poly3->coeffs[(i+test_fmpz_poly3->length-n)*(test_fmpz_poly3->limbs+1)] == 0); } result &= (test_fmpz_poly3->coeffs[(test_fmpz_poly3->length-1)*(test_fmpz_poly3->limbs+1)] == 1); result &= (test_fmpz_poly3->coeffs[(test_fmpz_poly3->length-1)*(test_fmpz_poly3->limbs+1)+1] == 1); #if DEBUG if (!result) { fmpz_poly_print(test_fmpz_poly); printf("\n"); fmpz_poly_print(test_fmpz_poly2); printf("\n"); fmpz_poly_print(test_fmpz_poly3); printf("\n"); } #endif fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } mpz_poly_clear(test_poly); return result; } int test__fmpz_poly_reverse() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length, length2; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 5000) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, 
(bits-1)/FLINT_BITS+1); length = random_ulong(100); length2 = length + randint(200); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld\n", length, length2, bits); #endif randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, length2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); #endif _fmpz_poly_reverse(test_fmpz_poly2, test_fmpz_poly, length2); fmpz_poly_check_normalisation(test_fmpz_poly2); _fmpz_poly_reverse(test_fmpz_poly2, test_fmpz_poly2, length2); fmpz_poly_check_normalisation(test_fmpz_poly2); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } for (unsigned long count1 = 0; (count1 < 5000) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1); length = random_ulong(100); length2 = length + randint(200); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld\n", length, length2, bits); #endif randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, length); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); #endif _fmpz_poly_set(test_fmpz_poly2, test_fmpz_poly); _fmpz_poly_reverse(test_fmpz_poly, test_fmpz_poly, length2); fmpz_poly_check_normalisation(test_fmpz_poly); _fmpz_poly_reverse(test_fmpz_poly, test_fmpz_poly, length2); fmpz_poly_check_normalisation(test_fmpz_poly); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_newton_invert() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, 
test_fmpz_poly2, test_fmpz_poly3; int result = 1; unsigned long bits, length; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init(test_fmpz_poly2); fmpz_poly_init(test_fmpz_poly3); length = random_ulong(250)+1; #if DEBUG printf("length = %ld, bits = %ld\n", length, bits); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); _fmpz_poly_set_coeff_ui(test_fmpz_poly, test_fmpz_poly->length - 1, 1); length = test_fmpz_poly->length; _fmpz_poly_reverse(test_fmpz_poly, test_fmpz_poly, length); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); #endif fmpz_poly_newton_invert(test_fmpz_poly2, test_fmpz_poly, length); fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_mul_trunc_n(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2, length); _fmpz_poly_normalise(test_fmpz_poly3); result &= (test_fmpz_poly3->length == 1); result &= (test_fmpz_poly3->coeffs[0] == 1); result &= (test_fmpz_poly3->coeffs[1] == 1); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_div_series() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, length; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 500) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); length = random_ulong(200)+1; #if DEBUG printf("length = %ld, bits = %ld\n", length, bits); #endif do { 
randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); _fmpz_poly_set_coeff_ui(test_fmpz_poly, test_fmpz_poly->length - 1, 1); length = test_fmpz_poly->length; randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly2, length); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly); _fmpz_poly_reverse(test_fmpz_poly, test_fmpz_poly, length); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); #endif fmpz_poly_div_series(test_fmpz_poly3, test_fmpz_poly2, test_fmpz_poly, length); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_mul_trunc_n(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly, length); _fmpz_poly_normalise(test_fmpz_poly4); result = _fmpz_poly_equal(test_fmpz_poly4, test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_div_newton() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(10)+ 1; bits2 = random_ulong(10)+ 1; //bits = bits2 = 100000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(128); length = random_ulong(128)+1; //length = 100000; //length2 = 100000; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif randpoly(test_poly, length, bits); mpz_poly_set_coeff_ui(test_poly, length - 1, 1); fmpz_poly_fit_length(test_fmpz_poly, length); 
mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); randpoly(test_poly2, length2, bits2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); _fmpz_poly_normalise(test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 10; i++) { fmpz_poly_div_newton(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly4); } fmpz_poly_div_newton(test_fmpz_poly4, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG2 if (!result) { mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly4);printf("\n\n"); } #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_power() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, temp; int result = 1; unsigned long bits, length, exp; mpz_poly_init(test_poly); fmpz_poly_init(temp); fmpz_poly_init(test_fmpz_poly2); fmpz_poly_init(test_fmpz_poly3); for (unsigned long count1 = 1; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned 
long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(10); exp = random_ulong(20); #if DEBUG printf("length = %ld, bits = %ld, exp = %ld\n", length, bits, exp); #endif do { randpoly(test_poly, length, bits); mpz_poly_normalise(test_poly); } while (test_poly->length != length); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, 1); fmpz_poly_fit_limbs(test_fmpz_poly2, 1); fmpz_poly_set_coeff_ui(test_fmpz_poly2, 0, 1); test_fmpz_poly2->length = 1; for (unsigned long i = 0; i < exp; i++) { fmpz_poly_mul(temp, test_fmpz_poly2, test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, temp->length); fmpz_poly_fit_limbs(test_fmpz_poly2, temp->limbs); _fmpz_poly_set(test_fmpz_poly2, temp); } fmpz_poly_power(test_fmpz_poly3, test_fmpz_poly, exp); fmpz_poly_check_normalisation(test_fmpz_poly3); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly3); #if DEBUG2 if (!result) { fmpz_poly_print(test_fmpz_poly); printf("\n"); fmpz_poly_print(test_fmpz_poly2); printf("\n"); fmpz_poly_print(test_fmpz_poly3); printf("\n"); } #endif } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(10); exp = random_ulong(20); #if DEBUG printf("length = %ld, bits = %ld, exp = %ld\n", length, bits, exp); #endif do { randpoly(test_poly, length, bits); mpz_poly_normalise(test_poly); } while (test_poly->length != length); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, 1); fmpz_poly_fit_limbs(test_fmpz_poly2, 1); fmpz_poly_set_coeff_ui(test_fmpz_poly2, 0, 1); test_fmpz_poly2->length = 1; 
for (unsigned long i = 0; i < exp; i++) { fmpz_poly_mul(temp, test_fmpz_poly2, test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, temp->length); fmpz_poly_fit_limbs(test_fmpz_poly2, temp->limbs); _fmpz_poly_set(test_fmpz_poly2, temp); } fmpz_poly_power(test_fmpz_poly, test_fmpz_poly, exp); fmpz_poly_check_normalisation(test_fmpz_poly); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); #if DEBUG2 if (!result) { printf("Exp = %ld\n", exp); fmpz_poly_print(test_fmpz_poly); printf("\n"); fmpz_poly_print(test_fmpz_poly2); printf("\n"); } #endif } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); fmpz_poly_clear(temp); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); return result; } int test_fmpz_poly_power_trunc_n() { mpz_poly_t test_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, temp; int result = 1; unsigned long bits, length, exp, n; mpz_poly_init(test_poly); fmpz_poly_init(temp); fmpz_poly_init(test_fmpz_poly2); fmpz_poly_init(test_fmpz_poly3); for (unsigned long count1 = 1; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(10); exp = random_ulong(20); n = random_ulong(20); #if DEBUG printf("length = %ld, bits = %ld, exp = %ld\n", length, bits, exp); #endif randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, 1); fmpz_poly_fit_limbs(test_fmpz_poly2, 1); fmpz_poly_set_coeff_ui(test_fmpz_poly2, 0, 1); test_fmpz_poly2->length = 1; for (unsigned long i = 0; i < exp; i++) { fmpz_poly_mul(temp, test_fmpz_poly2, test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, temp->length); fmpz_poly_fit_limbs(test_fmpz_poly2, temp->limbs); _fmpz_poly_set(test_fmpz_poly2, temp); } 
_fmpz_poly_truncate(test_fmpz_poly2, n); if (test_fmpz_poly->length == 0) _fmpz_poly_zero(test_fmpz_poly2); fmpz_poly_power_trunc_n(test_fmpz_poly3, test_fmpz_poly, exp, n); fmpz_poly_check_normalisation(test_fmpz_poly3); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly3); #if DEBUG2 if (!result) { fmpz_poly_print(test_fmpz_poly); printf("\n"); fmpz_poly_print(test_fmpz_poly2); printf("\n"); fmpz_poly_print(test_fmpz_poly3); printf("\n"); } #endif } fmpz_poly_clear(test_fmpz_poly); } for (unsigned long count1 = 1; (count1 < 50) && (result == 1) ; count1++) { bits = random_ulong(100)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(10); exp = random_ulong(20); n = random_ulong(20); #if DEBUG printf("length = %ld, bits = %ld, exp = %ld\n", length, bits, exp); #endif randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, 1); fmpz_poly_fit_limbs(test_fmpz_poly2, 1); fmpz_poly_set_coeff_ui(test_fmpz_poly2, 0, 1); test_fmpz_poly2->length = 1; for (unsigned long i = 0; i < exp; i++) { fmpz_poly_mul(temp, test_fmpz_poly2, test_fmpz_poly); fmpz_poly_fit_length(test_fmpz_poly2, temp->length); fmpz_poly_fit_limbs(test_fmpz_poly2, temp->limbs); _fmpz_poly_set(test_fmpz_poly2, temp); } _fmpz_poly_truncate(test_fmpz_poly2, n); if (test_fmpz_poly->length == 0) _fmpz_poly_zero(test_fmpz_poly2); fmpz_poly_power_trunc_n(test_fmpz_poly, test_fmpz_poly, exp, n); fmpz_poly_check_normalisation(test_fmpz_poly); result = _fmpz_poly_equal(test_fmpz_poly2, test_fmpz_poly); #if DEBUG2 if (!result) { fmpz_poly_print(test_fmpz_poly); printf("\n"); fmpz_poly_print(test_fmpz_poly2); printf("\n"); } #endif } fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); fmpz_poly_clear(temp); fmpz_poly_clear(test_fmpz_poly2); 
fmpz_poly_clear(test_fmpz_poly3); return result; } int test_fmpz_poly_power2() { fmpz_poly_t poly, power; fmpz_poly_init(power); fmpz_poly_init(poly); fmpz_poly_set_coeff_ui(poly, 0, 743); fmpz_poly_set_coeff_ui(poly, 1, 423); fmpz_poly_power(power, poly, 2000);//(1UL<<13)); fmpz_poly_check_normalisation(power); #if DEBUG fmpz_poly_print(power); printf("\n"); #endif return 1; } int test_fmpz_poly_pseudo_divrem_cohen() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(300)+ 2; bits2 = random_ulong(300)+ 1; //bits = bits2 = 1000000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(40); length = random_ulong(40)+1; //length = length2 = 20; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG 
mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 5; i++) { fmpz_poly_pseudo_divrem_cohen(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); } fmpz_poly_pseudo_divrem_cohen(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = (test_fmpz_poly5->length == 0);//mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_pseudo_divrem_shoup() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(300)+ 2; bits2 = random_ulong(300)+ 1; //bits = bits2 = 1000000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(40); length = random_ulong(40)+1; //length = length2 = 20; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); 
_fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 5; i++) { fmpz_poly_pseudo_divrem_shoup(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); } fmpz_poly_pseudo_divrem_shoup(test_fmpz_poly4, test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = (test_fmpz_poly5->length == 0);//mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_pseudo_divrem_basecase() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2, d; 
mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 6000) && (result == 1) ; count1++) { bits = random_ulong(100)+ 2; bits2 = random_ulong(100)+ 1; //bits = bits2 = 1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(100); length = random_ulong(100)+1; //length = 100; //length2 = 199; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 5; i++) { fmpz_poly_pseudo_divrem_basecase(test_fmpz_poly4, test_fmpz_poly5, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); } fmpz_poly_pseudo_divrem_basecase(test_fmpz_poly4, test_fmpz_poly5, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, 
test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_pseudo_div_basecase() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, d; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 6000) && (result == 1) ; count1++) { bits = random_ulong(100)+ 2; bits2 = random_ulong(100)+ 1; //bits = bits2 = 1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = random_ulong(100); length = random_ulong(100)+1; //length = 100; //length2 = 199; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG 
mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 5; i++) { fmpz_poly_pseudo_div_basecase(test_fmpz_poly4, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly4); } fmpz_poly_pseudo_div_basecase(test_fmpz_poly4, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_pseudo_divrem_recursive() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2, d; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; //bits = bits2 = 1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); //length = 100; //length2 = 199; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, 
length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 5; i++) { fmpz_poly_pseudo_divrem_recursive(test_fmpz_poly4, test_fmpz_poly5, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); } fmpz_poly_pseudo_divrem_recursive(test_fmpz_poly4, test_fmpz_poly5, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_pseudo_divrem() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2, d; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 600) && 
(result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_pseudo_divrem(test_fmpz_poly4, test_fmpz_poly5, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); 
fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_pseudo_divrem(test_fmpz_poly3, test_fmpz_poly5, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly5); } for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); 
#endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly5); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_pseudo_divrem(test_fmpz_poly, test_fmpz_poly5, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly5); } for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do 
randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_pseudo_divrem(test_fmpz_poly4, test_fmpz_poly3, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG 
mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_pseudo_divrem(test_fmpz_poly4, test_fmpz_poly, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_pseudo_div_recursive() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, d; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; //bits = bits2 = 1000; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); //length = 100; //length2 = 199; #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); 
_fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif for (unsigned long i = 1; i < 5; i++) { fmpz_poly_pseudo_div_recursive(test_fmpz_poly4, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly4); } fmpz_poly_pseudo_div_recursive(test_fmpz_poly4, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_pseudo_div() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; int result = 1; unsigned long bits, bits2, length, length2, d; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; 
fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); fmpz_poly_init(test_fmpz_poly4); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_pseudo_div(test_fmpz_poly4, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly4); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly4); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly4); } for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = 
%ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while (mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_pseudo_div(test_fmpz_poly3, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly3); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly3); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } for (unsigned long count1 = 0; (count1 < 600) && (result == 1) ; count1++) { bits = random_ulong(20)+ 2; bits2 = random_ulong(20)+ 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits2-1)/FLINT_BITS+1); length = random_ulong(300)+1; length2 = random_ulong(300); #if DEBUG printf("length = %ld, length2 = %ld, bits = %ld, bits2 = %ld\n", length, length2, bits, bits2); #endif do { randpoly(test_poly, length, bits); fmpz_poly_fit_length(test_fmpz_poly, length); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); _fmpz_poly_normalise(test_fmpz_poly); } while (test_fmpz_poly->length == 0); do randpoly(test_poly2, length2, bits2); while 
(mpz_poly_length(test_poly2) < length2); fmpz_poly_fit_length(test_fmpz_poly2, length2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); #endif mpz_poly_init(test_poly4); fmpz_poly_init2(test_fmpz_poly3, length+length2-1, test_fmpz_poly->limbs+test_fmpz_poly2->limbs+1); _fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); #if DEBUG mpz_poly_print(test_poly);printf("\n\n"); mpz_poly_print(test_poly2);printf("\n\n"); mpz_poly_print(test_poly3);printf("\n\n"); #endif fmpz_poly_pseudo_div(test_fmpz_poly, &d, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_check_normalisation(test_fmpz_poly); fmpz_poly_to_mpz_poly(test_poly4, test_fmpz_poly); result = mpz_poly_equal(test_poly4, test_poly2); #if DEBUG mpz_poly_print(test_poly4);printf("\n\n"); #endif mpz_poly_clear(test_poly4); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly3); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); return result; } int test_fmpz_poly_to_ZmodF_poly() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; ZmodF_poly_t test_modF_poly; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length, depth; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 2; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; depth = 0; while ((1<length; j++) gmp_printf("%Zd, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); ZmodF_poly_init(test_modF_poly, depth, (bits-1)/FLINT_BITS+1, 0); fmpz_poly_to_ZmodF_poly(test_modF_poly, test_fmpz_poly, length); 
ZmodF_poly_to_fmpz_poly(test_fmpz_poly2, test_modF_poly, 1); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); ZmodF_poly_clear(test_modF_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zd, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test_fmpz_poly_bit_pack() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; ZmodF_poly_t test_modF_poly; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length, depth, bundle; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(FLINT_BITS-2)+ 2; fmpz_poly_init2(test_fmpz_poly, 1, 1); fmpz_poly_init2(test_fmpz_poly2, 1, 10); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; bundle = length/5; if (bundle == 0) bundle = length; depth = 0; while ((1<coeffs[i])<0) // Final coeff in each bundle // must be positive mpz_neg(test_poly->coeffs[i], test_poly->coeffs[i]); if (mpz_sgn(test_poly->coeffs[i]) == 0) mpz_set_ui(test_poly->coeffs[i], 1); } if (mpz_sgn(test_poly->coeffs[length-1])<0) mpz_neg(test_poly->coeffs[length-1], test_poly->coeffs[length-1]); if (mpz_sgn(test_poly->coeffs[length-1]) == 0) mpz_set_ui(test_poly->coeffs[length-1], 1); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); ZmodF_poly_init(test_modF_poly, depth, (bits*bundle-1)/FLINT_BITS+1, 0); fmpz_poly_bit_pack(test_modF_poly, test_fmpz_poly, bundle, -bits, length, 1L); test_fmpz_poly2->length = length; for (unsigned long i = 0; i < length; i++) // Must clear coeffs in advance 
test_fmpz_poly2->coeffs[i*(test_fmpz_poly2->limbs+1)] = 0; fmpz_poly_bit_unpack(test_fmpz_poly2, test_modF_poly, bundle, bits); fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); ZmodF_poly_clear(test_modF_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test_fmpz_poly_bit_pack_unsigned() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; ZmodF_poly_t test_modF_poly; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length, depth, bundle; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(FLINT_BITS-2)+ 2; fmpz_poly_init2(test_fmpz_poly, 1, 1); fmpz_poly_init2(test_fmpz_poly2, 1, 10); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; bundle = length/5; if (bundle == 0) bundle = length; depth = 0; while ((1<length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); ZmodF_poly_init(test_modF_poly, depth, (bits*bundle-1)/FLINT_BITS+1, 0); fmpz_poly_bit_pack(test_modF_poly, test_fmpz_poly, bundle, bits, length, 1L); test_fmpz_poly2->length = length; for (unsigned long i = 0; i < length; i++) // Must clear coeffs in advance test_fmpz_poly2->coeffs[i*(test_fmpz_poly2->limbs+1)] = 0; fmpz_poly_bit_unpack_unsigned(test_fmpz_poly2, test_modF_poly, bundle, bits); fmpz_poly_check_normalisation(test_fmpz_poly2); fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); ZmodF_poly_clear(test_modF_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, 
",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test_fmpz_poly_limb_pack_unsigned() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; ZmodF_poly_t test_modF_poly; int result = 1; unsigned long bits, length, length2, depth, bundle, limbs; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 1; limbs = (bits-1)/FLINT_BITS + 1; fmpz_poly_init2(test_fmpz_poly, 1, limbs); fmpz_poly_init2(test_fmpz_poly2, 1, limbs); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); mpz_poly_realloc(test_poly2, length); do randpoly_unsigned(test_poly, length, bits); while (mpz_poly_length(test_poly) < length); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); ZmodF_poly_init(test_modF_poly, 0, length*limbs, 0); fmpz_poly_limb_pack(test_modF_poly, test_fmpz_poly, length, limbs); fmpz_poly_limb_unpack_unsigned(test_fmpz_poly2, test_modF_poly, length, limbs); fmpz_poly_check_normalisation(test_fmpz_poly2); test_fmpz_poly2->length = length; fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); ZmodF_poly_clear(test_modF_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return 
result; } int test_fmpz_poly_limb_pack() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; ZmodF_poly_t test_modF_poly; int result = 1; unsigned long bits, length, length2, depth, bundle, limbs; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 1000) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 2; limbs = (bits-1)/FLINT_BITS + 1; fmpz_poly_init2(test_fmpz_poly, 1, limbs); fmpz_poly_init2(test_fmpz_poly2, 1, limbs); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); mpz_poly_realloc(test_poly2, length); do randpoly(test_poly, length, bits-1); while (mpz_poly_length(test_poly) < length); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); ZmodF_poly_init(test_modF_poly, 0, length*limbs, 0); fmpz_poly_limb_pack(test_modF_poly, test_fmpz_poly, length, limbs); fmpz_poly_limb_unpack(test_fmpz_poly2, test_modF_poly, length, limbs); fmpz_poly_check_normalisation(test_fmpz_poly2); test_fmpz_poly2->length = length; fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); ZmodF_poly_clear(test_modF_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test_fmpz_poly_byte_pack_unsigned() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; ZmodF_poly_t test_modF_poly; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length, bytes; mpz_poly_init(test_poly); 
mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 200) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 64; bytes = ((bits-1)>>3)+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 1) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); mpz_poly_realloc(test_poly2, length); do randpoly_unsigned(test_poly, length, bits/2); while (mpz_poly_length(test_poly) < length); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); ZmodF_poly_init(test_modF_poly, 0, ((bytes*length-1)>>FLINT_LG_BYTES_PER_LIMB)+1, 0); for (unsigned long j = 0; j < 100; j++) { fmpz_poly_byte_pack(test_modF_poly, test_fmpz_poly, length, bytes, length, 1L); test_fmpz_poly2->length = length; for (unsigned long i = 0; i < length; i++) // Must clear coeffs in advance test_fmpz_poly2->coeffs[i*(test_fmpz_poly2->limbs+1)] = 0; fmpz_poly_byte_unpack_unsigned(test_fmpz_poly2, test_modF_poly->coeffs[0], length, bytes); fmpz_poly_check_normalisation(test_fmpz_poly2); } fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); ZmodF_poly_clear(test_modF_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test_fmpz_poly_byte_pack() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; ZmodF_poly_t test_modF_poly; mpz_t temp; mpz_init(temp); int result = 1; unsigned long bits, length, bytes; 
mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 5) && (result == 1) ; count1++) { bits = random_ulong(1000)+ 130; bytes = ((bits-1)>>3)+1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+1); for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++) { length = random_ulong(1000)+1; #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); mpz_poly_realloc(test_poly2, length); do randpoly(test_poly, length, bits/2); while (mpz_poly_length(test_poly) < length); #if DEBUG for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); #endif mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); ZmodF_poly_init(test_modF_poly, 0, ((bytes*length-1)>>FLINT_LG_BYTES_PER_LIMB)+1, 0); for (unsigned long j = 0; j < 100; j++) { fmpz_poly_byte_pack(test_modF_poly, test_fmpz_poly, length, bytes, length, 1L); test_fmpz_poly2->length = length; for (unsigned long i = 0; i < length; i++) // Must clear coeffs in advance test_fmpz_poly2->coeffs[i*(test_fmpz_poly2->limbs+1)] = 0; fmpz_poly_byte_unpack(test_fmpz_poly2, test_modF_poly->coeffs[0], length, bytes); fmpz_poly_check_normalisation(test_fmpz_poly2); } fmpz_poly_to_mpz_poly(test_poly2, test_fmpz_poly2); ZmodF_poly_clear(test_modF_poly); #if DEBUG for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, ",test_poly2->coeffs[j]); printf("\n\n"); #endif result = mpz_poly_equal(test_poly, test_poly2); if (!result) { for (unsigned j = 0; j < test_poly->length; j++) gmp_printf("%Zx, ",test_poly->coeffs[j]); printf("\n\n"); for (unsigned j = 0; j < test_poly2->length; j++) gmp_printf("%Zx, ",test_poly2->coeffs[j]); printf("\n\n"); } } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_clear(temp); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); 
return result; } int test_fmpz_poly_content() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, bits2, limbs2, length; mpz_t temp, x_mpz; mpz_init(temp); mpz_init(x_mpz); mp_limb_t * x; mpz_poly_init(test_poly); for (unsigned long count1 = 0; (count1 < 1000) && (result == 1) ; count1++) { bits = randint(1000) + 1; bits2 = randint(1000) + 1; limbs2 = (bits2-1)/FLINT_BITS+1; bits2 = limbs2*FLINT_BITS; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_fmpz_poly2, 1, (bits-1)/FLINT_BITS+(bits2-1)/FLINT_BITS+2); x = (mp_limb_t*) malloc(sizeof(mp_limb_t)*(limbs2+1)); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = randint(100)+1; #if DEBUG printf("length = %ld, bits = %ld, bits2 = %ld\n",length, bits, bits2); #endif fmpz_poly_fit_length(test_fmpz_poly, length); fmpz_poly_fit_length(test_fmpz_poly2, length); F_mpn_clear(x, limbs2+1); mpn_random2(x+1, limbs2); if (randint(2)) x[0] = limbs2; else x[0] = -limbs2; fmpz_t c = fmpz_init((bits-1)/FLINT_BITS+1); do { randpoly(test_poly, length, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_content(c, test_fmpz_poly); } while(!fmpz_is_one(c) && (test_fmpz_poly->length != 1)); fmpz_clear(c); _fmpz_poly_scalar_mul_fmpz(test_fmpz_poly2, test_fmpz_poly, x); c = fmpz_init((bits+bits2-1)/FLINT_BITS+1); fmpz_poly_content(c, test_fmpz_poly2); if ((long)x[0] < 0L) x[0] = -x[0]; result = (fmpz_equal(c, x) || (test_fmpz_poly->length == 1)); #if DEBUG2 if (!result) { fmpz_print(c); printf("\n"); fmpz_print(x); printf("\n"); fmpz_poly_print_pretty(test_fmpz_poly, "x"); printf("\n"); } #endif fmpz_clear(c); } free(x); fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); mpz_clear(temp); mpz_clear(x_mpz); return result; } int test_fmpz_poly_gcd_subresultant() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, 
test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4; ZmodF_poly_t test_modF_poly; int result = 1; unsigned long bits, bits2, bits3, length, length2, length3; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); mpz_poly_init(test_poly4); for (unsigned long count1 = 0; (count1 < 3000) && (result == 1); count1++) { bits = random_ulong(1000) + 1; bits2 = random_ulong(1000) + 1; bits3 = random_ulong(1000) + 1; length2 = random_ulong(10)+1; length = random_ulong(10)+1; length3 = random_ulong(10); fmpz_poly_init(test_fmpz_poly); fmpz_poly_init(test_fmpz_poly2); fmpz_poly_init(test_fmpz_poly3); #if DEBUG printf("%ld, %ld, %ld, %ld\n", length, length2, bits, bits2); #endif mpz_poly_realloc(test_poly, length); mpz_poly_realloc(test_poly2, length2); mpz_poly_realloc(test_poly3, length3); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { do { randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_gcd_subresultant(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); } while ((test_fmpz_poly3->length != 1) || (test_fmpz_poly3->coeffs[0] != 1L) || (test_fmpz_poly3->coeffs[1] != 1L)); randpoly(test_poly3, length3, bits3); mpz_poly_to_fmpz_poly(test_fmpz_poly3, test_poly3); fmpz_poly_mul(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly3); fmpz_poly_mul(test_fmpz_poly2, test_fmpz_poly2, test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_gcd_subresultant(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2); #if DEBUG printf("GCD = "); fmpz_poly_print_pretty(test_fmpz_poly3, "x"); printf("\n\n"); #endif if (test_fmpz_poly3->length) if (fmpz_sgn(_fmpz_poly_lead(test_fmpz_poly4)) != fmpz_sgn(_fmpz_poly_lead(test_fmpz_poly3))) fmpz_poly_neg(test_fmpz_poly4, test_fmpz_poly4); result = fmpz_poly_equal(test_fmpz_poly3, test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly4); #if DEBUG if (!result) { } #endif } 
fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); mpz_poly_clear(test_poly4); return result; } int test_fmpz_poly_gcd_modular() { mpz_poly_t test_poly, test_poly2, test_poly3, test_poly4; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; ZmodF_poly_t test_modF_poly; int result = 1; unsigned long bits, bits2, bits3, length, length2, length3; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); mpz_poly_init(test_poly3); mpz_poly_init(test_poly4); for (unsigned long count1 = 0; (count1 < 3000) && (result == 1); count1++) { bits = random_ulong(100) + 1; bits2 = random_ulong(100) + 1; bits3 = random_ulong(100) + 1; length2 = random_ulong(30); length = random_ulong(30); length3 = random_ulong(10); fmpz_poly_init(test_fmpz_poly); fmpz_poly_init(test_fmpz_poly2); fmpz_poly_init(test_fmpz_poly3); #if DEBUG printf("%ld, %ld, %ld, %ld\n", length, length2, bits, bits2); #endif mpz_poly_realloc(test_poly, length); mpz_poly_realloc(test_poly2, length2); mpz_poly_realloc(test_poly3, length3); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2); randpoly(test_poly3, length3, bits3); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly3, test_poly3); fmpz_poly_mul(test_fmpz_poly, test_fmpz_poly, test_fmpz_poly3); fmpz_poly_mul(test_fmpz_poly2, test_fmpz_poly2, test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); fmpz_poly_gcd_subresultant(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_gcd_modular(test_fmpz_poly5, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_check_normalisation(test_fmpz_poly5); result = fmpz_poly_equal(test_fmpz_poly4, test_fmpz_poly5); #if DEBUG if (!result) { 
printf("GCD = "); fmpz_poly_print(test_fmpz_poly4); printf("\n\n"); printf("GCD2 = "); fmpz_poly_print(test_fmpz_poly5); printf("\n\n"); } #endif fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); } fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); mpz_poly_clear(test_poly3); mpz_poly_clear(test_poly4); return result; } int test_fmpz_poly_CRT_unsigned() { mpz_poly_t pol1; fmpz_poly_t fpol1, fpol2; zmod_poly_t zpol; unsigned long bits, length; int result = 1; mpz_poly_init(pol1); fmpz_poly_init(fpol1); fmpz_poly_init(fpol2); for (unsigned long i = 0; (i < 4000) && (result == 1); i++) { bits = random_ulong(1000)+1; length = random_ulong(100)+1; randpoly_unsigned(pol1, length, bits); mpz_poly_to_fmpz_poly(fpol1, pol1); #if DEBUG printf("bits = %ld, length = %ld\n", bits, length); #endif unsigned long * primes = flint_stack_alloc((long) FLINT_MAX(bits-1, 0)/(FLINT_BITS-2)+1); unsigned long num_primes = 0; fmpz_t modulus = fmpz_init((long) FLINT_MAX(bits-1, 0)/FLINT_BITS+2); fmpz_t new_modulus = fmpz_init((long) FLINT_MAX(bits-1, 0)/FLINT_BITS+2); primes[0] = z_nextprime(1L<<(FLINT_BITS-2)); fmpz_set_ui(modulus, primes[0]); while (fmpz_bits(modulus) <= bits) { primes[num_primes+1] = z_nextprime(primes[num_primes]); fmpz_mul_ui(modulus, modulus, primes[num_primes+1]); num_primes++; } num_primes++; zmod_poly_init(zpol, primes[0]); fmpz_poly_to_zmod_poly(zpol, fpol1); zmod_poly_to_fmpz_poly_unsigned(fpol2, zpol); fmpz_set_ui(modulus, primes[0]); unsigned long c, r2; double pre; for (unsigned long i = 1; i < num_primes; i++) { zmod_poly_clear(zpol); zmod_poly_init(zpol, primes[i]); fmpz_poly_to_zmod_poly(zpol, fpol1); fmpz_poly_CRT_unsigned(fpol2, fpol2, zpol, new_modulus, modulus); fmpz_set(modulus, new_modulus); } result = (fmpz_poly_equal(fpol1, fpol2)); #if DEBUG if (!result) { fmpz_poly_print(fpol1); printf("\n\n"); fmpz_poly_print(fpol2); 
printf("\n\n"); } #endif zmod_poly_clear(zpol); fmpz_clear(modulus); fmpz_clear(new_modulus); flint_stack_release(); } mpz_poly_clear(pol1); fmpz_poly_clear(fpol1); fmpz_poly_clear(fpol2); return result; } int test_fmpz_poly_CRT() { mpz_poly_t pol1; fmpz_poly_t fpol1, fpol2; zmod_poly_t zpol; unsigned long bits, length; int result = 1; mpz_poly_init(pol1); fmpz_poly_init(fpol1); fmpz_poly_init(fpol2); for (unsigned long i = 0; (i < 4000) && (result == 1); i++) { bits = random_ulong(1000)+1; length = random_ulong(100)+1; randpoly(pol1, length, bits); mpz_poly_to_fmpz_poly(fpol1, pol1); #if DEBUG printf("bits = %ld, length = %ld\n", bits, length); #endif unsigned long * primes = flint_stack_alloc((long) FLINT_MAX(bits, 0)/(FLINT_BITS-2)+1); unsigned long num_primes = 0; fmpz_t modulus = fmpz_init((long) FLINT_MAX(bits, 0)/FLINT_BITS+2); fmpz_t new_modulus = fmpz_init((long) FLINT_MAX(bits, 0)/FLINT_BITS+2); primes[0] = z_nextprime(1L<<(FLINT_BITS-2)); fmpz_set_ui(modulus, primes[0]); while (fmpz_bits(modulus) <= bits + 1) { primes[num_primes+1] = z_nextprime(primes[num_primes]); fmpz_mul_ui(modulus, modulus, primes[num_primes+1]); num_primes++; } num_primes++; zmod_poly_init(zpol, primes[0]); fmpz_poly_to_zmod_poly(zpol, fpol1); zmod_poly_to_fmpz_poly(fpol2, zpol); fmpz_set_ui(modulus, primes[0]); unsigned long c, r2; double pre; for (unsigned long i = 1; i < num_primes; i++) { zmod_poly_clear(zpol); zmod_poly_init(zpol, primes[i]); fmpz_poly_to_zmod_poly(zpol, fpol1); fmpz_poly_CRT(fpol2, fpol2, zpol, new_modulus, modulus); fmpz_set(modulus, new_modulus); } result = (fmpz_poly_equal(fpol1, fpol2)); #if DEBUG for (unsigned long i = 0; i < fpol1->length; i++) { fmpz_t c1 = fmpz_poly_get_coeff_ptr(fpol1, i); fmpz_t c2 = fmpz_poly_get_coeff_ptr(fpol2, i); if (!fmpz_equal(c1, c2)) { fmpz_print(c1); printf("\n\n"); fmpz_print(c2); printf("\n\n"); } } /*if (!result) { fmpz_poly_print(fpol1); printf("\n\n"); fmpz_poly_print(fpol2); printf("\n\n"); }*/ #endif 
zmod_poly_clear(zpol); fmpz_clear(modulus); fmpz_clear(new_modulus); flint_stack_release(); } mpz_poly_clear(pol1); fmpz_poly_clear(fpol1); fmpz_poly_clear(fpol2); return result; } int test_fmpz_poly_invmod_modular() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2, length3; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 0; (count1 < 500) && (result == 1); count1++) { bits = random_ulong(100) + 1; bits2 = random_ulong(100) + 1; length2 = random_ulong(30) + 2; fmpz_poly_init(test_fmpz_poly); fmpz_poly_init(test_fmpz_poly2); fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); mpz_poly_realloc(test_poly, length); mpz_poly_realloc(test_poly2, length2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { fmpz_t c = fmpz_init(bits/FLINT_BITS + 1); do { do { randpoly(test_poly2, length2, bits2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); } while ((test_fmpz_poly2->length == 0) || (test_fmpz_poly2->length == 1)); randpoly(test_poly, randint(test_fmpz_poly2->length-1)+1, bits); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_set_ui(_fmpz_poly_lead(test_fmpz_poly2), 1UL); fmpz_poly_gcd(test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_content(c, test_fmpz_poly); } while ((test_fmpz_poly->length == 0) || (test_fmpz_poly4->length != 1) || (!fmpz_is_one(c) && (test_fmpz_poly->length == 1))); fmpz_clear(c); #if DEBUG printf("length1 = %ld, length2 = %ld, bits1 = %ld, bits2 = %ld\n", test_fmpz_poly->length, test_fmpz_poly2->length, bits, bits2); #endif fmpz_t d = fmpz_init(fmpz_poly_resultant_bound(test_fmpz_poly, test_fmpz_poly2)/FLINT_BITS+2); fmpz_poly_invmod_modular(d, test_fmpz_poly4, test_fmpz_poly, test_fmpz_poly2); fmpz_poly_mul(test_fmpz_poly5, test_fmpz_poly4, test_fmpz_poly); 
fmpz_poly_divrem(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly5, test_fmpz_poly2); result = ((test_fmpz_poly->length == 1) && (fmpz_equal(d, test_fmpz_poly->coeffs))); fmpz_clear(d); #if DEBUG if (!result) { printf("Inverse = "); fmpz_poly_print(test_fmpz_poly4); printf("\n\n"); printf("Prod. mod = "); fmpz_poly_print(test_fmpz_poly); printf("\n\n"); } #endif } fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } int test_fmpz_poly_2norm() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly; mpz_t temp1, temp2; mpz_init(temp1); mpz_init(temp2); int result = 1; unsigned long bits, length; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(20); #if DEBUG printf("%ld, %ld\n",length, bits); #endif randpoly(test_poly, length, bits); #if DEBUG mpz_poly_print_pretty(test_poly, "x"); printf("\n\n"); #endif fmpz_poly_init(test_fmpz_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_t norm = fmpz_init(test_fmpz_poly->limbs+1); fmpz_poly_2norm(norm, test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly); fmpz_to_mpz(temp2, norm); fmpz_clear(norm); mpz_poly_2norm(temp1, test_poly); result = (mpz_cmp(temp1, temp2) == 0); #if DEBUG if (!result) { gmp_printf("%Zd, %Zd\n", temp1, temp2); } #endif } } mpz_clear(temp1); mpz_clear(temp2); mpz_poly_clear(test_poly); return result; } int test_fmpz_poly_resultant() { int result = 1; fmpz_poly_t pol1, pol2, lin; unsigned long bits, limbs, size; for (unsigned long count1 = 0; (count1 < 50) && (result == 1); count1++) { bits = randint(16)+2; limbs = bits/FLINT_BITS + 1; size = limbs + 1; fmpz_poly_init(pol1); fmpz_poly_init(pol2); 
fmpz_poly_init(lin); unsigned long r1 = randint(bits); unsigned long r2 = randint(bits); mpz_t * roots1 = flint_stack_alloc(sizeof(mpz_t)*r1); mpz_t * roots2 = flint_stack_alloc(sizeof(mpz_t)*r2); for (unsigned long i = 0; i < r1; i++) { mpz_init(roots1[i]); } for (unsigned long i = 0; i < r2; i++) { mpz_init(roots2[i]); } for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++) { #if DEBUG printf("r1 = %ld, r2 = %ld, bits = %ld\n", r1, r2, bits); #endif int exists; for (unsigned long i = 0; i < r1; ) { exists = 0; mpz_rrandomb(roots1[i], randstate, bits); for (unsigned long j = 0; j < i; j++) if (mpz_cmp(roots1[j], roots1[i]) == 0) exists = 1; if (!exists) i++; } for (unsigned long i = 0; i < r2; ) { exists = 0; mpz_rrandomb(roots2[i], randstate, bits); for (unsigned long j = 0; j < i; j++) if (mpz_cmp(roots2[j], roots2[i]) == 0) exists = 1; if (!exists) i++; } fmpz_poly_set_coeff_ui(pol1, 0, 1L); pol1->length = 1; fmpz_poly_set_coeff_ui(pol2, 0, 1L); pol2->length = 1; fmpz_poly_set_coeff_ui(lin, 1, 1L); lin->length = 2; for (unsigned long i = 0; i < r1; i++) { mpz_neg(roots1[i], roots1[i]); fmpz_poly_set_coeff_mpz(lin, 0, roots1[i]); mpz_neg(roots1[i], roots1[i]); fmpz_poly_mul(pol1, pol1, lin); } for (unsigned long i = 0; i < r2; i++) { mpz_neg(roots2[i], roots2[i]); fmpz_poly_set_coeff_mpz(lin, 0, roots2[i]); mpz_neg(roots2[i], roots2[i]); fmpz_poly_mul(pol2, pol2, lin); } mpz_t diff; mpz_t res1; mpz_t res2; mpz_init(diff); mpz_init(res1); mpz_init(res2); mpz_set_ui(res1, 1L); for (unsigned long i = 0; i < r1; i++) { for (unsigned long j = 0; j < r2; j++) { mpz_sub(diff, roots1[i], roots2[j]); mpz_mul(res1, res1, diff); } } unsigned long bound = fmpz_poly_resultant_bound(pol1, pol2)+2; fmpz_t res = fmpz_init(bound/FLINT_BITS + 2); fmpz_poly_resultant(res, pol1, pol2); fmpz_to_mpz(res2, res); result = (mpz_cmp(res1, res2) == 0); #if DEBUG if (!result) { gmp_printf("res1 = %Zd, res2 = %Zd\n", res1, res2); fmpz_poly_print(pol1); printf("\n\n"); 
fmpz_poly_print(pol2); printf("\n\n"); for (unsigned long i = 0; i < r1; i++) gmp_printf("%Zd, ", roots1[i]); printf("\n"); for (unsigned long i = 0; i < r2; i++) gmp_printf("%Zd, ", roots2[i]); printf("\n"); } #endif mpz_clear(diff); mpz_clear(res1); mpz_clear(res2); } for (unsigned long i = 0; i < r1; i++) { mpz_clear(roots1[i]); } for (unsigned long i = 0; i < r2; i++) { mpz_clear(roots2[i]); } flint_stack_release(); flint_stack_release(); fmpz_poly_clear(lin); fmpz_poly_clear(pol1); fmpz_poly_clear(pol2); } return result; } int test_fmpz_poly_xgcd_modular() { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2, test_fmpz_poly3, test_fmpz_poly4, test_fmpz_poly5; int result = 1; unsigned long bits, bits2, length, length2; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); for (unsigned long count1 = 0; (count1 < 1000) && (result == 1); count1++) { bits = random_ulong(100) + 1; bits2 = random_ulong(100) + 1; length2 = random_ulong(30)+1; length = random_ulong(30)+1; fmpz_poly_init(test_fmpz_poly); fmpz_poly_init(test_fmpz_poly2); fmpz_poly_init(test_fmpz_poly3); fmpz_poly_init(test_fmpz_poly4); fmpz_poly_init(test_fmpz_poly5); #if DEBUG printf("%ld, %ld, %ld, %ld\n", length, length2, bits, bits2); #endif mpz_poly_realloc(test_poly, length); mpz_poly_realloc(test_poly2, length2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { do { randpoly(test_poly, length, bits); randpoly(test_poly2, length2, bits2); mpz_poly_to_fmpz_poly(test_fmpz_poly2, test_poly2); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_primitive_part(test_fmpz_poly, test_fmpz_poly); fmpz_poly_primitive_part(test_fmpz_poly2, test_fmpz_poly2); fmpz_poly_gcd(test_fmpz_poly3, test_fmpz_poly, test_fmpz_poly2); } while (test_fmpz_poly3->length > 1); unsigned long bound = fmpz_poly_resultant_bound(test_fmpz_poly, test_fmpz_poly2)+2; fmpz_t res = fmpz_init(bound/FLINT_BITS+2); fmpz_poly_xgcd_modular(res, test_fmpz_poly3, test_fmpz_poly4, 
test_fmpz_poly, test_fmpz_poly2); if (res[0] != 0L) { fmpz_poly_mul(test_fmpz_poly3, test_fmpz_poly3, test_fmpz_poly); fmpz_poly_mul(test_fmpz_poly4, test_fmpz_poly4, test_fmpz_poly2); fmpz_poly_add(test_fmpz_poly5, test_fmpz_poly3, test_fmpz_poly4); result = ((test_fmpz_poly5->length == 1) && (fmpz_equal(test_fmpz_poly5->coeffs, res))); } #if DEBUG if (!result) { printf("Resultant = "); fmpz_poly_print(test_fmpz_poly5); printf("\n\n"); printf("Resultant = "); fmpz_print(res); printf("\n\n"); } #endif fmpz_clear(res); } fmpz_poly_clear(test_fmpz_poly5); fmpz_poly_clear(test_fmpz_poly4); fmpz_poly_clear(test_fmpz_poly3); fmpz_poly_clear(test_fmpz_poly2); fmpz_poly_clear(test_fmpz_poly); } mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); return result; } void fmpz_poly_test_all() { int success, all_success = 1; printf("FLINT_BITS = %ld\n", FLINT_BITS); #if TESTFILE RUN_TEST(fmpz_poly_freadprint); #endif RUN_TEST(fmpz_poly_tofromstring); RUN_TEST(fmpz_poly_to_ZmodF_poly); RUN_TEST(fmpz_poly_bit_pack); RUN_TEST(fmpz_poly_bit_pack_unsigned); RUN_TEST(fmpz_poly_byte_pack_unsigned); RUN_TEST(fmpz_poly_byte_pack); RUN_TEST(fmpz_poly_limb_pack_unsigned); RUN_TEST(fmpz_poly_limb_pack); RUN_TEST(_fmpz_poly_attach); RUN_TEST(_fmpz_poly_attach_shift); RUN_TEST(_fmpz_poly_attach_truncate); RUN_TEST(_fmpz_poly_truncate); RUN_TEST(_fmpz_poly_max_bits); RUN_TEST(_fmpz_poly_max_bits1); RUN_TEST(_fmpz_poly_max_limbs); RUN_TEST(_fmpz_poly_convert); RUN_TEST(_fmpz_poly_getset_ui); RUN_TEST(fmpz_poly_getset_ui); RUN_TEST(_fmpz_poly_getset_si); RUN_TEST(fmpz_poly_getset_si); RUN_TEST(_fmpz_poly_get_coeff_ptr); RUN_TEST(fmpz_poly_get_coeff_ptr); RUN_TEST(_fmpz_poly_normalise); RUN_TEST(_fmpz_poly_getset_coeff); RUN_TEST(fmpz_poly_getset_coeff); RUN_TEST(_fmpz_poly_getset_coeff_fmpz); RUN_TEST(fmpz_poly_getset_coeff_fmpz); RUN_TEST(_fmpz_poly_getset_coeff_mpz); RUN_TEST(fmpz_poly_getset_coeff_mpz); RUN_TEST(fmpz_poly_get_coeff_mpz_read_only); RUN_TEST(_fmpz_poly_setequal); 
RUN_TEST(_fmpz_poly_zero_coeffs); RUN_TEST(fmpz_poly_zero_coeffs); RUN_TEST(fmpz_poly_swap); RUN_TEST(_fmpz_poly_reverse); RUN_TEST(_fmpz_poly_neg); RUN_TEST(_fmpz_poly_shift); RUN_TEST(_fmpz_poly_add); RUN_TEST(fmpz_poly_add); RUN_TEST(_fmpz_poly_sub); RUN_TEST(fmpz_poly_sub); RUN_TEST(_fmpz_poly_scalar_mul_ui); RUN_TEST(fmpz_poly_scalar_mul_ui); RUN_TEST(_fmpz_poly_scalar_mul_si); RUN_TEST(fmpz_poly_scalar_mul_si); RUN_TEST(_fmpz_poly_scalar_mul_fmpz); RUN_TEST(fmpz_poly_scalar_mul_fmpz); RUN_TEST(fmpz_poly_scalar_mul_mpz); RUN_TEST(_fmpz_poly_scalar_div_exact_ui); RUN_TEST(_fmpz_poly_scalar_div_exact_si); RUN_TEST(_fmpz_poly_scalar_div_ui); RUN_TEST(_fmpz_poly_scalar_tdiv_ui); RUN_TEST(_fmpz_poly_scalar_div_si); RUN_TEST(_fmpz_poly_scalar_tdiv_si); RUN_TEST(_fmpz_poly_scalar_div_fmpz); RUN_TEST(fmpz_poly_scalar_div_fmpz); RUN_TEST(fmpz_poly_scalar_div_mpz); RUN_TEST(_fmpz_poly_mul_classical); RUN_TEST(_fmpz_poly_mul_classical_trunc); RUN_TEST(_fmpz_poly_mul_classical_trunc_left); RUN_TEST(_fmpz_poly_mul_karatsuba); RUN_TEST(_fmpz_poly_mul_karatsuba_trunc); RUN_TEST(_fmpz_poly_mul_karatsuba_trunc_left); RUN_TEST(_fmpz_poly_mul_KS); RUN_TEST(_fmpz_poly_mul_KS_trunc); RUN_TEST(_fmpz_poly_mul_SS); RUN_TEST(_fmpz_poly_mul_SS_trunc); RUN_TEST(_fmpz_poly_mul); RUN_TEST(fmpz_poly_mul); RUN_TEST(_fmpz_poly_mul_trunc_n); RUN_TEST(fmpz_poly_mul_trunc_n); RUN_TEST(_fmpz_poly_mul_trunc_left_n); RUN_TEST(fmpz_poly_mul_trunc_left_n); RUN_TEST(fmpz_poly_div_classical); RUN_TEST(fmpz_poly_divrem_classical); RUN_TEST(fmpz_poly_div_divconquer_recursive); RUN_TEST(fmpz_poly_divrem_divconquer); RUN_TEST(fmpz_poly_div_divconquer); RUN_TEST(fmpz_poly_newton_invert_basecase); RUN_TEST(fmpz_poly_newton_invert); RUN_TEST(fmpz_poly_div_series); RUN_TEST(fmpz_poly_div_newton); RUN_TEST(fmpz_poly_div_mulders); RUN_TEST(fmpz_poly_divrem); RUN_TEST(fmpz_poly_div); RUN_TEST(fmpz_poly_pseudo_divrem_recursive); RUN_TEST(fmpz_poly_pseudo_divrem_basecase); RUN_TEST(fmpz_poly_pseudo_div_basecase); 
RUN_TEST(fmpz_poly_pseudo_div_recursive); RUN_TEST(fmpz_poly_pseudo_divrem_cohen); RUN_TEST(fmpz_poly_pseudo_divrem_shoup); RUN_TEST(fmpz_poly_pseudo_divrem); RUN_TEST(fmpz_poly_pseudo_div); RUN_TEST(fmpz_poly_power); RUN_TEST(fmpz_poly_power_trunc_n); RUN_TEST(fmpz_poly_content); RUN_TEST(fmpz_poly_CRT_unsigned); RUN_TEST(fmpz_poly_CRT); RUN_TEST(fmpz_poly_gcd_subresultant); RUN_TEST(fmpz_poly_gcd_modular); RUN_TEST(fmpz_poly_resultant); RUN_TEST(fmpz_poly_2norm); RUN_TEST(fmpz_poly_invmod_modular); RUN_TEST(fmpz_poly_xgcd_modular); printf(all_success ? "\nAll tests passed\n" : "\nAt least one test FAILED!\n"); } int main() { test_support_init(); fmpz_poly_test_all(); test_support_cleanup(); flint_stack_cleanup(); return 0; } flint-1.011/ZmodF.c0000644017361200017500000004026411025357254013714 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** ZmodF.c Copyright (C) 2007, David Harvey Routines for arithmetic on elements of Z/pZ where p = B^n + 1, B = 2^FLINT_BITS. These are currently used only in the ZmodF_poly module, which supplies the Schoenhage-Strassen FFT code. 
******************************************************************************/ #include "ZmodF.h" #include "longlong_wrapper.h" #include "longlong.h" /* For odd s, finds "limbs" and "bits" such that 2^(s/2) is decomposed into 2^(-bits) * B^limbs * (1 - B^(n/2)), where 0 <= bits < FLINT_BITS, and 0 <= limbs < 2n. i.e. we are decomposing a rotation involving a sqrt2 into a fractional limbshift and a pseudosqrt2 call. PRECONDIITONS: s must be odd 0 <= s < 2n*FLINT_BITS */ void ZmodF_decompose_rotation(unsigned long* limbs, unsigned long* bits, unsigned long s, unsigned long n) { FLINT_ASSERT(s & 1); FLINT_ASSERT(s < 2*n*FLINT_BITS); // first split into 2^r * (1 - B^(n/2)) unsigned long r = (s >> 1) - 3*n*FLINT_BITS/4; if ((long)r < 0) r += 2*n*FLINT_BITS; // now split 2^r into 2^(-bits) and B^limbs unsigned long z = r & (FLINT_BITS - 1); r /= FLINT_BITS; if (z) { *bits = FLINT_BITS - z; if (++r == 2*n) r = 0; } else *bits = 0; *limbs = r; } void ZmodF_normalise(ZmodF_t a, unsigned long n) { mp_limb_t hi = a[n]; if ((mp_limb_signed_t) hi < 0) { // If top limb (hi) is negative, we add -hi multiples of p a[n] = 0; mpn_add_1(a, a, n + 1, -hi); // If the result is >= p (very unlikely)... if (a[n] && a[0]) { // ... need to subtract off p. a[n] = 0; a[0]--; } } else { // If top limb (hi) is non-negative, we subtract hi multiples of p a[n] = 0; mpn_sub_1(a, a, n + 1, hi); // If the result is negative (very unlikely)... if (a[n]) { // ... need to add back p. a[n] = 0; mpn_add_1(a, a, n + 1, 1); } } } void ZmodF_mul_2exp(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n) { FLINT_ASSERT(s < n*FLINT_BITS); FLINT_ASSERT(a != b); unsigned long bits = s & (FLINT_BITS - 1); s /= FLINT_BITS; if (bits) { if (++s == n) { // special case if s == n-1 ZmodF_neg(b, a, n); ZmodF_short_div_2exp(b, b, FLINT_BITS - bits, n); return; } // Need to shift left by s limbs and right by // (FLINT_BITS - bits) bits. 
bits = FLINT_BITS - bits; // Shift top part of input directly into bottom part of output ZmodF_fast_reduce(a, n); mp_limb_t carry1 = mpn_rshift(b, a+n-s, s+1, bits); mp_limb_t overlap = b[s]; // complement the part we just shifted in long i = s-1; do b[i] = ~b[i]; while (--i >= 0); // shift bottom part of input directly into top part of output mp_limb_t carry2 = mpn_rshift(b+s, a, n-s, bits); b[n] = -1; // compensate mod p for 1's complement // fiddle with carries mpn_add_1(b+n-1, b+n-1, 2, carry1); mpn_add_1(b+s-1, b+s-1, n-s+2, carry2); mpn_sub_1(b+s, b+s, n-s+1, overlap+1); } else { if (s) ZmodF_mul_Bexp(b, a, s, n); else ZmodF_set(b, a, n); } } void ZmodF_mul_sqrt2exp(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n) { FLINT_ASSERT(s < 2*n*FLINT_BITS); FLINT_ASSERT(a != b); if (s & 1) { unsigned long limbs, bits; ZmodF_decompose_rotation(&limbs, &bits, s, n); if (n & 1) ZmodF_mul_pseudosqrt2_n_odd(b, a, limbs, n); else ZmodF_mul_pseudosqrt2_n_even(b, a, limbs, n); if (bits) ZmodF_short_div_2exp(b, b, bits, n); } else ZmodF_mul_2exp(b, a, s >> 1, n); } void ZmodF_sub_mul_2exp(ZmodF_t c, ZmodF_t a, ZmodF_t b, unsigned long s, unsigned long n) { FLINT_ASSERT(s < n*FLINT_BITS); FLINT_ASSERT(c != a); FLINT_ASSERT(c != b); unsigned long bits = s & (FLINT_BITS - 1); s /= FLINT_BITS; if (bits) { // shift a-b left by s+1 limbs... if (++s == n) ZmodF_sub(c, b, a, n); else ZmodF_sub_mul_Bexp(c, a, b, s, n); // ... and then shift right by remaining bits ZmodF_short_div_2exp(c, c, FLINT_BITS - bits, n); } else { if (s) ZmodF_sub_mul_Bexp(c, a, b, s, n); else ZmodF_sub(c, a, b, n); } } void ZmodF_mul_pseudosqrt2_n_odd(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n) { FLINT_ASSERT(a != b); FLINT_ASSERT((n & 1) == 1); FLINT_ASSERT(s < 2*n); // Let ss = s+(n+1)/2 mod n, in the range (0, n]. unsigned long ss = s + (n+1)/2; if (ss > n) ss -= n; if (ss > n) ss -= n; // The next block of code has the following effect. 
// Pretend that the input is normalised to be divisible by B^(1/2) // (i.e. imagine that the bottom half-limb has been relocated mod p to the // overflow limb). Now write the input as // a = (X + Y*B^(n-ss) + Z*B^n) * B^(1/2), // where X is exactly n-ss limbs long, Y is exactly ss limbs long, // and where Z is a signed quantity, just a few bits long. // This block computes Z, and sets b to Y + X*B^ss. // (It doesn't set the overflow limb of b to anything meaningful.) mp_limb_signed_t Z; ZmodF_fast_reduce(a, n); mpn_rshift(b, a+n-ss, ss+1, FLINT_BITS/2); mp_limb_t underflow = mpn_rshift(b+ss, a, n-ss+1, FLINT_BITS/2); sub_ddmmss(Z, b[ss-1], 0, b[ss-1], 0, underflow); mp_limb_t carry1, carry2; // Now we need to add in B^s*a, taking into account the fact that some of // b currently has the wrong sign. We split into various cases depending // on relative locations of s and ss, and depending on sign issues. if (s <= n) { if (s <= (n-1)/2) { carry1 = s ? -mpn_sub_n(b, b, a+n-s, s) : 0; carry2 = mpn_add_n(b+s, b+s, a, (n+1)/2); b[n] = (ss < n) ? -mpn_sub_n(b+ss, a+(n+1)/2, b+ss, n-ss) : 0; signed_add_1(b+s, n-s+1, carry1 - a[n]); signed_add_1(b+ss, n-ss+1, Z + carry2); } else { carry1 = mpn_add_n(b, b, a+n-s, ss); long i = ss-1; do b[i] = ~b[i]; while (--i >= 0); carry2 = (n > 1) ? mpn_sub_n(b+ss, b+ss, a+(n+1)/2, (n-1)/2) : 0; b[n] = (s < n) ? mpn_add_n(b+s, b+s, a, n-s) - 1 : -1; signed_add_1(b+ss, n-ss+1, -carry1 - Z - 1); signed_add_1(b+s, n-s+1, -carry2 - a[n]); } } else { s -= n; if (s <= (n-1)/2) { carry1 = s ? -mpn_sub_n(b, a+n-s, b, s) : 0; carry2 = mpn_add_n(b+s, b+s, a, (n+1)/2); long i = ss-1; do b[i] = ~b[i]; while (--i >= s); b[n] = (ss < n) ? -mpn_sub_n(b+ss, b+ss, a+(n+1)/2, n-ss) : 0; signed_add_1(b+s, n-s+1, carry1 + a[n] + 1); signed_add_1(b+ss, n-ss+1, -Z - carry2 - 1); } else { carry1 = mpn_add_n(b, b, a+n-s, ss); carry2 = (n > 1) ? 
mpn_sub_n(b+ss, a+(n+1)/2, b+ss, (n-1)/2) : 0; b[n] = mpn_add_n(b+s, b+s, a, n-s); long i = n; do b[i] = ~b[i]; while (--i >= s); signed_add_1(b+ss, n-ss+1, carry1 + Z); signed_add_1(b+s, n-s+1, -carry2 + a[n] + 1); } } } void ZmodF_mul_pseudosqrt2_n_even(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n) { FLINT_ASSERT(a != b); FLINT_ASSERT((n & 1) == 0); FLINT_ASSERT(s < 2*n); mp_limb_t carry; if (s < n) { if (s <= n/2) { // We're computing B^s * (1 - B^(n/2)) * a. // If input is // 0 n/2-s n/2 n-s n // | x0 | y0 | x1 | y1 | // then output should be // 0 s n/2 n/2+s n // | y0-y1 | x0+x1 | y0+y1 | -x0+x1 | // Store x1 - x0 b[n] = (s < n/2) ? -mpn_sub_n(b+n/2+s, a+n/2, a, n/2-s) : 0; // Store x0 + x1 and y0 + y1 carry = mpn_add_n(b+s, a, a+n/2, n/2); signed_add_1(b+s+n/2, n/2-s+1, carry + a[n]); // Store y0 - y1 carry = s ? mpn_sub_n(b, a+n/2-s, a+n-s, s) : 0; signed_add_1(b+s, n-s+1, -a[n] - carry); } else { s -= n/2; // We're computing B^s * (1 + B^(n/2)) * a. // If input is // 0 n/2-s n/2 n-s n // | x0 | y0 | x1 | y1 | // then output should be // 0 s n/2 n/2+s n // | -y0-y1 | x0-x1 | y0-y1 | x0+x1 | // Store x0 + x1 // (the -1 compensates mod p for the bottom bit of the complement) b[n] = mpn_add_n(b+s+n/2, a, a+n/2, n-s-n/2) - 1; // Store x0 - x1 and y0 - y1 carry = mpn_sub_n(b+s, a, a+n/2, n/2); signed_add_1(b+s+n/2, n/2-s+1, -a[n] - carry); // Store -y0 - y1 carry = mpn_add_n(b, a+n/2-s, a+n-s, s); // (the -1 compensates for the top bit of the complement) signed_add_1(b+s, n-s+1, -a[n] - carry - 1); long i = s-1; do b[i] = ~b[i]; while (--i >= 0); } } else { s -= n; if (s < n/2) { // We're computing B^s * (-1 + B^(n/2)) * a. 
// If input is // 0 n/2-s n/2 n-s n // | x0 | y0 | x1 | y1 | // then output should be // 0 s n/2 n/2+s n // | -y0+y1 | -x0-x1 | -y0-y1 | x0-x1 | // Store x0 - x1 b[n] = -mpn_sub_n(b+n/2+s, a, a+n/2, n/2-s); // Store -x0 - x1 and -y0 - y1 carry = mpn_add_n(b+s, a, a+n/2, n/2); // (the -1 compensates for the top bit of the complement) signed_add_1(b+n/2+s, n/2-s+1, -a[n] - carry - 1); long i = n/2-1; do b[s+i] = ~b[s+i]; while (--i >= 0); // Store y1 - y0 carry = s ? mpn_sub_n(b, a+n-s, a+n/2-s, s) : 0; // (the +1 compensates for the bottom bit of the complement) signed_add_1(b+s, n-s+1, a[n] - carry + 1); } else { s -= n/2; // We're computing B^s * (-1 - B^(n/2)) * a. // If input is // 0 n/2-s n/2 n-s n // | x0 | y0 | x1 | y1 | // then output should be // 0 s n/2 n/2+s n // | y0+y1 | -x0+x1 | -y0+y1 | -x0-x1 | // Store -x0 - x1 // (the -1 compensates for the top bit of the complement) b[n] = -mpn_add_n(b+n/2+s, a, a+n/2, n/2-s) - 1; long i = n/2-s-1; do b[n/2+s+i] = ~b[n/2+s+i]; while (i-- >= 0); // Store x1 - x0 and y1 - y0 carry = mpn_sub_n(b+s, a+n/2, a, n/2); // (the +1 compensates for the bottom bit of the complement) signed_add_1(b+n/2+s, n/2-s+1, a[n] - carry + 1); // Store y0 + y1 carry = s ? 
mpn_add_n(b, a+n/2-s, a+n-s, s) : 0; signed_add_1(b+s, n-s+1, a[n] + carry); } } } void ZmodF_forward_butterfly_2exp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n) { FLINT_ASSERT(s < n*FLINT_BITS); FLINT_ASSERT(*a != *b); FLINT_ASSERT(*b != *z); FLINT_ASSERT(*a != *z); ZmodF_sub_mul_2exp(*z, *a, *b, s, n); ZmodF_add(*a, *a, *b, n); ZmodF_swap(b, z); } void ZmodF_forward_butterfly_sqrt2exp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n) { FLINT_ASSERT(s < 2*n*FLINT_BITS); FLINT_ASSERT(*a != *b); FLINT_ASSERT(*b != *z); FLINT_ASSERT(*a != *z); if (s & 1) { unsigned long limbs, bits; ZmodF_decompose_rotation(&limbs, &bits, s, n); if (limbs == 0) ZmodF_sub(*z, *a, *b, n); else if (limbs < n) ZmodF_sub_mul_Bexp(*z, *a, *b, limbs, n); else if (limbs == n) ZmodF_sub(*z, *b, *a, n); else ZmodF_sub_mul_Bexp(*z, *b, *a, limbs - n, n); ZmodF_add(*a, *a, *b, n); if (n & 1) ZmodF_mul_pseudosqrt2_n_odd(*b, *z, 0, n); else ZmodF_mul_pseudosqrt2_n_even(*b, *z, 0, n); if (bits) ZmodF_short_div_2exp(*b, *b, bits, n); } else ZmodF_forward_butterfly_2exp(a, b, z, s >> 1, n); } void ZmodF_inverse_butterfly_2exp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n) { FLINT_ASSERT(s < n*FLINT_BITS); FLINT_ASSERT(*a != *b); FLINT_ASSERT(*b != *z); FLINT_ASSERT(*a != *z); unsigned long bits = s & (FLINT_BITS - 1); if (bits) // shift right by leftover bits ZmodF_short_div_2exp(*b, *b, bits, n); s /= FLINT_BITS; if (s) { ZmodF_div_Bexp_sub(*z, *a, *b, s, n); ZmodF_div_Bexp_add(*a, *a, *b, s, n); } else { ZmodF_sub(*z, *a, *b, n); ZmodF_add(*a, *a, *b, n); } ZmodF_swap(z, b); } void ZmodF_inverse_butterfly_sqrt2exp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n) { FLINT_ASSERT(s < 2*n*FLINT_BITS); FLINT_ASSERT(*a != *b); FLINT_ASSERT(*b != *z); FLINT_ASSERT(*a != *z); if (s & 1) { unsigned long limbs, bits; ZmodF_decompose_rotation(&limbs, &bits, 2*n*FLINT_BITS - s, n); if (n & 1) 
ZmodF_mul_pseudosqrt2_n_odd(*z, *b, 0, n); else ZmodF_mul_pseudosqrt2_n_even(*z, *b, 0, n); if (bits) ZmodF_short_div_2exp(*z, *z, bits, n); if (limbs == 0) { ZmodF_add(*b, *a, *z, n); ZmodF_sub(*a, *a, *z, n); } else if (limbs < n) { ZmodF_div_Bexp_sub(*b, *a, *z, n - limbs, n); ZmodF_div_Bexp_add(*a, *a, *z, n - limbs, n); } else if (limbs == n) { ZmodF_sub(*b, *a, *z, n); ZmodF_add(*a, *a, *z, n); } else { ZmodF_div_Bexp_add(*b, *a, *z, 2*n - limbs, n); ZmodF_div_Bexp_sub(*a, *a, *z, 2*n - limbs, n); } } else ZmodF_inverse_butterfly_2exp(a, b, z, s >> 1, n); } void ZmodF_divby3(ZmodF_t b, ZmodF_t a, unsigned long n) { // make overflow limb nonnegative ZmodF_fast_reduce(a, n); // compute a "total" which is congruent to a mod 3 unsigned long hi = 0, lo = 0; for (unsigned long i = 0; i <= n; i++) add_ssaaaa(hi, lo, hi, lo, 0, a[i]); unsigned long total = lo & ((1UL << (FLINT_BITS/2)) - 1); total += (lo >> (FLINT_BITS/2)); total += hi; // add "total" times B^n + 1 (the latter is 2 mod 3), // so that a becomes exactly divisible by 3 mpn_add_1(a, a, n+1, total); a[n] += total; unsigned long rem = mpn_divexact_by3(b, a, n+1); FLINT_ASSERT(!rem); } // end of file **************************************************************** flint-1.011/mpz_extras.h0000644017361200017500000000726211025357254015077 0ustar tabbotttabbott/*============================================================================ Copyright (C) 2007, William Hart This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ #include #include #include #include #include #ifndef FLINT_MPZ_EXTRAS_H #define FLINT_MPZ_EXTRAS_H #ifdef __cplusplus extern "C" { #endif #define mpz_t mpz_t /*----------------------------------------------------------------------------- Memory Management Functions -----------------------------------------------------------------------------*/ mpz_t* F_mpz_alloc(void); void F_mpz_release(void); /*----------------------------------------------------------------------------- Modular Arithmetic -----------------------------------------------------------------------------*/ void F_mpz_mulmod(mpz_t, mpz_t, mpz_t, mpz_t); /* sets res to a*b modulo p assumes a and b are not (much) bigger than p and that res is not p */ static inline void mulmod2(mpz_t res, mpz_t a, mpz_t b, mpz_t p) { mpz_mul(res,a,b); mpz_fdiv_r(res,res,p); } unsigned long F_mpz_mulmod_ui(mpz_t, mpz_t, mpz_t, unsigned long); long F_mpz_powm_long(long, long, long); int F_mpz_sqrtmod(mpz_t, mpz_t, mpz_t); void F_mpz_sqrtmodpklift(mpz_t, mpz_t, mpz_t, mpz_t); void F_mpz_sqrtmodptopk(mpz_t, mpz_t, mpz_t, mpz_t, int); int F_mpz_sqrtmodpk(mpz_t, mpz_t, mpz_t, int); /*----------------------------------------------------------------------------- Number Theoretic -----------------------------------------------------------------------------*/ void F_mpz_CRT(mpz_t, mpz_t, mpz_t, mpz_t, mpz_t, mpz_t); /*=================================================================================== Montgomery routines ====================================================================================*/ unsigned long F_mpz_mont_red(mpz_t res, mpz_t a, mpz_t m); void F_mpz_mont_mul(mpz_t res, mpz_t a, mpz_t b, mpz_t m, mpz_t R, unsigned long n); void 
F_mpz_expmod_mont(mpz_t res, mpz_t a, mpz_t exp, mpz_t m); /*=================================================================================== Burnikel_Ziegler Division ====================================================================================*/ void F_mpz_divrem_BZ(mpz_t Q, mpz_t R, mpz_t A, mpz_t B); void F_mpz_rem_BZ(mpz_t R, mpz_t A, mpz_t B); void F_mpz_mulmod_BZ(mpz_t res, mpz_t a, mpz_t b, mpz_t m); void F_mpz_expmod_BZ(mpz_t res, mpz_t a, mpz_t exp, mpz_t m); /*=================================================================================== Large integer multiplication ====================================================================================*/ void F_mpz_mul(mpz_t res, mpz_t a, mpz_t b); void __F_mpz_mul(mpz_t res, mpz_t a, mpz_t b, unsigned long twk); #ifdef __cplusplus } #endif #endif flint-1.011/make-profile-tables.py0000644017361200017500000001072711025357254016727 0ustar tabbotttabbott####################################################################### # This file is part of FLINT. # # FLINT is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # FLINT is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with FLINT; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ############################################################################### # # Script for generating profiling information tables for C modules being # profiled by profiler-main.c. # # It takes one command-line parameter: the name of the module, e.g. "xyz". 
# It reads through xyz-profile.c, pulls out function names having certain # initial substrings, and builds a file xyz-profile-tables.c which should # be linked against xyz-profile.c and profiler-main.c. # # See the makefile for typical usage. See also profiler-main.c. # # (C) 2007 William Hart and David Harvey # ############################################################################### import sys import re if len(sys.argv) != 2: raise ValueError, "no file specified" module = sys.argv[1] ############ process input file cfilename = module + "-profile.c" cfile = open(cfilename) profDriver_re = re.compile("void profDriver_(.*)\(.*") profDriverString_re = re.compile("char\* profDriverString_(.*)\(.*") profDriverDefaultParams_re = re.compile("char\* profDriverDefaultParams_(.*)\(.*") prof_re = [profDriver_re, profDriverString_re, profDriverDefaultParams_re] # dictionary from profile name to a tuple of bools, indicating which # functions are defined for each target prof_data = {} for line in cfile: for i in range(len(prof_re)): m = prof_re[i].match(line) if m is not None: name = m.group(1) if name not in prof_data: prof_data[name] = [False] * len(prof_re) else: if prof_data[name][i]: raise ValueError, "duplicate target \"%s\"" % name prof_data[name][i] = True ############ generate output file tfilename = module + "-profile-tables.c" tfile = open(tfilename, "w") tfile.write( "/* ===================================================================\n" "\n" " " + tfilename + "\n" "\n" " This file was AUTOMATICALLY GENERATED by make-profile-tables.py.\n" " DO NOT EDIT IT -- your changes will go the way of all LOST SOCKS.\n" "\n" "=================================================================== */\n" "\n" ) tfile.write("#include \n") tfile.write("#include \"profiler-main.h\"\n") tfile.write("\n") tfile.write("char* prof_module_name = \"" + module + "\";\n\n") tfile.write("int prof_target_count = %s;\n\n" % len(prof_data)) for (name, flags) in prof_data.iteritems(): if 
flags[0]: tfile.write("extern void profDriver_%s(char* params);\n" % name) if flags[1]: tfile.write("extern char* profDriverString_%s(char* params);\n" % name) if flags[2]: tfile.write("extern char* profDriverDefaultParams_%s();\n" % name) tfile.write("\n") tfile.write("char* prof_target_name[] = {\n") for (name, flags) in prof_data.iteritems(): tfile.write(" \"%s\",\n" % name) tfile.write("};\n\n") tfile.write("prof_Driver_t prof_Driver_list[] = {\n") for (name, flags) in prof_data.iteritems(): if flags[0]: tfile.write(" profDriver_%s,\n" % name) else: tfile.write(" NULL,\n") tfile.write("};\n\n") tfile.write("prof_DriverString_t prof_DriverString_list[] = {\n") for (name, flags) in prof_data.iteritems(): if flags[1]: tfile.write(" profDriverString_%s,\n" % name) else: tfile.write(" NULL,\n") tfile.write("};\n\n") tfile.write("prof_DriverDefaultParams_t prof_DriverDefaultParams_list[] = {\n") for (name, flags) in prof_data.iteritems(): if flags[2]: tfile.write(" profDriverDefaultParams_%s,\n" % name) else: tfile.write(" NULL,\n") tfile.write("};\n\n") ########### end of file flint-1.011/bernoulli_zmod.c0000644017361200017500000001652411025357254015723 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** bernoulli_zmod.c: Finds Bernoulli numbers B_{2k} Based on the implementation in SAGE written by David Harvey Uses zmod_polys for calculation. Copyright (C) 2007, David Howden *****************************************************************************/ #include #include #include #include #include "flint.h" #include "long_extras.h" #include "zmod_poly.h" #define TRUE 1 #define FALSE 0 /* Debugging function */ // void print_var(char *name, unsigned long value) // { // printf("%s = %d\n", name, value); // } /* Computes the bernoulli numbers B_0, B_2, ..., B_{p-3} for prime p Requires that res be allocated for (p-1)/2 unsigned longs which will hold the result. If returns 0, then the factoring of p has failed, otherwise will always return 1. 
*/ int bernoulli_mod_p(unsigned long *res, unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long g, g_inv, g_sqr, g_sqr_inv; double p_inv = z_precompute_inverse(p); g = z_primitive_root_precomp(p, p_inv); if(!g) { return FALSE; } g_inv = z_invert(g, p); g_sqr = z_mulmod_precomp(g, g, p, p_inv); g_sqr_inv = z_mulmod_precomp(g_inv, g_inv, p, p_inv); unsigned long poly_size = (p-1)/2; int is_odd = poly_size % 2; unsigned long g_power, g_power_inv; g_power = g_inv; g_power_inv = 1; // constant is (g-1)/2 mod p unsigned long constant; if(g % 2) { constant = (g-1)/2; } else { constant = (g+p-1)/2; } // fudge holds g^{i^2}, fudge_inv holds g^{-i^2} unsigned long fudge, fudge_inv; fudge = fudge_inv = 1; // compute the polynomials F(X) and G(X) zmod_poly_t F, G; zmod_poly_init2(F, p, poly_size); zmod_poly_init2(G, p, poly_size); unsigned long i, temp, h; for(i = 0; i < poly_size; i++) { // compute h(g^i)/g^i (h(x) is as in latex notes) temp = g * g_power; h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv); g_power = z_mod_precomp(temp, p, p_inv); g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv); // store coefficient g^{i^2} h(g^i)/g^i zmod_poly_set_coeff_ui(G, i, z_mulmod_precomp(h, fudge, p, p_inv)); zmod_poly_set_coeff_ui(F, i, fudge_inv); // update fudge and fudge_inv fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv); fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv); } zmod_poly_set_coeff_ui(F, 0, 0); // step 2: multiply the polynomials... zmod_poly_t product; zmod_poly_init(product, p); zmod_poly_mul_KS(product, G, F, 0); // step 3: assemble the result... 
unsigned long g_sqr_power, value; g_sqr_power = g_sqr; fudge = g; res[0] = 1L; unsigned long value_coeff_ui; for(i = 1; i < poly_size; i++) { value = zmod_poly_get_coeff_ui(product, i + poly_size); if(is_odd) { value = z_mod_precomp(zmod_poly_get_coeff_ui(G, i) + zmod_poly_get_coeff_ui(product, i) + p - value, p, p_inv); } else { value = z_mod_precomp(zmod_poly_get_coeff_ui(G, i) + zmod_poly_get_coeff_ui(product, i) + value, p, p_inv); } value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4L, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); value = z_mulmod_precomp(value, z_invert(p+1L-g_sqr_power, p), p, p_inv); res[i] = value; g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); } zmod_poly_clear(product); zmod_poly_clear(F); zmod_poly_clear(G); return TRUE; } /* Verifies that the ouput of bernoulli_mod_p above is correct. Takes the result from bernoulli_mod_p (res - an array of (p-1)/2 unsigned longs), and the prime p. Returns 0 if res is incorrect, 1 if res is correct. */ int verify_bernoulli_mod_p(unsigned long *res, unsigned long p) { unsigned long N, i, product, sum, value, element; double p_inv; N = (p-1)/2; product = 1L; sum = 0L; p_inv = z_precompute_inverse(p); for(i = 0; i < N; i++) { element = res[i]; // if((signed long)element < 0) // { // printf("NEGATIVE NUMBER!!!!!\n"); // } // if(element > p) // { // printf("OVERFLOW!!!!!\n"); // } value = z_mulmod_precomp(z_mulmod_precomp(product, 2*i+1L, p, p_inv), element, p, p_inv); sum = z_mod_precomp(sum + value, p, p_inv); product = z_mulmod_precomp(product, 4L, p, p_inv); } if(z_mod_precomp(sum + 2L, p, p_inv)) { // i = 0; // printf("Error occurred, output:\n"); // while (i < N) // { // printf("%d\n", res[i]); // i++; // } return FALSE; } return TRUE; } /* Test function for bernoulli_mod_p Calculates bernoulli_mod_p for the prime p and verifies the result. 
Returs 0 if incorrect, and 1 if correct. */ int test_bernoulli_mod_p(unsigned long p) { unsigned long *res = (unsigned long*) flint_stack_alloc((p-1)/2); if(!bernoulli_mod_p(res, p)) { printf("Could not factor p = %d\n", p); flint_stack_release(); return FALSE; } int result = verify_bernoulli_mod_p(res, p); flint_stack_release(); return result; } int main (int argc, char const *argv[]) { if (argc == 2) { unsigned long n = atoi(argv[1]); n = z_nextprime(n); printf("Computing bernoulli_mod_p(%ld)... ", n); if (!test_bernoulli_mod_p(n)) { printf("Failed\n"); } else { printf("Done\n"); } return 0; } unsigned long p = 2; unsigned long tests = 2000; unsigned long fail = 0; for(unsigned long i = 0; i < tests; i++) { p = z_nextprime(p); if(!test_bernoulli_mod_p(p)) { printf("Fails on p = %d\n", p); fail++; } else { printf("Works on p = %d\n", p); } } printf("\nResults: %d OK, %d FAILED.\n", tests - fail, fail); return 0; } flint-1.011/magma-profiles/0000755017361200017500000000000011025357255015427 5ustar tabbotttabbottflint-1.011/magma-profiles/poly-div.m0000644017361200017500000001503111025357252017345 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* Profiling MAGMA polynomial division in Z[x]. Usage: run magma with the -b flag to prevent the start up banner, i.e. magma -b magma-profile.m > output.prof (C) 2007 David Harvey + Bill Hart, GPL */ target_name := "PolyMul"; target_description := "MAGMA polynomial multiplication in Z[x] over various lengths and bitsizes, NON-NEGATIVE coefficients only"; max := 1000000; // maximum total bitsize of input polys ratio := 1.2; // ratio between consecutive lengths/bitsizes // Timing runs need to last at least this many microseconds to be counted: DURATION_THRESHOLD := 200000; // Microseconds per timing run that the prof2d_sample function aims for: DURATION_TARGET := 300000; /* This function should run count iterations at position (x, y), and return the total time in seconds, using the Cputime() function. 
*/
function sampler(length, bits, count)

    // first time random poly generation + multiplication

    R:=PolynomialRing(Rationals());

    // Fresh random operands are generated once every countmod iterations.
    // The larger threshold must be tested first: as two separate "if"
    // statements in the order 1000, 100, the second always overwrote the
    // first and countmod could never become 100.
    countmod := 4;
    if count gt 1000 then
        countmod := 100;
    elif count gt 100 then
        countmod := 10;
    end if;

    time1 := Cputime();
    for i := 1 to count do
        if (i-1) mod countmod eq 0 then
            // the divisor a must be non-zero
            a:=0;
            while a eq 0 do
                a:=Polynomial([RandomBits(bits): x in [1..length]]);
            end while;
            b:=Polynomial([RandomBits(bits): x in [1..length]]);
            c:=a*b;
            c:=R!c;
            a:=R!a;
        end if;
        d:=c div a;
    end for;

    time2 := Cputime();

    // now time just the random poly generation
    // (the coercions into R are not repeated here, so their cost stays in
    // the reported figure)

    for i := 1 to count do
        if (i-1) mod countmod eq 0 then
            a:=0;
            while a eq 0 do
                a:=Polynomial([RandomBits(bits): x in [1..length]]);
            end while;
            b:=Polynomial([RandomBits(bits): x in [1..length]]);
            c:=a*b;
        end if;
    end for;

    time3 := Cputime();

    // time of the first loop minus time of the generation-only loop
    return (time2 - time1) - (time3 - time2);
end function;


/*
   Formats in scientific notation with 3 decimal places
*/
function format_sci(x)
    L := Floor(Log(10, x));
    x := x / 10^L;
    s := Sprintf("%.3oe", x);
    if L lt 0 then
        s := s cat "-";
    else
        s := s cat "+";
    end if;

    // exponent is always written as two digits
    s := s cat Sprintf("%o", Floor(Abs(L / 10)));
    s := s cat Sprintf("%o", (Abs(L) mod 10));

    return s;
end function;


procedure prof2d_sample(x, y)
    // number of timings that were at least DURATION_THRESHOLD microseconds:
    good_count := 0;

    // first try just a few loops
    num_trials := 4;
    last_time := sampler(x, y, num_trials) * 1000000.0;

    max_time := 0;
    min_time := 0;

    // loop until we have enough good times
    while true do
        per_trial := last_time / num_trials;

        // if the last recorded time was long enough, record it
        if last_time gt DURATION_THRESHOLD then
            if good_count gt 0 then
                max_time := Max(max_time, per_trial);
                min_time := Min(min_time, per_trial);
            else
                max_time := per_trial;
                min_time := per_trial;
            end if;

            good_count := good_count + 1;
            if good_count eq 5 then
                // we've got enough data
                // print it out and return
                print Sprintf("%o\t%o\t%o\t%o", x, y, format_sci(min_time), format_sci(max_time));
                return;
            end if;
        end if;

        // adjust num_trials so that the elapsed time gravitates towards
        // DURATION_TARGET; num_trials can be changed by a factor of
        // at most 25%, and must be at least 1
        if last_time lt 0.0001 then
            last_time := 0.0001;
        end if;
        adjust_ratio := 1.0 * DURATION_TARGET / last_time;
        if adjust_ratio gt 1.25 then
            adjust_ratio := 1.25;
        end if;
        if adjust_ratio lt 0.75 then
            adjust_ratio := 0.75;
        end if;
        num_trials := Ceiling(adjust_ratio * num_trials);
        // just to be safe:
        if num_trials eq 0 then
            num_trials := 1;
        end if;

        // run another trial
        last_time := sampler(x, y, num_trials) * 1000000.0;
    end while;
end procedure;


/*
   This function should loop over appropriate combinations of (x, y),
   and call prof2d_sample(x, y) for each one.
*/
procedure driver()
    max_iter := Ceiling(Log(max) / Log(ratio));

    // lengths and bitsizes both run over Floor(ratio^k); repeated values
    // are skipped, and only points with length*bits <= max are sampled
    last_length := 0;
    for i := 0 to max_iter do
        length := Floor(ratio^i);
        if length ne last_length then
            last_length := length;

            last_bits := 0;
            for j := 0 to max_iter do
                bits := Floor(ratio^j);
                if bits ne last_bits then
                    last_bits := bits;

                    if length*bits le max then
                        prof2d_sample(length, bits);
                    end if;
                end if;
            end for;
        end if;
    end for;
end procedure;



/************************************************************************

 This last section is the generic profiling code. Just leave this stuff
 alone.

************************************************************************/ procedure print_header() print "FLINT profile output"; print ""; print "TIMESTAMP: (todo: write code to generate timestamp)"; print "MACHINE: (todo: write code to get machine from environment var)"; print ""; print "MODULE: magma"; print "TARGET:", target_name; print ""; print "DESCRIPTION:"; print target_description; print ""; print "============================================== begin data"; end procedure; print_header(); driver(); quit; // ------------- end of file ------------------------------------ flint-1.011/magma-profiles/poly-mult.m0000644017361200017500000001452311025357252017551 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* Profiling MAGMA polynomial multiplication in Z[x]. Usage: run magma with the -b flag to prevent the start up banner, i.e. 
magma -b magma-profile.m > output.prof (C) 2007 David Harvey + Bill Hart, GPL */ target_name := "PolyMul"; target_description := "MAGMA polynomial multiplication in Z[x] over various lengths and bitsizes, NON-NEGATIVE coefficients only"; max := 16000000; // maximum total bitsize of input polys ratio := 1.2; // ratio between consecutive lengths/bitsizes // Timing runs need to last at least this many microseconds to be counted: DURATION_THRESHOLD := 200000; // Microseconds per timing run that the prof2d_sample function aims for: DURATION_TARGET := 300000; forward prof2d_sample; /* This function should run count iterations at position (x, y), and return the total time in seconds, using the Cputime() function. */ function sampler(length, bits, count) // first time random poly generation + multiplication countmod := 4; if count gt 1000 then countmod := 100; end if; if count gt 100 then countmod := 10; end if; time1 := Cputime(); for i := 1 to count do if (i-1) mod countmod eq 0 then a:=Polynomial([RandomBits(bits): x in [1..length]]); b:=Polynomial([RandomBits(bits): x in [1..length]]); end if; c:=a*b; end for; time2 := Cputime(); // now time just the random poly generation for i := 1 to count do if (i-1) mod countmod eq 0 then a:=Polynomial([RandomBits(bits): x in [1..length]]); b:=Polynomial([RandomBits(bits): x in [1..length]]); end if; end for; time3 := Cputime(); return (time2 - time1) - (time3 - time2); end function; /* This function should loop over appropriate combinations of (x, y), and call prof2d_sample(x, y) for each one. 
*/ procedure driver() max_iter := Ceiling(Log(max) / Log(ratio)); last_length := 0; for i := 0 to max_iter do length := Floor(ratio^i); if length ne last_length then last_length := length; last_bits := 0; for j := 0 to max_iter do bits := Floor(ratio^j); if bits ne last_bits then last_bits := bits; if length*bits le max then prof2d_sample(length, bits); end if; end if; end for; end if; end for; end procedure; /************************************************************************ This last section is the generic profiling code. Just leave this stuff alone. ************************************************************************/ /* Formats in scientific notation with 3 decimal places */ function format_sci(x) L := Floor(Log(10, x)); x := x / 10^L; s := Sprintf("%.3oe", x); if L lt 0 then s := s cat "-"; else s := s cat "+"; end if; s := s cat Sprintf("%o", Floor(Abs(L / 10))); s := s cat Sprintf("%o", (Abs(L) mod 10)); return s; end function; procedure prof2d_sample(x, y) // number of timings that were at least DURATION_THRESHOLD microseconds: good_count := 0; // first try just a few loops num_trials := 4; last_time := sampler(x, y, num_trials) * 1000000.0; max_time := 0; min_time := 0; // loop until we have enough good times while true do per_trial := last_time / num_trials; // if the last recorded time was long enough, record it if last_time gt DURATION_THRESHOLD then if good_count gt 0 then max_time := Max(max_time, per_trial); min_time := Min(min_time, per_trial); else max_time := per_trial; min_time := per_trial; end if; good_count := good_count + 1; if good_count eq 5 then // we've got enough data // print it out and return print Sprintf("%o\t%o\t%o\t%o", x, y, format_sci(min_time), format_sci(max_time)); return; end if; end if; // adjust num_trials so that the elapsed time gravitates towards // DURATION_TARGET; num_trials can be changed by a factor of // at most 25%, and must be at least 1 if last_time lt 0.0001 then last_time := 0.0001; end if; 
adjust_ratio := 1.0 * DURATION_TARGET / last_time; if adjust_ratio gt 1.25 then adjust_ratio := 1.25; end if; if adjust_ratio lt 0.75 then adjust_ratio := 0.75; end if; num_trials := Ceiling(adjust_ratio * num_trials); // just to be safe: if num_trials eq 0 then num_trials := 1; end if; // run another trial last_time := sampler(x, y, num_trials) * 1000000.0; end while; end procedure; procedure print_header() print "FLINT profile output"; print ""; print "TIMESTAMP: (todo: write code to generate timestamp)"; print "MACHINE: (todo: write code to get machine from environment var)"; print ""; print "MODULE: magma"; print "TARGET:", target_name; print ""; print "DESCRIPTION:"; print target_description; print ""; print "============================================== begin data"; end procedure; print_header(); driver(); quit; // ------------- end of file ------------------------------------ flint-1.011/magma-profiles/template.m0000644017361200017500000001300711025357252017416 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* Script for 2d profiles of MAGMA. This file is part of the FLINT project. It generates output in the same format as profiler-main.c. 
Please make a COPY of this file before using it; don't overwrite this template file in the FLINT repository! Usage: run magma with the -b flag to prevent the start up banner, i.e. magma -b magma-profile.m > output.prof (C) 2007 David Harvey, GPL */ /************************************************************************ The first section of the file is a template you need to fill in to run a particular profile. ************************************************************************/ target_name := "target name here"; target_description := "some description here"; // Timing runs need to last at least this many microseconds to be counted: DURATION_THRESHOLD := 200000; // Microseconds per timing run that the prof2d_sample function aims for: DURATION_TARGET := 300000; forward prof2d_sample; /* This function should run "count" iterations at position (x, y), and return the total time in seconds, using the Cputime() function. */ function sampler(x, y, count) // add setup code here time1 := Cputime(); // add stuff to be timed here time2 := Cputime(); return time2 - time1; end function; /* This function should loop over appropriate combinations of (x, y), and call prof2d_sample(x, y) for each one. */ procedure driver() // here is an example that calls prof2d_sample for a range of x and y for x := 1 to 10 do for y := 1 to 10 do prof2d_sample(x, y); end for; end for; end procedure; /************************************************************************ This last section is the generic profiling code. Just leave this stuff alone. 
************************************************************************/ /* Formats in scientific notation with 3 decimal places */ function format_sci(x) L := Floor(Log(10, x)); x := x / 10^L; s := Sprintf("%.3oe", x); if L lt 0 then s := s cat "-"; else s := s cat "+"; end if; s := s cat Sprintf("%o", Floor(Abs(L / 10))); s := s cat Sprintf("%o", (Abs(L) mod 10)); return s; end function; procedure prof2d_sample(x, y) // number of timings that were at least DURATION_THRESHOLD microseconds: good_count := 0; // first try just a few loops num_trials := 4; last_time := sampler(x, y, num_trials) * 1000000.0; max_time := 0; min_time := 0; // loop until we have enough good times while true do per_trial := last_time / num_trials; // if the last recorded time was long enough, record it if last_time gt DURATION_THRESHOLD then if good_count gt 0 then max_time := Max(max_time, per_trial); min_time := Min(min_time, per_trial); else max_time := per_trial; min_time := per_trial; end if; good_count := good_count + 1; if good_count eq 5 then // we've got enough data // print it out and return print Sprintf("%o\t%o\t%o\t%o", x, y, format_sci(min_time), format_sci(max_time)); return; end if; end if; // adjust num_trials so that the elapsed time gravitates towards // DURATION_TARGET; num_trials can be changed by a factor of // at most 25%, and must be at least 1 if last_time lt 0.0001 then last_time := 0.0001; end if; adjust_ratio := 1.0 * DURATION_TARGET / last_time; if adjust_ratio gt 1.25 then adjust_ratio := 1.25; end if; if adjust_ratio lt 0.75 then adjust_ratio := 0.75; end if; num_trials := Ceiling(adjust_ratio * num_trials); // just to be safe: if num_trials eq 0 then num_trials := 1; end if; // run another trial last_time := sampler(x, y, num_trials) * 1000000.0; end while; end procedure; procedure print_header() print "FLINT profile output"; print ""; print "TIMESTAMP: (todo: write code to generate timestamp)"; print "MACHINE: (todo: write code to get machine from 
environment var)"; print ""; print "MODULE: magma"; print "TARGET:", target_name; print ""; print "DESCRIPTION:"; print target_description; print ""; print "============================================== begin data"; end procedure; print_header(); driver(); quit; // ------------- end of file ------------------------------------ flint-1.011/longlong.h0000644017361200017500000021006711025357254014521 0ustar tabbotttabbott/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. This file is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this file; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* You have to define the following before including this file: UWtype -- An unsigned type, default type for operations (typically a "word") UHWtype -- An unsigned type, at least half the size of UWtype. UDWtype -- An unsigned type, at least twice as large a UWtype W_TYPE_SIZE -- size in bits of UWtype SItype, USItype -- Signed and unsigned 32 bit types. DItype, UDItype -- Signed and unsigned 64 bit types. On a 32 bit machine UWtype should typically be USItype; on a 64 bit machine, UWtype should typically be UDItype. 
*/ #define __BITS4 (W_TYPE_SIZE / 4) #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) /* This is used to make sure no undesirable sharing between different libraries that use this file takes place. */ #ifndef __MPN #define __MPN(x) __##x #endif #ifndef _PROTO #if (__STDC__-0) || defined (__cplusplus) #define _PROTO(x) x #else #define _PROTO(x) () #endif #endif /* Define auxiliary asm macros. 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype word product in HIGH_PROD and LOW_PROD. 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a UDWtype product. This is just a variant of umul_ppmm. 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator) divides a UDWtype, composed by the UWtype integers HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less than DENOMINATOR for correct operation. If, in addition, the most significant bit of DENOMINATOR must be 1, then the pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1. 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, denominator). Like udiv_qrnnd but the numbers are signed. The quotient is rounded towards 0. 5) count_leading_zeros(count, x) counts the number of zero-bits from the msb to the first non-zero bit in the UWtype X. This is the number of steps X needs to be shifted left to set the msb. Undefined for X == 0, unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts from the least significant end. 
7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, high_addend_2, low_addend_2) adds two UWtype integers, composed by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow (i.e. carry out) is not stored anywhere, and is lost. 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, and is lost. If any of these macros are left undefined for a particular CPU, C macros are used. Notes: For add_ssaaaa the two high and two low addends can both commute, but unfortunately gcc only supports one "%" commutative in each asm block. This has always been so but is only documented in recent versions (eg. pre-release 3.3). Having two or more "%"s can cause an internal compiler error in certain rare circumstances. Apparently it was only the last "%" that was ever actually respected, so the code has been updated to leave just that. Clearly there's a free choice whether high or low should get it, if there's a reason to favour one over the other. Also obviously when the constraints on the two operands are identical there's no benefit to the reloader in any "%" at all. */ /* The CPUs come in alphabetical order below. Please add support for more CPUs here, or improve the current support for the CPUs below! */ /* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc 3.4 __builtin_clzl or __builtin_clzll, according to our limb size. Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or __builtin_ctzll. 
These builtins are only used when we check what code comes out, on some chips they're merely libgcc calls, where we will instead want an inline in that case (either asm or generic C). These builtins are better than an asm block of the same insn, since an asm block doesn't give gcc any information about scheduling or resource usage. We keep an asm block for use on prior versions of gcc though. For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but it's not used (for count_leading_zeros) because it generally gives extra code to ensure the result is 0 when the input is 0, which we don't need or want. */ #ifdef _LONG_LONG_LIMB #define count_leading_zeros_gcc_clz(count,x) \ do { \ ASSERT ((x) != 0); \ (count) = __builtin_clzll (x); \ } while (0) #else #define count_leading_zeros_gcc_clz(count,x) \ do { \ ASSERT ((x) != 0); \ (count) = __builtin_clzl (x); \ } while (0) #endif #ifdef _LONG_LONG_LIMB #define count_trailing_zeros_gcc_ctz(count,x) \ do { \ ASSERT ((x) != 0); \ (count) = __builtin_ctzll (x); \ } while (0) #else #define count_trailing_zeros_gcc_ctz(count,x) \ do { \ ASSERT ((x) != 0); \ (count) = __builtin_ctzl (x); \ } while (0) #endif /* FIXME: The macros using external routines like __MPN(count_leading_zeros) don't need to be under !NO_ASM */ #if ! defined (NO_ASM) #if defined (__alpha) && W_TYPE_SIZE == 64 /* Most alpha-based machines, except Cray systems. */ #if defined (__GNUC__) #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ __asm__ ("umulh %r1,%2,%0" \ : "=r" (ph) \ : "%rJ" (m0), "rI" (m1)); \ (pl) = __m0 * __m1; \ } while (0) #define UMUL_TIME 18 #else /* ! 
__GNUC__ */ #include #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ (ph) = __UMULH (m0, m1); \ (pl) = __m0 * __m1; \ } while (0) #endif #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __di; \ __di = __MPN(invert_limb) (d); \ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ } while (0) #define UDIV_PREINV_ALWAYS 1 #define UDIV_NEEDS_NORMALIZATION 1 #define UDIV_TIME 220 #endif /* LONGLONG_STANDALONE */ /* clz_tab is required in all configurations, since mpn/alpha/cntlz.asm always goes into libgmp.so, even when not actually used. */ #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB #if defined (__GNUC__) && HAVE_HOST_CPU_alpha_CIX #define count_leading_zeros(COUNT,X) \ __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X)) #define count_trailing_zeros(COUNT,X) \ __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X)) #endif /* clz/ctz using cix */ #if ! defined (count_leading_zeros) \ && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE) /* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0. "$31" is written explicitly in the asm, since an "r" constraint won't select reg 31. There seems no need to worry about "r31" syntax for cray, since gcc itself (pre-release 3.4) emits just $31 in various places. */ #define ALPHA_CMPBGE_0(dst, src) \ do { asm ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0) /* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts them, locating the highest non-zero byte. A second __clz_tab lookup counts the leading zero bits in that byte, giving the result. 
*/ #define count_leading_zeros(count, x) \ do { \ UWtype __clz__b, __clz__c, __clz__x = (x); \ ALPHA_CMPBGE_0 (__clz__b, __clz__x); /* zero bytes */ \ __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F]; /* 8 to 1 byte */ \ __clz__b = __clz__b * 8 - 7; /* 57 to 1 shift */ \ __clz__x >>= __clz__b; \ __clz__c = __clz_tab [__clz__x]; /* 8 to 1 bit */ \ __clz__b = 65 - __clz__b; \ (count) = __clz__b - __clz__c; \ } while (0) #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB #endif /* clz using cmpbge */ #if ! defined (count_leading_zeros) && ! defined (LONGLONG_STANDALONE) #if HAVE_ATTRIBUTE_CONST long __MPN(count_leading_zeros) _PROTO ((UDItype)) __attribute__ ((const)); #else long __MPN(count_leading_zeros) _PROTO ((UDItype)); #endif #define count_leading_zeros(count, x) \ ((count) = __MPN(count_leading_zeros) (x)) #endif /* clz using mpn */ #endif /* __alpha */ #if defined (_CRAY) && W_TYPE_SIZE == 64 #include #define UDIV_PREINV_ALWAYS 1 #define UDIV_NEEDS_NORMALIZATION 1 #define UDIV_TIME 220 long __MPN(count_leading_zeros) _PROTO ((UDItype)); #define count_leading_zeros(count, x) \ ((count) = _leadz ((UWtype) (x))) #if defined (_CRAYIEEE) /* I.e., Cray T90/ieee, T3D, and T3E */ #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ (ph) = _int_mult_upper (m0, m1); \ (pl) = __m0 * __m1; \ } while (0) #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __di; \ __di = __MPN(invert_limb) (d); \ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ } while (0) #endif /* LONGLONG_STANDALONE */ #endif /* _CRAYIEEE */ #endif /* _CRAY */ #if defined (__ia64) && W_TYPE_SIZE == 64 /* This form encourages gcc (pre-release 3.4 at least) to emit predicated "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency. 
The generic code using "al>= _c; \ if (_x >= 1 << 4) \ _x >>= 4, _c += 4; \ if (_x >= 1 << 2) \ _x >>= 2, _c += 2; \ _c += _x >> 1; \ (count) = W_TYPE_SIZE - 1 - _c; \ } while (0) /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1 based, and we don't need a special case for x==0 here */ #define count_trailing_zeros(count, x) \ do { \ UWtype __ctz_x = (x); \ __asm__ ("popcnt %0 = %1" \ : "=r" (count) \ : "r" ((__ctz_x-1) & ~__ctz_x)); \ } while (0) #endif #if defined (__INTEL_COMPILER) #include #define umul_ppmm(ph, pl, m0, m1) \ do { \ UWtype _m0 = (m0), _m1 = (m1); \ ph = _m64_xmahu (_m0, _m1, 0); \ pl = _m0 * _m1; \ } while (0) #endif #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __di; \ __di = __MPN(invert_limb) (d); \ udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ } while (0) #define UDIV_PREINV_ALWAYS 1 #define UDIV_NEEDS_NORMALIZATION 1 #endif #define UDIV_TIME 220 #endif #if defined (__GNUC__) /* We sometimes need to clobber "cc" with gcc2, but that would not be understood by gcc1. Use cpp to avoid major code duplication. 
*/
/* __CLOBBER_CC starts a clobber list containing "cc"; __AND_CLOBBER_CC
   appends "cc" to an existing clobber list.  Both expand to nothing when
   __GNUC__ < 2. */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */

/* a29k, 32-bit limbs. */
#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \
           : "=r" (sh), "=&r" (sl) \
           : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \
           : "=r" (sh), "=&r" (sl) \
           : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
/* Low and high product words are produced by two separate asm statements
   operating on saved copies of the arguments. */
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("multiplu %0,%1,%2" \
             : "=r" (xl) \
             : "r" (__m0), "r" (__m1)); \
    __asm__ ("multmu %0,%1,%2" \
             : "=r" (xh) \
             : "r" (__m0), "r" (__m1)); \
  } while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("dividu %0,%3,%4" \
           : "=r" (q), "=q" (r) \
           : "1" (n1), "r" (n0), "r" (d))
#define count_leading_zeros(count, x) \
  __asm__ ("clz %0,%1" \
           : "=r" (count) \
           : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __a29k__ */

/* arc: note this section is not conditional on W_TYPE_SIZE. */
#if defined (__arc__)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3" \
           : "=r" (sh), \
             "=&r" (sl) \
           : "r" ((USItype) (ah)), \
             "rIJ" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rIJ" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
           : "=r" (sh), \
             "=&r" (sl) \
           : "r" ((USItype) (ah)), \
             "rIJ" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rIJ" ((USItype) (bl)))
#endif

/* arm, 32-bit limbs.  These asm blocks declare the "cc" clobber through
   __CLOBBER_CC. */
#if defined (__arm__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
           : "=r" (sh), "=&r" (sl) \
           : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (al)) \
      { \
        if (__builtin_constant_p (ah)) \
          __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
                   : "=r" (sh), "=&r" (sl) \
                   : "rI" (ah), "r" (bh), "rI" (al), "r"
(bl) __CLOBBER_CC); \
        else \
          __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
                   : "=r" (sh), "=&r" (sl) \
                   : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
      } \
    else if (__builtin_constant_p (ah)) \
      { \
        if (__builtin_constant_p (bl)) \
          __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
                   : "=r" (sh), "=&r" (sl) \
                   : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
        else \
          __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
                   : "=r" (sh), "=&r" (sl) \
                   : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
      } \
    else if (__builtin_constant_p (bl)) \
      { \
        if (__builtin_constant_p (bh)) \
          __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
                   : "=r" (sh), "=&r" (sl) \
                   : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
        else \
          __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
                   : "=r" (sh), "=&r" (sl) \
                   : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
      } \
    else /* only bh might be a constant */ \
      __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
  } while (0)
/* The branches above differ only in which operands get the "rI"
   (register-or-immediate) constraint: the reversed forms rsbs/rsc are used
   when the minuend word is the compile-time constant, subs/sbc otherwise. */

/* The "1 ||" makes this branch unconditional, so the #else fallback below
   is never compiled. */
#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#define UMUL_TIME 5
#define smul_ppmm(xh, xl, a, b) \
  __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __di; \
    __di = __MPN(invert_limb) (d); \
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
  } while (0)
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 70
#endif /* LONGLONG_STANDALONE */
#else
/* Fallback built from four "mul" instructions on 16-bit halves. */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("%@ Inlined umul_ppmm\n" \
" mov %|r0, %2, lsr #16\n" \
" mov %|r2, %3, lsr #16\n" \
" bic %|r1, %2, %|r0, lsl #16\n" \
" bic %|r2, %3, %|r2, lsl #16\n" \
" mul %1, %|r1, %|r2\n" \
" mul %|r2, %|r0, %|r2\n" \
" mul %|r1, %0, %|r1\n" \
" mul %0, %|r0, %0\n" \
" adds %|r1, %|r2,
%|r1\n" \
" addcs %0, %0, #65536\n" \
" adds %1, %1, %|r1, lsl #16\n" \
" adc %0, %0, %|r1, lsr #16" \
           : "=&r" (xh), "=r" (xl) \
           : "r" (a), "r" (b) \
           : "r0", "r1", "r2")
#define UMUL_TIME 20
#ifndef LONGLONG_STANDALONE
/* Division is delegated to the out-of-line __MPN(udiv_qrnnd), which returns
   the quotient and stores the remainder through its first argument. */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UWtype __r; \
    (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
#define UDIV_TIME 200
#endif /* LONGLONG_STANDALONE */
#endif
#endif /* __arm__ */

/* clipper, 32-bit limbs.  These macros are gcc statement expressions; the
   double-word product is read back through a union whose struct lists the
   low word first. */
#if defined (__clipper__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __x; \
  __asm__ ("mulwux %2,%0" \
           : "=r" (__x.__ll) \
           : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
#define smul_ppmm(w1, w0, u, v) \
  ({union {DItype __ll; \
           struct {SItype __l, __h;} __i; \
          } __x; \
  __asm__ ("mulwx %2,%0" \
           : "=r" (__x.__ll) \
           : "%0" ((SItype)(u)), "r" ((SItype)(v))); \
  (w1) = __x.__i.__h; (w0) = __x.__i.__l;})
/* Same multiply as umul_ppmm, but yields the whole product as the value of
   the expression. */
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("mulwux %2,%0" \
             : "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
    __w; })
#endif /* __clipper__ */

/* Fujitsu vector computers.
*/
/* The product is read back through a union whose struct lists the high
   word first. */
#if defined (__uxp__) && W_TYPE_SIZE == 32
#define umul_ppmm(ph, pl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\
    (ph) = __x.__i.__h; \
    (pl) = __x.__i.__l; \
  } while (0)
/* NOTE(review): the signed variant reuses the unsigned union types
   (UDItype/USItype); only the instruction differs. */
#define smul_ppmm(ph, pl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \
    (ph) = __x.__i.__h; \
    (pl) = __x.__i.__l; \
  } while (0)
#endif

/* gmicro, 32-bit limbs.  The "0"/"1" matching constraints tie inputs to the
   output operands, i.e. the instructions update their destination in place. */
#if defined (__gmicro__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.w %5,%1\n\taddx %3,%0" \
           : "=g" (sh), "=&g" (sl) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \
           : "=g" (sh), "=&g" (sl) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("mulx %3,%0,%1" \
           : "=g" (ph), "=r" (pl) \
           : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
#define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("divx %4,%0,%1" \
           : "=g" (q), "=r" (r) \
           : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
#define count_leading_zeros(count, x) \
  __asm__ ("bsch/1 %1,%0" \
           : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
#endif

/* hppa, 32-bit limbs. */
#if defined (__hppa) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%I5 %5,%r4,%1\n\taddc %r2,%r3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%I4 %4,%r5,%1\n\tsubb %r2,%r3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl))
/* xmpyu works on "f" (floating-point register) operands and is only used
   when _PA_RISC1_1 is defined. */
#if defined (_PA_RISC1_1)
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \
    (wh) = __x.__i.__h; \
    (wl) = __x.__i.__l; \
  }
while (0)
#define UMUL_TIME 8
#define UDIV_TIME 60
#else
#define UMUL_TIME 40
#define UDIV_TIME 80
#endif
/* Binary search over 16/8/4/2/1-bit groups: COUNT starts at 1, each
   all-zero upper group adds its width, and the final extracted bit is
   subtracted.  X is tied to the scratch output __tmp ("1" constraint), so
   the asm shifts a copy rather than the caller's value. */
#define count_leading_zeros(count, x) \
  do { \
    USItype __tmp; \
    __asm__ ( \
       "ldi 1,%0\n" \
" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
" ldo 16(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
" ldo 8(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
" ldo 4(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
" ldo 2(%0),%0 ; Yes. Perform add.\n" \
" extru %1,30,1,%1 ; Extract bit 1.\n" \
" sub %0,%1,%0 ; Subtract it.\n" \
        : "=r" (count), "=r" (__tmp) : "1" (x)); \
  } while (0)
#endif /* hppa */

/* These macros are for ABI=2.0w. In ABI=2.0n they can't be used, since GCC
   (3.2) puts longlong into two adjacent 32-bit registers. Presumably this
   is just a case of no direct support for 2.0n but treating it like 1.0. */
#if defined (__hppa) && W_TYPE_SIZE == 64 && !
defined (_LONG_LONG_LIMB)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%I5 %5,%r4,%1\n\tadd,dc %r2,%r3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rM" (ah), "rM" (bh), "%rM" (al), "rI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%I4 %4,%r5,%1\n\tsub,db %r2,%r3,%0" \
           : "=r" (sh), "=&r" (sl) \
           : "rM" (ah), "rM" (bh), "rI" (al), "rM" (bl))
#endif /* hppa */

/* i370 / s390 / mvs, 32-bit limbs: only the signed multiply and divide are
   provided.  Both go through a union whose struct lists the high word
   first. */
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("lr %N0,%1\n\tmr %0,%2" \
             : "=&r" (__x.__ll) \
             : "r" (m0), "r" (m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
  } while (0)
/* NOTE(review): n1 and n0 are used unparenthesized in the assignments
   below, unlike the other macro arguments in this file. */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __x.__i.__h = n1; __x.__i.__l = n0; \
    __asm__ ("dr %0,%2" \
             : "=r" (__x.__ll) \
             : "0" (__x.__ll), "r" (d)); \
    (q) = __x.__i.__l; (r) = __x.__i.__h; \
  } while (0)
#endif

/* i386 / i486, 32-bit limbs. */
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl %5,%k1\n\tadcl %3,%k0" \
           : "=r" (sh), "=&r" (sl) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl %5,%k1\n\tsbbl %3,%k0" \
           : "=r" (sh), "=&r" (sl) \
           : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
             "1" ((USItype)(al)), "g" ((USItype)(bl)))
/* mull / divl use fixed registers: the "a" and "d" constraints pin the
   low/high (resp. quotient/remainder) words to them. */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mull %3" \
           : "=a" (w0), "=d" (w1) \
           : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
  __asm__ ("divl %4" /* stringification in K&R C */ \
           : "=a" (q), "=d" (r) \
           : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))

#if HAVE_HOST_CPU_i586 || HAVE_HOST_CPU_pentium || HAVE_HOST_CPU_pentiummmx
/* Pentium bsrl takes between 10 and 72 cycles depending where the most significant 1 bit is, hence the use of the following
alternatives. bsfl is slow too, between 18 and 42 depending where the
least significant 1 bit is, so let the generic count_trailing_zeros below
make use of the count_leading_zeros here too. */

#if HAVE_HOST_CPU_pentiummmx && ! defined (LONGLONG_STANDALONE)
/* The following should be a fixed 14 or 15 cycles, but possibly plus an L1
   cache miss reading from __clz_tab. For P55 it's favoured over the float
   below so as to avoid mixing MMX and x87, since the penalty for switching
   between the two is about 100 cycles.

   The asm block sets __shift to -3 if the high 24 bits are clear, -2 for
   16, -1 for 8, or 0 otherwise. This could be written equivalently as
   follows, but as of gcc 2.95.2 it results in conditional jumps.

       __shift = -(__n < 0x1000000);
       __shift -= (__n < 0x10000);
       __shift -= (__n < 0x100);

   The middle two sbbl and cmpl's pair, and with luck something gcc
   generates might pair with the first cmpl and the last sbbl. The "32+1"
   constant could be folded into __clz_tab[], but it doesn't seem worth
   making a different table just for that. */
/* After the asm, __shift is rescaled to a bit count (1, 9, 17 or 25) and
   used both as the shift amount for the __clz_tab index and in the final
   subtraction. */
#define count_leading_zeros(c,n) \
  do { \
    USItype __n = (n); \
    USItype __shift; \
    __asm__ ("cmpl $0x1000000, %1\n" \
             "sbbl %0, %0\n" \
             "cmpl $0x10000, %1\n" \
             "sbbl $0, %0\n" \
             "cmpl $0x100, %1\n" \
             "sbbl $0, %0\n" \
             : "=&r" (__shift) : "r" (__n)); \
    __shift = __shift*8 + 24 + 1; \
    (c) = 32 + 1 - __shift - __clz_tab[__n >> __shift]; \
  } while (0)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
#define COUNT_LEADING_ZEROS_0 31 /* n==0 indistinguishable from n==1 */

#else /* ! pentiummmx || LONGLONG_STANDALONE */
/* The following should be a fixed 14 cycles or so. Some scheduling opportunities should be available between the float load/store too. This sort of code is used in gcc 3 for __builtin_ffs (with "n&-n") and is apparently suggested by the Intel optimizing manual (don't know exactly where). gcc 2.95 or up will be best for this, so the "double" is correctly aligned on the stack.
*/
/* N is converted to double and the leading-zero count is derived from the
   upper 32-bit word of its representation (__u.a[1] >> 20).
   NOTE(review): this relies on a[1] holding the exponent field, i.e. on
   the host's double layout -- confirm if reused elsewhere. */
#define count_leading_zeros(c,n) \
  do { \
    union { \
      double d; \
      unsigned a[2]; \
    } __u; \
    ASSERT ((n) != 0); \
    __u.d = (UWtype) (n); \
    (c) = 0x3FF + 31 - (__u.a[1] >> 20); \
  } while (0)
#define COUNT_LEADING_ZEROS_0 (0x3FF + 31)
#endif /* pentiummmx */

#else /* ! pentium */

#if __GMP_GNUC_PREREQ (3,4) /* using bsrl */
#define count_leading_zeros(count,x) count_leading_zeros_gcc_clz(count,x)
#endif /* gcc clz */

/* On P6, gcc prior to 3.0 generates a partial register stall for
   __cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former
   being 1 code byte smaller. "31-__cbtmp" is a workaround, probably at the
   cost of one extra instruction. Do this for "i386" too, since that means
   generic x86. */
#if ! defined (count_leading_zeros) && __GNUC__ < 3 \
  && (HAVE_HOST_CPU_i386 \
      || HAVE_HOST_CPU_i686 \
      || HAVE_HOST_CPU_pentiumpro \
      || HAVE_HOST_CPU_pentium2 \
      || HAVE_HOST_CPU_pentium3)
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    ASSERT ((x) != 0); \
    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
    (count) = 31 - __cbtmp; \
  } while (0)
#endif /* gcc<3 asm bsrl */

/* Default asm version: bsrl yields the bit index, which is turned into a
   leading-zero count with "^ 31". */
#ifndef count_leading_zeros
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    ASSERT ((x) != 0); \
    __asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#endif /* asm bsrl */

#if __GMP_GNUC_PREREQ (3,4) /* using bsfl */
#define count_trailing_zeros(count,x) count_trailing_zeros_gcc_ctz(count,x)
#endif /* gcc ctz */

#ifndef count_trailing_zeros
#define count_trailing_zeros(count, x) \
  do { \
    ASSERT ((x) != 0); \
    __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))); \
  } while (0)
#endif /* asm bsfl */

#endif /* !
pentium */

/* Timing defaults for x86 variants that did not set their own above. */
#ifndef UMUL_TIME
#define UMUL_TIME 10
#endif
#ifndef UDIV_TIME
#define UDIV_TIME 40
#endif
#endif /* 80x86 */

/* amd64, 64-bit limbs: same shapes as the 32-bit x86 macros above, using
   the q-suffixed instructions and UDItype casts. */
#if defined (__amd64__) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addq %5,%q1\n\tadcq %3,%q0" \
           : "=r" (sh), "=&r" (sl) \
           : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)), \
             "%1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subq %5,%q1\n\tsbbq %3,%q0" \
           : "=r" (sh), "=&r" (sl) \
           : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)), \
             "1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulq %3" \
           : "=a" (w0), "=d" (w1) \
           : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
  __asm__ ("divq %4" /* stringification in K&R C */ \
           : "=a" (q), "=d" (r) \
           : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
#define count_leading_zeros(count, x) \
  do { \
    UDItype __cbtmp; \
    ASSERT ((x) != 0); \
    __asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
    (count) = __cbtmp ^ 63; \
  } while (0)
/* bsfq destination must be a 64-bit register, "%q0" forces this in case count is only an int.
*/ #define count_trailing_zeros(count, x) \ do { \ ASSERT ((x) != 0); \ __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x))); \ } while (0) #endif /* x86_64 */ #if defined (__i860__) && W_TYPE_SIZE == 32 #define rshift_rhlc(r,h,l,c) \ __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \ "=r" (r) : "r" (h), "r" (l), "rn" (c)) #endif /* i860 */ #if defined (__i960__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \ : "=r" (sh), "=&r" (sl) \ : "dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl)) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \ : "=r" (sh), "=&r" (sl) \ : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl)) #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ __asm__ ("emul %2,%1,%0" \ : "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \ (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) #define __umulsidi3(u, v) \ ({UDItype __w; \ __asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \ __w; }) #define udiv_qrnnd(q, r, nh, nl, d) \ do { \ union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __nn; \ __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ __asm__ ("ediv %d,%n,%0" \ : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \ (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ } while (0) #define count_leading_zeros(count, x) \ do { \ USItype __cbtmp; \ __asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \ (count) = __cbtmp ^ 31; \ } while (0) #define COUNT_LEADING_ZEROS_0 (-32) /* sic */ #if defined (__i960mx) /* what is the proper symbol to test??? 
*/ #define rshift_rhlc(r,h,l,c) \ do { \ union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __nn; \ __nn.__i.__h = (h); __nn.__i.__l = (l); \ __asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ } #endif /* i960mx */ #endif /* i960 */ #if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \ || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \ || defined (__mc5307__)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ : "=d" (sh), "=&d" (sl) \ : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \ "%1" ((USItype)(al)), "g" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ : "=d" (sh), "=&d" (sl) \ : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \ "1" ((USItype)(al)), "g" ((USItype)(bl))) /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */ #if defined (__mc68020__) || defined(mc68020) \ || defined (__mc68030__) || defined (mc68030) \ || defined (__mc68040__) || defined (mc68040) \ || defined (__mcpu32__) || defined (mcpu32) \ || defined (__NeXT__) #define umul_ppmm(w1, w0, u, v) \ __asm__ ("mulu%.l %3,%1:%0" \ : "=d" (w0), "=d" (w1) \ : "%0" ((USItype)(u)), "dmi" ((USItype)(v))) #define UMUL_TIME 45 #define udiv_qrnnd(q, r, n1, n0, d) \ __asm__ ("divu%.l %4,%1:%0" \ : "=d" (q), "=d" (r) \ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) #define UDIV_TIME 90 #define sdiv_qrnnd(q, r, n1, n0, d) \ __asm__ ("divs%.l %4,%1:%0" \ : "=d" (q), "=d" (r) \ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) #else /* for other 68k family members use 16x16->32 multiplication */ #define umul_ppmm(xh, xl, a, b) \ do { USItype __umul_tmp1, __umul_tmp2; \ __asm__ ("| Inlined umul_ppmm\n" \ " move%.l %5,%3\n" \ " move%.l %2,%0\n" \ " move%.w %3,%1\n" \ " swap %3\n" \ " swap %0\n" \ " mulu%.w %2,%1\n" \ " mulu%.w %3,%0\n" \ " mulu%.w %2,%3\n" \ " swap %2\n" \ " mulu%.w 
%5,%2\n" \
" add%.l %3,%2\n" \
" jcc 1f\n" \
" add%.l %#0x10000,%0\n" \
"1: move%.l %2,%3\n" \
" clr%.w %2\n" \
" swap %2\n" \
" swap %3\n" \
" clr%.w %3\n" \
" add%.l %3,%1\n" \
" addx%.l %2,%0\n" \
" | End inlined umul_ppmm" \
              : "=&d" (xh), "=&d" (xl), \
                "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
              : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
  } while (0)
#define UMUL_TIME 100
#define UDIV_TIME 400
#endif /* not mc68020 */
/* The '020, '030, '040 and '060 have bitfield insns.
   GCC 3.4 defines __mc68020__ when in CPU32 mode, check for __mcpu32__ to
   exclude bfffo on that chip (bitfield insns not available). */
#if (defined (__mc68020__) || defined (mc68020) \
     || defined (__mc68030__) || defined (mc68030) \
     || defined (__mc68040__) || defined (mc68040) \
     || defined (__mc68060__) || defined (mc68060) \
     || defined (__NeXT__)) \
  && ! defined (__mcpu32__)
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
           : "=d" (count) \
           : "od" ((USItype) (x)), "n" (0))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* mc68000 */

/* m88000, 32-bit limbs. */
#if defined (__m88000__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
           : "=r" (sh), "=&r" (sl) \
           : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
           : "=r" (sh), "=&r" (sl) \
           : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
/* ff1 yields a bit index that is converted to a leading-zero count with
   "^ 31". */
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
/* The 88110 variant adds double-word multiply and divide; values go through
   a union whose struct lists the high word first. */
#if defined (__m88110__)
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
    (wh) = __x.__i.__h; \
    (wl) = __x.__i.__l; \
  } while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x, __q; \
__x.__i.__h = (n1); __x.__i.__l = (n0); \ __asm__ ("divu.d %0,%1,%2" \ : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) #define UMUL_TIME 5 #define UDIV_TIME 25 #else #define UMUL_TIME 17 #define UDIV_TIME 150 #endif /* __m88110__ */ #endif /* __m88000__ */ #if defined (__mips) && W_TYPE_SIZE == 32 #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 #define umul_ppmm(w1, w0, u, v) \ __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v)) #else #define umul_ppmm(w1, w0, u, v) \ __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \ : "=d" (w0), "=d" (w1) : "d" (u), "d" (v)) #endif #define UMUL_TIME 10 #define UDIV_TIME 100 #endif /* __mips */ #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 #if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 #define umul_ppmm(w1, w0, u, v) \ __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v)) #else #define umul_ppmm(w1, w0, u, v) \ __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \ : "=d" (w0), "=d" (w1) : "d" (u), "d" (v)) #endif #define UMUL_TIME 20 #define UDIV_TIME 140 #endif /* __mips */ #if defined (__ns32000__) && W_TYPE_SIZE == 32 #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ __asm__ ("meid %2,%0" \ : "=g" (__x.__ll) \ : "%0" ((USItype)(u)), "g" ((USItype)(v))); \ (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) #define __umulsidi3(u, v) \ ({UDItype __w; \ __asm__ ("meid %2,%0" \ : "=g" (__w) \ : "%0" ((USItype)(u)), "g" ((USItype)(v))); \ __w; }) #define udiv_qrnnd(q, r, n1, n0, d) \ ({union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ __x.__i.__h = (n1); __x.__i.__l = (n0); \ __asm__ ("deid %2,%0" \ : "=g" (__x.__ll) \ : "0" (__x.__ll), "g" ((USItype)(d))); \ (r) = __x.__i.__l; (q) = __x.__i.__h; }) #define count_trailing_zeros(count,x) \ do { \ __asm__ ("ffsd %2,%0" \ : "=r" (count) \ : "0" ((USItype) 0), "r" ((USItype) (x))); \ } while (0) #endif /* __ns32000__ */ /* In the past we had a block of various #defines 
tested
       _ARCH_PPC - AIX
       _ARCH_PWR - AIX
       __powerpc__ - gcc
       __POWERPC__ - BEOS
       __ppc__ - Darwin
       PPC - old gcc, GNU/Linux, SysV
   The plain PPC test was not good for vxWorks, since PPC is defined on all
   CPUs there (eg. m68k too), as a constant one is expected to compare
   CPU_FAMILY against.

   At any rate, this was pretty unattractive and a bit fragile. The use of
   HAVE_HOST_CPU_FAMILY is designed to cut through it all and be sure of
   getting the desired effect.

   ENHANCE-ME: We should test _IBMR2 here when we add assembly support for
   the system vendor compilers. (Is that vendor compilers with inline asm,
   or what?) */

#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc) \
  && W_TYPE_SIZE == 32
/* When bh is a compile-time 0 or all-ones, the high word is formed with
   addze / addme on ah alone, so bh is not passed to the asm at all. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
             : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
             : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
             : "=r" (sh), "=&r" (sl) \
             : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
/* Likewise for subtraction: a compile-time 0 or all-ones in ah or bh picks a
   one-operand high-word instruction applied to the other high word. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
#define count_leading_zeros(count, x) \
  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#if HAVE_HOST_CPU_FAMILY_powerpc
/* High word from mulhwu / mulhw, low word from a plain C multiply of the
   saved copies.  The asm operands use (m0)/(m1) directly, so each argument
   is expanded twice. */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    SItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#else
/* Non-powerpc branch of this section: signed mul / div whose second result
   is returned through the "q" constraint. */
#define UMUL_TIME 8
#define smul_ppmm(xh, xl, m0, m1) \
  __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
#define UDIV_TIME 100
#endif
#endif /* 32-bit POWER architecture variants. */

/* We should test _IBMR2 here when we add assembly support for the system
   vendor compilers. */
#if HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64
#if !defined (_LONG_LONG_LIMB)
/* _LONG_LONG_LIMB is ABI=mode32 where adde operates on 32-bit values. So use adde etc only when not _LONG_LONG_LIMB.
*/
/* Same structure as the 32-bit macros of this family, with the all-ones
   tests written against UDItype. */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
             : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
             : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else \
      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
             : "=r" (sh), "=&r" (sl) \
             : "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
/* A compile-time 0 or all-ones in ah or bh selects a one-operand high-word
   instruction; the general case is the final branch. */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else \
      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
#endif /* !
_LONG_LONG_LIMB */ #define count_leading_zeros(count, x) \ __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) #define COUNT_LEADING_ZEROS_0 64 #define umul_ppmm(ph, pl, m0, m1) \ do { \ UDItype __m0 = (m0), __m1 = (m1); \ __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = __m0 * __m1; \ } while (0) #define UMUL_TIME 15 #define smul_ppmm(ph, pl, m0, m1) \ do { \ DItype __m0 = (m0), __m1 = (m1); \ __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = __m0 * __m1; \ } while (0) #define SMUL_TIME 14 /* ??? */ #define UDIV_TIME 120 /* ??? */ #endif /* 64-bit PowerPC. */ #if defined (__pyr__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addw %5,%1\n\taddwc %3,%0" \ : "=r" (sh), "=&r" (sl) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "%1" ((USItype)(al)), "g" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subw %5,%1\n\tsubwb %3,%0" \ : "=r" (sh), "=&r" (sl) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "1" ((USItype)(al)), "g" ((USItype)(bl))) /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. 
*/ #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ struct {USItype __h, __l;} __i; \ } __x; \ __asm__ ("movw %1,%R0\n\tuemul %2,%0" \ : "=&r" (__x.__ll) \ : "g" ((USItype) (u)), "g" ((USItype)(v))); \ (w1) = __x.__i.__h; (w0) = __x.__i.__l;}) #endif /* __pyr__ */ #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("a %1,%5\n\tae %0,%3" \ : "=r" (sh), "=&r" (sl) \ : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \ "%1" ((USItype)(al)), "r" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("s %1,%5\n\tse %0,%3" \ : "=r" (sh), "=&r" (sl) \ : "0" ((USItype)(ah)), "r" ((USItype)(bh)), \ "1" ((USItype)(al)), "r" ((USItype)(bl))) #define smul_ppmm(ph, pl, m0, m1) \ __asm__ ( \ "s r2,r2\n" \ " mts r10,%2\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " m r2,%3\n" \ " cas %0,r2,r0\n" \ " mfs r10,%1" \ : "=r" (ph), "=r" (pl) \ : "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \ : "r2") #define UMUL_TIME 20 #define UDIV_TIME 200 #define count_leading_zeros(count, x) \ do { \ if ((x) >= 0x10000) \ __asm__ ("clz %0,%1" \ : "=r" (count) : "r" ((USItype)(x) >> 16)); \ else \ { \ __asm__ ("clz %0,%1" \ : "=r" (count) : "r" ((USItype)(x))); \ (count) += 16; \ } \ } while (0) #endif /* RT/ROMP */ #if defined (__sh2__) && W_TYPE_SIZE == 32 #define umul_ppmm(w1, w0, u, v) \ __asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \ : "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach") #define UMUL_TIME 5 #endif #if defined (__sparc__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \ __CLOBBER_CC) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" 
\ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \ __CLOBBER_CC) /* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h doesn't define anything to indicate that to us, it only sets __sparcv8. */ #if defined (__sparc_v9__) || defined (__sparcv9) /* Perhaps we should use floating-point operations here? */ #if 0 /* Triggers a bug making mpz/tests/t-gcd.c fail. Perhaps we simply need explicitly zero-extend the inputs? */ #define umul_ppmm(w1, w0, u, v) \ __asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \ "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1") #else /* Use v8 umul until above bug is fixed. */ #define umul_ppmm(w1, w0, u, v) \ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) #endif /* Use a plain v8 divide for v9. */ #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ USItype __q; \ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ (r) = (n0) - __q * (d); \ (q) = __q; \ } while (0) #else #if defined (__sparc_v8__) /* gcc normal */ \ || defined (__sparcv8) /* gcc solaris */ \ || HAVE_HOST_CPU_supersparc /* Don't match immediate range because, 1) it is not often useful, 2) the 'I' flag thinks of the range as a 13 bit signed interval, while we want to match a 13 bit interval, sign extended to 32 bits, but INTERPRETED AS UNSIGNED. */ #define umul_ppmm(w1, w0, u, v) \ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) #define UMUL_TIME 5 #if HAVE_HOST_CPU_supersparc #define UDIV_TIME 60 /* SuperSPARC timing */ #else /* Don't use this on SuperSPARC because its udiv only handles 53 bit dividends and will trap to the kernel for the rest. */ #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ USItype __q; \ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ (r) = (n0) - __q * (d); \ (q) = __q; \ } while (0) #define UDIV_TIME 25 #endif /* HAVE_HOST_CPU_supersparc */ #else /* ! 
__sparc_v8__ */ #if defined (__sparclite__) /* This has hardware multiply but not divide. It also has two additional instructions scan (ffs from high bit) and divscc. */ #define umul_ppmm(w1, w0, u, v) \ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) #define UMUL_TIME 5 #define udiv_qrnnd(q, r, n1, n0, d) \ __asm__ ("! Inlined udiv_qrnnd\n" \ " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ " tst %%g0\n" \ " divscc %3,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%%g1\n" \ " divscc %%g1,%4,%0\n" \ " rd %%y,%1\n" \ " bl,a 1f\n" \ " add %1,%4,%1\n" \ "1: ! End of inline udiv_qrnnd" \ : "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \ : "%g1" __AND_CLOBBER_CC) #define UDIV_TIME 37 #define count_leading_zeros(count, x) \ __asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x)) /* Early sparclites return 63 for an argument of 0, but they warn that future implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 undefined. */ #endif /* __sparclite__ */ #endif /* __sparc_v8__ */ #endif /* __sparc_v9__ */ /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ #ifndef umul_ppmm #define umul_ppmm(w1, w0, u, v) \ __asm__ ("! 
Inlined umul_ppmm\n" \ " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \ " sra %3,31,%%g2 ! Don't move this insn\n" \ " and %2,%%g2,%%g2 ! Don't move this insn\n" \ " andcc %%g0,0,%%g1 ! Don't move this insn\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,%3,%%g1\n" \ " mulscc %%g1,0,%%g1\n" \ " add %%g1,%%g2,%0\n" \ " rd %%y,%1" \ : "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \ : "%g1", "%g2" __AND_CLOBBER_CC) #define UMUL_TIME 39 /* 39 instructions */ #endif #ifndef udiv_qrnnd #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ do { UWtype __r; \ (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ (r) = __r; \ } while (0) extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype)); #ifndef UDIV_TIME #define UDIV_TIME 140 #endif #endif /* LONGLONG_STANDALONE */ #endif /* udiv_qrnnd */ #endif /* __sparc__ */ #if defined (__sparc__) && W_TYPE_SIZE == 64 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ( \ "addcc %r4,%5,%1\n" \ " addccc %r6,%7,%%g0\n" \ " addc %r2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \ "%rJ" ((al) >> 32), "rI" ((bl) >> 32) \ __CLOBBER_CC) #define sub_ddmmss(sh, sl, ah, al, 
bh, bl) \ __asm__ ( \ "subcc %r4,%5,%1\n" \ " subccc %r6,%7,%%g0\n" \ " subc %r2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl), \ "rJ" ((al) >> 32), "rI" ((bl) >> 32) \ __CLOBBER_CC) #endif #if defined (__vax__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ : "=g" (sh), "=&g" (sl) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "%1" ((USItype)(al)), "g" ((USItype)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ : "=g" (sh), "=&g" (sl) \ : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \ "1" ((USItype)(al)), "g" ((USItype)(bl))) #define smul_ppmm(xh, xl, m0, m1) \ do { \ union {UDItype __ll; \ struct {USItype __l, __h;} __i; \ } __x; \ USItype __m0 = (m0), __m1 = (m1); \ __asm__ ("emul %1,%2,$0,%0" \ : "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ } while (0) #define sdiv_qrnnd(q, r, n1, n0, d) \ do { \ union {DItype __ll; \ struct {SItype __l, __h;} __i; \ } __x; \ __x.__i.__h = n1; __x.__i.__l = n0; \ __asm__ ("ediv %3,%2,%0,%1" \ : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \ } while (0) #if 0 /* FIXME: This instruction appears to be unimplemented on some systems (vax 8800 maybe). 
*/ #define count_trailing_zeros(count,x) \ do { \ __asm__ ("ffs 0, 31, %1, %0" \ : "=g" (count) \ : "g" ((USItype) (x))); \ } while (0) #endif #endif /* __vax__ */ #if defined (__z8000__) && W_TYPE_SIZE == 16 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ : "=r" (sh), "=&r" (sl) \ : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \ "%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl))) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ : "=r" (sh), "=&r" (sl) \ : "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \ "1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl))) #define umul_ppmm(xh, xl, m0, m1) \ do { \ union {long int __ll; \ struct {unsigned int __h, __l;} __i; \ } __x; \ unsigned int __m0 = (m0), __m1 = (m1); \ __asm__ ("mult %S0,%H3" \ : "=r" (__x.__i.__h), "=r" (__x.__i.__l) \ : "%1" (m0), "rQR" (m1)); \ (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ (xh) += ((((signed int) __m0 >> 15) & __m1) \ + (((signed int) __m1 >> 15) & __m0)); \ } while (0) #endif /* __z8000__ */ #endif /* __GNUC__ */ #endif /* NO_ASM */ #if !defined (umul_ppmm) && defined (__umulsidi3) #define umul_ppmm(ph, pl, m0, m1) \ { \ UDWtype __ll = __umulsidi3 (m0, m1); \ ph = (UWtype) (__ll >> W_TYPE_SIZE); \ pl = (UWtype) __ll; \ } #endif #if !defined (__umulsidi3) #define __umulsidi3(u, v) \ ({UWtype __hi, __lo; \ umul_ppmm (__hi, __lo, u, v); \ ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) #endif /* Use mpn_umul_ppmm or mpn_udiv_qrnnd functions, if they exist. The "_r" forms have "reversed" arguments, meaning the pointer is last, which sometimes allows better parameter passing, in particular on 64-bit hppa. */ #define mpn_umul_ppmm __MPN(umul_ppmm) extern UWtype mpn_umul_ppmm _PROTO ((UWtype *, UWtype, UWtype)); #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm \ && ! 
defined (LONGLONG_STANDALONE) #define umul_ppmm(wh, wl, u, v) \ do { \ UWtype __umul_ppmm__p0; \ (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v)); \ (wl) = __umul_ppmm__p0; \ } while (0) #endif #define mpn_umul_ppmm_r __MPN(umul_ppmm_r) extern UWtype mpn_umul_ppmm_r _PROTO ((UWtype, UWtype, UWtype *)); #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r \ && ! defined (LONGLONG_STANDALONE) #define umul_ppmm(wh, wl, u, v) \ do { \ UWtype __umul_ppmm__p0; \ (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_ppmm__p0); \ (wl) = __umul_ppmm__p0; \ } while (0) #endif #define mpn_udiv_qrnnd __MPN(udiv_qrnnd) extern UWtype mpn_udiv_qrnnd _PROTO ((UWtype *, UWtype, UWtype, UWtype)); #if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd \ && ! defined (LONGLONG_STANDALONE) #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ UWtype __udiv_qrnnd__r; \ (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \ (UWtype) (n1), (UWtype) (n0), (UWtype) d); \ (r) = __udiv_qrnnd__r; \ } while (0) #endif #define mpn_udiv_qrnnd_r __MPN(udiv_qrnnd_r) extern UWtype mpn_udiv_qrnnd_r _PROTO ((UWtype, UWtype, UWtype, UWtype *)); #if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r \ && ! defined (LONGLONG_STANDALONE) #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ UWtype __udiv_qrnnd__r; \ (q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) d, \ &__udiv_qrnnd__r); \ (r) = __udiv_qrnnd__r; \ } while (0) #endif /* If this machine has no inline assembler, use C macros. */ #if !defined (add_ssaaaa) #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ UWtype __x; \ __x = (al) + (bl); \ (sh) = (ah) + (bh) + (__x < (al)); \ (sl) = __x; \ } while (0) #endif #if !defined (sub_ddmmss) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ UWtype __x; \ __x = (al) - (bl); \ (sh) = (ah) - (bh) - ((al) < (bl)); \ (sl) = __x; \ } while (0) #endif /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of smul_ppmm. 
*/
/* Unsigned double-word product built on top of smul_ppmm.  The high word
   returned by smul_ppmm is corrected by adding v when the top bit of u is set
   and u when the top bit of v is set: each "-(x >> (W_TYPE_SIZE - 1))" below
   is an all-ones mask exactly when the top bit of x is set, zero otherwise.
   u and v are copied into locals first, so each is evaluated once; w0 is
   passed straight through to smul_ppmm.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    smul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
      + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
#endif

/* If we still don't have umul_ppmm, define it using plain C.

   For reference, when this code is used for squaring (ie. u and v identical
   expressions), gcc recognises __x1 and __x2 are the same and generates 3
   multiplies, not 4.  The subsequent additions could be optimized a bit,
   but the only place GMP currently uses such a square is mpn_sqr_basecase,
   and chips obliged to use this generic C umul will have plenty of worse
   performance problems than a couple of extra instructions on the diagonal
   of sqr_basecase.

   Method: u and v are each split into a low and a high half
   (__ll_lowpart / __ll_highpart, defined elsewhere in this header), the four
   half-by-half products __x0..__x3 are formed, and the two middle products
   are folded into the result with an explicit carry check.  __ll_B is
   presumably the half-word base 2^(W_TYPE_SIZE/2) -- confirm against its
   definition.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
    UWtype __u = (u), __v = (v); \
 \
    __ul = __ll_lowpart (__u); \
    __uh = __ll_highpart (__u); \
    __vl = __ll_lowpart (__v); \
    __vh = __ll_highpart (__v); \
 \
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
 \
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2; /* but this indeed can */ \
    if (__x1 < __x2) /* did we get it? */ \
      __x3 += __ll_B; /* yes, add it in the proper pos. */ \
 \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
  } while (0)
#endif

/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
   exist in one form or another).  This is the mirror image of the
   smul-based umul_ppmm: the same two masked terms are subtracted from the
   high word instead of added.  */
#if !defined (smul_ppmm)
#define smul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    umul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
      - (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
#endif

/* Define this unconditionally, so it can be used for debugging.
*/
/* Plain C division of the two-word value (n1,n0) by d, giving quotient q and
   remainder r.  The quotient is produced as two half-word digits (__q1 then
   __q0): each digit is estimated by dividing by the high half of d and then
   corrected downwards at most twice.
   The macro asserts d != 0 and n1 < d.  Note that d, n1 and n0 are expanded
   several times, so arguments with side effects must not be passed.
   NOTE(review): when this macro is selected as udiv_qrnnd below,
   UDIV_NEEDS_NORMALIZATION is set to 1, so callers presumably must pass a d
   with its most significant bit set -- confirm against the callers.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do { \
    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
 \
    ASSERT ((d) != 0); \
    ASSERT ((n1) < (d)); \
 \
    __d1 = __ll_highpart (d); \
    __d0 = __ll_lowpart (d); \
 \
    __q1 = (n1) / __d1; \
    __r1 = (n1) - __q1 * __d1; \
    __m = __q1 * __d0; \
    __r1 = __r1 * __ll_B | __ll_highpart (n0); \
    if (__r1 < __m) \
      { \
        __q1--, __r1 += (d); \
        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m) \
            __q1--, __r1 += (d); \
      } \
    __r1 -= __m; \
 \
    __q0 = __r1 / __d1; \
    __r0 = __r1 - __q0 * __d1; \
    __m = __q0 * __d0; \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
    if (__r0 < __m) \
      { \
        __q0--, __r0 += (d); \
        if (__r0 >= (d)) \
          if (__r0 < __m) \
            __q0--, __r0 += (d); \
      } \
    __r0 -= __m; \
 \
    (q) = __q1 * __ll_B | __q0; \
    (r) = __r0; \
  } while (0)

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  The remainder comes back
   through a local and is then assigned to r.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    UWtype __r; \
    (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
    (r) = __r; \
  } while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   This is the only place in this section that sets UDIV_NEEDS_NORMALIZATION
   to 1.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/* Generic count_leading_zeros.  First finds __a, the bit offset of the
   highest non-zero byte-sized group of x (via nested comparisons against
   multiples of __BITS4 when W_TYPE_SIZE == 32, otherwise via a loop that
   steps down 8 bits at a time), then finishes with a lookup in __clz_tab
   indexed by x >> __a.  __BITS4 and __clz_tab are defined outside this
   macro.  */
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do { \
    UWtype __xr = (x); \
    UWtype __a; \
 \
    if (W_TYPE_SIZE == 32) \
      { \
        __a = __xr < ((UWtype) 1 << 2*__BITS4) \
          ? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1) \
          : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1 \
          : 3*__BITS4 + 1); \
      } \
    else \
      { \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
          if (((__xr >> __a) & 0xff) != 0) \
            break; \
        ++__a; \
      } \
 \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \
  } while (0)
/* This version gives a well-defined value for zero.
*/ #define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1) #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB #endif /* clz_tab needed by mpn/x86/pentium/mod_1.asm in a fat binary */ #if HAVE_HOST_CPU_FAMILY_x86 && WANT_FAT_BINARY #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB #endif #ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB extern const unsigned char __GMP_DECLSPEC __clz_tab[128]; #endif #if !defined (count_trailing_zeros) /* Define count_trailing_zeros using count_leading_zeros. The latter might be defined in asm, but if it is not, the C version above is good enough. */ #define count_trailing_zeros(count, x) \ do { \ UWtype __ctz_x = (x); \ UWtype __ctz_c; \ ASSERT (__ctz_x != 0); \ count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ (count) = W_TYPE_SIZE - 1 - __ctz_c; \ } while (0) #endif #ifndef UDIV_NEEDS_NORMALIZATION #define UDIV_NEEDS_NORMALIZATION 0 #endif /* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and that hence the latter should always be used. */ #ifndef UDIV_PREINV_ALWAYS #define UDIV_PREINV_ALWAYS 0 #endif /* Give defaults for UMUL_TIME and UDIV_TIME. */ #ifndef UMUL_TIME #define UMUL_TIME 1 #endif #ifndef UDIV_TIME #define UDIV_TIME UMUL_TIME #endif flint-1.011/ZmodF_mul.c0000644017361200017500000007241411025357254014573 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** ZmodF_mul.c Copyright (C) 2007, David Harvey and William Hart Routines for multiplication of elements of Z/pZ where p = B^n + 1, B = 2^FLINT_BITS. ******************************************************************************/ #include #include "ZmodF.h" #include "ZmodF_poly.h" #include "ZmodF_mul.h" #include "mpn_extras.h" #include "ZmodF_mul-tuning.h" #include "longlong_wrapper.h" #include "longlong.h" /****************************************************************************** Plain multiplication ******************************************************************************/ /* Normalises a and b, and then attempts to multiply them mod p, putting result in res. It only succeeds if one of the inputs is exactly -1 mod p, in which case it returns 1. Otherwise it just returns 0. */ long _ZmodF_mul_handle_minus1(ZmodF_t res, ZmodF_t a, ZmodF_t b, unsigned long n) { ZmodF_normalise(a, n); ZmodF_normalise(b, n); if (a[n]) { // a = -1 mod p ZmodF_neg(res, b, n); return 1; } if (b[n]) { // b = -1 mod p ZmodF_neg(res, a, n); return 1; } return 0; } /* Same as _ZmodF_mul_handle_minus1, but for squaring. */ long _ZmodF_sqr_handle_minus1(ZmodF_t res, ZmodF_t a, unsigned long n) { ZmodF_normalise(a, n); if (a[n]) { // a = -1 mod p if (a == res) // handle aliasing res[n] = 0; else ZmodF_zero(res, n); res[0] = 1; return 1; } return 0; } /* Computes res := a * b mod p = B^n + 1, where a and b are of length n. scratch must be of length 2n, and not overlap any of a, b, res. a and b must be normalised, and have zero overflow limbs. Any combinations of a, b, res may be aliased. The output is not necessarily normalised. 
*/ void _ZmodF_mul(ZmodF_t res, ZmodF_t a, ZmodF_t b, mp_limb_t* scratch, unsigned long n) { FLINT_ASSERT(a[n] == 0); FLINT_ASSERT(b[n] == 0); // Detect zero limbs at the top of a and b, and reduce multiplication size // appropriately. // (Note: the reason we do this is that in the FFTs we often *do* get // Fourier coefficients with plenty of zeroes; for example the first and // second coefficients are x0 + x1 + ... + xM and x0 - x1 + x2 - ... - xM, // which are about half the size of the other coefficients. The overhead // from performing the checks is negligible if n is large enough, and we // expect say n >= 16 if the FFTs are tuned correctly.) unsigned long limbs_out = 2*n; unsigned long limbs1 = n; while (limbs1 && !a[limbs1-1]) { scratch[--limbs_out] = 0; limbs1--; } unsigned long limbs2 = n; while (limbs2 && !b[limbs2-1]) { scratch[--limbs_out] = 0; limbs2--; } if ((limbs1 == 0) || (limbs2 == 0)) { F_mpn_clear(res, n+1); return; } // do the product into scratch if (limbs1 >= limbs2) F_mpn_mul(scratch, a, limbs1, b, limbs2); else F_mpn_mul(scratch, b, limbs2, a, limbs1); // reduce mod p res[n] = -mpn_sub_n(res, scratch, scratch + n, n); } void ZmodF_mul(ZmodF_t res, ZmodF_t a, ZmodF_t b, mp_limb_t* scratch, unsigned long n) { // try special cases a = -1 or b = -1 mod p if (_ZmodF_mul_handle_minus1(res, a, b, n)) return; // that didn't work, run ordinary multiplication _ZmodF_mul(res, a, b, scratch, n); } void ZmodF_sqr(ZmodF_t res, ZmodF_t a, mp_limb_t* scratch, unsigned long n) { // try special case a = -1 mod p if (_ZmodF_sqr_handle_minus1(res, a, n)) return; // that didn't work, run ordinary multiplication _ZmodF_mul(res, a, a, scratch, n); } /****************************************************************************** ZmodF_mul_info initialisation routines ******************************************************************************/ /* Initialises info to use plain mpn_mul_n for multiplication. 
*/ void ZmodF_mul_info_init_plain(ZmodF_mul_info_t info, unsigned long n, int squaring) { info->n = n; info->squaring = squaring; info->algo = ZMODF_MUL_ALGO_PLAIN; info->scratch = (mp_limb_t*) flint_stack_alloc(2*n); } /* Initialises info to use the threeway algorithm for multiplication. PRECONDITIONS: n must be divisible by 3 */ void ZmodF_mul_info_init_threeway(ZmodF_mul_info_t info, unsigned long n, int squaring) { FLINT_ASSERT(n % 3 == 0); info->n = n; info->squaring = squaring; info->m = n/3; info->algo = ZMODF_MUL_ALGO_THREEWAY; // todo: maybe can use less memory here when squaring: info->scratch = (mp_limb_t*) flint_stack_alloc(3*n + 1); } /* Initialises info to use FFT algorithm for multiplication. If m == 0, it will automatically choose a suitable value for m, and will choose k = 0. (This won't always be optimal, but it should be pretty close.) PRECONDITIONS: n*FLINT_BITS must be divisible by 2^depth (so that the input can be broken up into 2^depth pieces) 0 <= k <= 2 k <= m m*FLINT_BITS must be divisible by 2^depth (so that the coefficients support a negacyclic FFT of the given depth). 
(m+k)*FLINT_BITS >= 2*n*FLINT_BITS/2^depth + 1 + depth (so that getting the convolution mod (B^m + 1)*B^k determines the result uniquely) */ void ZmodF_mul_info_init_fft(ZmodF_mul_info_t info, unsigned long n, unsigned long depth, unsigned long m, unsigned long k, int squaring) { FLINT_ASSERT((n * FLINT_BITS) % (1 << depth) == 0); FLINT_ASSERT(k <= m); info->algo = ZMODF_MUL_ALGO_FFT; info->n = n; info->squaring = squaring; if (!m) { // automatically determine reasonable values for m and k unsigned long input_bits = (n*FLINT_BITS) >> depth; unsigned long output_bits = 2*input_bits + 1 + depth; m = ((output_bits - 1) >> FLINT_LG_BITS_PER_LIMB) + 1; k = 0; // m needs to be divisible by 2^shift unsigned long shift = 0; if (depth > FLINT_LG_BITS_PER_LIMB) shift = depth - FLINT_LG_BITS_PER_LIMB; // if m is in the feasible range for the threeway algorithm, try to // round it up/down to a multiple of 3 if (m < ZmodF_mul_threeway_fft_threshold) { // first try rounding down to a multiple of 3, using k to compensate // (this only works if the smaller m satisfies the right divisibility // conditions) unsigned long smaller_m = (m / 3) * 3; if (((smaller_m >> shift) << shift) == smaller_m) { k = m - smaller_m; m = smaller_m; } else { // that didn't work; just round up to a multiple of 3*2^shift unsigned long round = 3 << shift; m = (((m-1) / round) + 1) * round; } } else { // threeway not feasible. // first try rounding down to a multiple of 2^shift, using // k to compensate unsigned long smaller_m = (m >> shift) << shift; if (m - smaller_m <= 2) { k = m - smaller_m; m = smaller_m; } else { // that didn't work; just round up to a multiple of 2^shift m = (((m - 1) >> shift) + 1) << shift; } } } FLINT_ASSERT((m*FLINT_BITS) % (1 << depth) == 0); FLINT_ASSERT(k <= 2); FLINT_ASSERT((m+k)*FLINT_BITS >= ((2*n*FLINT_BITS) >> depth) + 1 + depth); info->m = m; info->k = k; // For the ZmodF_poly routines, we'll only need m+1 limbs (i.e. to work // mod B^m + 1). 
But later on we'll need m+k+1 limbs to store the result // mod (B^m + 1)*B^k, so we allocate these spare limbs in advance. ZmodF_poly_init(info->polys[0], depth, m + k, 1); ZmodF_poly_decrease_n(info->polys[0], m); if (!squaring) { ZmodF_poly_init(info->polys[1], depth, m + k, 1); ZmodF_poly_decrease_n(info->polys[1], m); } // todo: maybe can use less memory here when squaring: if (k) info->scratch = (mp_limb_t*) flint_stack_alloc((3*k) << depth); else info->scratch = NULL; } /* Looks up n in the tuning table and returns optimal negacyclic FFT depth. */ unsigned long _ZmodF_mul_best_fft_depth(unsigned long n, int squaring) { unsigned long* table = squaring ? ZmodF_sqr_fft_table : ZmodF_mul_fft_table; unsigned long i; for (i = 0; table[i]; i++) if (n < table[i]) return i + 3; // We've gone beyond the end of the table; need to choose a value // somewhat heuristically. We extrapolate from the last table entry, // assuming that the convolution length should be proportional to // sqrt(total bitsize). 
unsigned long depth = i + 3 + (unsigned long) floor(log(1.0 * n / table[i-1]) / log(4.0)); // need n*FLINT_BITS divisible by 2^depth while ((n*FLINT_BITS) & ((1 << depth) - 1)) depth--; return depth; } void ZmodF_mul_info_init(ZmodF_mul_info_t info, unsigned long n, int squaring) { if (!squaring) { if (n < ZmodF_mul_plain_threeway_threshold) { ZmodF_mul_info_init_plain(info, n, 0); return; } if (n % 3 == 0) { if (n < ZmodF_mul_threeway_fft_threshold) { ZmodF_mul_info_init_threeway(info, n, 0); return; } } else { if (n < ZmodF_mul_plain_fft_threshold) { ZmodF_mul_info_init_plain(info, n, 0); return; } } } else { if (n < ZmodF_sqr_plain_threeway_threshold) { ZmodF_mul_info_init_plain(info, n, 1); return; } if (n % 3 == 0) { if (n < ZmodF_sqr_threeway_fft_threshold) { ZmodF_mul_info_init_threeway(info, n, 1); return; } } else { if (n < ZmodF_sqr_plain_fft_threshold) { ZmodF_mul_info_init_plain(info, n, 1); return; } } } unsigned long depth = _ZmodF_mul_best_fft_depth(n, squaring); ZmodF_mul_info_init_fft(info, n, depth, 0, 0, squaring); } void ZmodF_mul_info_clear(ZmodF_mul_info_t info) { if (info->scratch) flint_stack_release(); if (info->algo == ZMODF_MUL_ALGO_FFT) { if (!info->squaring) ZmodF_poly_clear(info->polys[1]); ZmodF_poly_clear(info->polys[0]); } } /****************************************************************************** FFT multiplication routines ******************************************************************************/ /* Splits x into equally sized pieces. That is, let R = (B^n)^(1/M), where M = transform length of "poly". Let x = \sum_{i=0}^{M-1} c_i R^i, where each c_i is in [0, R). This function computes the c_i and stores them as coefficients of "poly". PRECONDITIONS: x must be normalised and of length n, and have zero overflow limb (i.e. 
is != -1 mod p) n*FLINT_BITS must be divisible by M */ void _ZmodF_mul_fft_split(ZmodF_poly_t poly, ZmodF_t x, unsigned long n) { FLINT_ASSERT((n * FLINT_BITS) % (1 << poly->depth) == 0); FLINT_ASSERT(x[n] == 0); unsigned long size = 1UL << poly->depth; // we'll split x into chunks each having "bits" bits unsigned long bits = (n * FLINT_BITS) >> poly->depth; // round it up to a whole number of limbs unsigned long limbs = ((bits - 1) >> FLINT_LG_BITS_PER_LIMB) + 1; // last_mask is applied to the last limb of each target coefficient to // zero out the bits that don't belong there unsigned long last_mask = (1UL << (bits & (FLINT_BITS-1))) - 1; if (!last_mask) last_mask = -1UL; // [start, end) are the bit-indices into x of the current chunk unsigned long start, end, i; for (i = 0, start = 0, end = bits; i < size; i++, start = end, end += bits) { // figure out which limbs contain the data for this chunk unsigned long start_limb = start >> FLINT_LG_BITS_PER_LIMB; unsigned long end_limb = ((end-1) >> FLINT_LG_BITS_PER_LIMB) + 1; // shift/copy the limbs containing the chunk into the target coefficient unsigned long start_bits = start & (FLINT_BITS-1); if (start_bits) mpn_rshift(poly->coeffs[i], x + start_limb, end_limb - start_limb, start_bits); else F_mpn_copy(poly->coeffs[i], x + start_limb, end_limb - start_limb); // zero out the high bits that shouldn't contribute to this coefficient poly->coeffs[i][limbs-1] &= last_mask; // zero out remaining limbs F_mpn_clear(poly->coeffs[i] + limbs, poly->n + 1 - limbs); } } /* Combines coefficients of poly into a ZmodF_t. More precisely, let the coefficients of "poly" be c_0, ..., c_{M-1}, where M = transform length of poly. The coefficients are assumed to be normalised, and lie in the range [0, q), where q = (B^m + 1)*B^k. (Here "normalised" means that the top m+1 limbs are normalised in the usual ZmodF_t sense; the bottom k limbs are arbitrary.) 
However the coefficients are interpreted as signed integers in the range (-E, E) where E = B^(m+k)/2. (This is necessary since the output of a negacyclic convolution can have negative coefficients.) The function computes res := \sum_{i=0}^{M-1} c_i R^i (mod p) (not necessarily normalised), where R = (B^n)^(1/M). NOTE: the input poly is destroyed. */ void _ZmodF_mul_fft_combine(ZmodF_t res, ZmodF_poly_t poly, unsigned long m, unsigned long k, unsigned long n) { FLINT_ASSERT((n * FLINT_BITS) % (1 << poly->depth) == 0); FLINT_ASSERT(poly->n > 0); ZmodF_zero(res, n); unsigned long size = 1 << poly->depth; // "bits" is the number of bits apart that each coefficient must be stored unsigned long bits = (n * FLINT_BITS) >> poly->depth; // "start" is the bit-index into x where the current coeff will be stored unsigned long start; long i; // first loop: twiddle bits to deal with signs of coefficients // (do this in reverse order to avoid quadratic running time) for (i = size-1, start = (size-1)*bits; i >= 0; i--, start -= bits) { // start_limb, start_bit indicate where the current coefficient will // be stored; end_limb points beyond the last limb unsigned long start_limb = start >> FLINT_LG_BITS_PER_LIMB; unsigned long start_bit = start & (FLINT_BITS-1); unsigned long end_limb = start_limb + m + k + 1; if (poly->coeffs[i][m + k] || ((mp_limb_signed_t) poly->coeffs[i][m + k - 1] < 0)) { // coefficient is negative, so it was stored as (B^m + 1)*B^k plus // the real coefficient, so we need to correct for that unsigned long fiddle = 1UL << start_bit; // first correct for B^k unsigned long index = start_limb + k; if (index < n) mpn_sub_1(res + index, res + index, n + 1 - index, fiddle); else { // handle negacyclic wraparound index -= n; mpn_add_1(res + index, res + index, n + 1 - index, fiddle); } // now correct for B^(m+k) index = start_limb + m + k; if (index < n) mpn_sub_1(res + index, res + index, n + 1 - index, fiddle); else { // handle negacyclic wraparound index -= n; 
mpn_add_1(res + index, res + index, n + 1 - index, fiddle); } } } // main loop: add in bulk of coefficients for (i = 0, start = 0; i < size; i++, start += bits) { // start_limb, start_bit indicate where the current coefficient will // be stored; end_limb points beyond the last limb unsigned long start_limb = start >> FLINT_LG_BITS_PER_LIMB; unsigned long start_bit = start & (FLINT_BITS-1); unsigned long end_limb = start_limb + m + k + 1; // shift coefficient to the left to line it up with the spot where // it's going to get added in if (start_bit) mpn_lshift(poly->coeffs[i], poly->coeffs[i], m + k + 1, start_bit); if (end_limb <= n) { // the coefficient fits nicely into the output, so add it in mpn_add(res + start_limb, res + start_limb, n + 1 - start_limb, poly->coeffs[i], m + k + 1); } else { // the coefficient needs to wrap around negacyclically res[n] += mpn_add_n(res + start_limb, res + start_limb, poly->coeffs[i], n - start_limb); mpn_sub(res, res, n + 1, poly->coeffs[i] + n - start_limb, end_limb - n); } } } /* in1 and in2 are two polynomials of length len, with coefficients mod B. This function computes their negacyclic convolution (mod B), stores result at out, also length len. out must not alias in1 or in2. Only naive convolution is implemented. */ void _ZmodF_mul_fft_convolve_modB(unsigned long* out, unsigned long* in1, unsigned long* in2, unsigned long len) { unsigned long i, j; for (i = 0; i < len; i++) out[i] = in1[0] * in2[i]; for (i = 1; i < len; i++) { for (j = 0; j < len - i; j++) out[i+j] += in1[i] * in2[j]; for (; j < len; j++) out[i+j-len] -= in1[i] * in2[j]; } } /* Multiplies (in1_hi*B + in1_lo) by (in2_hi*B + in2_lo) modulo B^2; puts result into (out_hi*B + out_lo). 
*/ void mul_modB2(unsigned long* out_hi, unsigned long* out_lo, unsigned long in1_hi, unsigned long in1_lo, unsigned long in2_hi, unsigned long in2_lo) { umul_ppmm(*out_hi, *out_lo, in1_lo, in2_lo); *out_hi += (in1_hi * in2_lo) + (in2_hi * in1_lo); } /* in1 and in2 are two polynomials of length len, with coefficients mod B^2. This function computes their negacyclic convolution (mod B^2), stores result at out, also length len. Each coefficient uses exactly 2 limbs. out must not alias in1 or in2. Only naive convolution is implemented. */ void _ZmodF_mul_fft_convolve_modB2(unsigned long* out, unsigned long* in1, unsigned long* in2, unsigned long len) { unsigned long i, j; unsigned long hi, lo; for (i = 0; i < len; i++) mul_modB2(out + 2*i+1, out + 2*i, in1[1], in1[0], in2[2*i+1], in2[2*i]); for (i = 1; i < len; i++) { for (j = 0; j < len - i; j++) { mul_modB2(&hi, &lo, in1[2*i+1], in1[2*i], in2[2*j+1], in2[2*j]); add_ssaaaa(out[2*(i+j)+1], out[2*(i+j)], out[2*(i+j)+1], out[2*(i+j)], hi, lo); } for (; j < len; j++) { mul_modB2(&hi, &lo, in1[2*i+1], in1[2*i], in2[2*j+1], in2[2*j]); sub_ddmmss(out[2*(i+j-len)+1], out[2*(i+j-len)], out[2*(i+j-len)+1], out[2*(i+j-len)], hi, lo); } } } /* Reduces an array of ZmodF_t's mod B, stores result at out. */ void _ZmodF_mul_fft_reduce_modB(unsigned long* out, ZmodF_t* in, unsigned long len) { for (unsigned long i = 0; i < len; i++) out[i] = in[i][0]; } /* Reduces an array of ZmodF_t's mod B^2, stores result at out (two limbs per coefficient). */ void _ZmodF_mul_fft_reduce_modB2(unsigned long* out, ZmodF_t* in, unsigned long len) { for (unsigned long i = 0; i < len; i++) { out[2*i] = in[i][0]; out[2*i+1] = in[i][1]; } } /* Computes res = a*b using FFT algorithm. Output is not necessarily normalised. If a == b, this function automatically uses a faster squaring algorithm. Expects both inputs to be normalised and != -1 mod p. 
*/ void _ZmodF_mul_info_mul_fft(ZmodF_mul_info_t info, ZmodF_t res, ZmodF_t a, ZmodF_t b) { FLINT_ASSERT(info->algo == ZMODF_MUL_ALGO_FFT); FLINT_ASSERT(!a[info->n]); FLINT_ASSERT(!b[info->n]); unsigned long len = 1UL << info->polys[0]->depth; unsigned long n = info->n; unsigned long m = info->m; unsigned long k = info->k; if (a != b) { // distinct operands case FLINT_ASSERT(!info->squaring); // split inputs into 2^depth pieces _ZmodF_mul_fft_split(info->polys[0], a, n); _ZmodF_mul_fft_split(info->polys[1], b, n); // negacyclic convolution mod B^k if (k == 1) { _ZmodF_mul_fft_reduce_modB(info->scratch + len, info->polys[0]->coeffs, len); _ZmodF_mul_fft_reduce_modB(info->scratch + 2*len, info->polys[1]->coeffs, len); _ZmodF_mul_fft_convolve_modB(info->scratch, info->scratch + len, info->scratch + 2*len, len); } else if (k == 2) { _ZmodF_mul_fft_reduce_modB2(info->scratch + 2*len, info->polys[0]->coeffs, len); _ZmodF_mul_fft_reduce_modB2(info->scratch + 4*len, info->polys[1]->coeffs, len); _ZmodF_mul_fft_convolve_modB2(info->scratch, info->scratch + 2*len, info->scratch + 4*len, len); } // negacyclic convolution mod B^m + 1 using FFT ZmodF_poly_negacyclic_convolution(info->polys[0], info->polys[0], info->polys[1]); } else { // squaring case // split input into 2^depth pieces _ZmodF_mul_fft_split(info->polys[0], a, n); // negacyclic convolution mod B^k if (k == 1) { _ZmodF_mul_fft_reduce_modB(info->scratch + len, info->polys[0]->coeffs, len); _ZmodF_mul_fft_convolve_modB(info->scratch, info->scratch + len, info->scratch + len, len); } else if (k == 2) { _ZmodF_mul_fft_reduce_modB2(info->scratch + 2*len, info->polys[0]->coeffs, len); _ZmodF_mul_fft_convolve_modB2(info->scratch, info->scratch + 2*len, info->scratch + 2*len, len); } // negacyclic convolution mod B^m + 1 using FFT ZmodF_poly_negacyclic_convolution(info->polys[0], info->polys[0], info->polys[0]); } // use CRT to determine coefficients of convolution mod (B^m + 1)*B^k. 
// Basically the idea is: adjust bottom k limbs to make them agree with the // known mod B^k result, and compensate the top k limbs accordingly (i.e. // add a multiple of B^m + 1). if (k) { for (unsigned long i = 0; i < len; i++) { ZmodF_t coeff = info->polys[0]->coeffs[i]; ZmodF_fast_reduce(coeff, m); // zero out the top limbs for (unsigned long j = 0; j < k; j++) coeff[m+1+j] = 0; // compute the amount we need to adjust by mp_limb_t adjust[2]; mpn_sub_n(adjust, info->scratch + k*i, coeff, k); // perform adjustment mpn_add(coeff, coeff, m+k+1, adjust, k); mpn_add(coeff + m, coeff + m, k+1, adjust, k); // normalise result ZmodF_normalise(coeff + k, m); } } else ZmodF_poly_normalise(info->polys[0]); // substitute back to get integer mod B^n + 1 _ZmodF_mul_fft_combine(res, info->polys[0], m, k, n); } /****************************************************************************** Threeway multiplication routines ******************************************************************************/ /* Assume a is length 3m, and normalised, and != -1 mod p. Reduces a mod B^m + 1, stores result at res, in usual ZmodF_t format (m+1 limbs), not necessarily normalised. */ void _ZmodF_mul_threeway_reduce1(ZmodF_t res, ZmodF_t a, unsigned long m) { FLINT_ASSERT(a[3*m] == 0); res[m] = mpn_add_n(res, a, a+2*m, m); res[m] -= mpn_sub_n(res, res, a+m, m); } /* Assume a is length 3m, and normalised, and != -1 mod p. Reduces a mod B^2m - B^m + 1, stores result at res, exactly 2m limbs. res must have room for 2m+1 limbs, even though the last limb is not used for the answer. Note: in some cases there are two possible answers, since B^2m - B^m + 1 is less than B^2m. In these cases either answer may be produced. 
*/ void _ZmodF_mul_threeway_reduce2(mp_limb_t* res, ZmodF_t a, unsigned long m) { res[2*m] = mpn_add_n(res+m, a+m, a+2*m, m); long borrow = mpn_sub_n(res, a, a+2*m, m); mpn_sub_1(res+m, res+m, m+1, borrow); if (res[2*m]) { FLINT_ASSERT(res[2*m] == 1); // subtract B^2m - B^m + 1, then it's guaranteed to be normalised mpn_sub_1(res, res, 2*m, 1); mpn_add_1(res+m, res+m, m, 1); } } /* Computes res = a*b using threeway algorithm. Output is not necessarily normalised. If a == b, this function automatically uses a faster squaring algorithm. Expects both inputs to be normalised and != -1 mod p. */ void _ZmodF_mul_info_mul_threeway(ZmodF_mul_info_t info, ZmodF_t res, ZmodF_t a, ZmodF_t b) { FLINT_ASSERT(info->algo == ZMODF_MUL_ALGO_THREEWAY); FLINT_ASSERT(!a[info->n]); FLINT_ASSERT(!b[info->n]); unsigned long m = info->m; mp_limb_t* buf1 = info->scratch; mp_limb_t* buf2 = buf1 + m+1; mp_limb_t* buf3 = buf2 + 4*m; if (a != b) { // distinct operands case FLINT_ASSERT(!info->squaring); // reduce a and b mod B^m + 1 _ZmodF_mul_threeway_reduce1(buf1, a, m); _ZmodF_mul_threeway_reduce1(buf2, b, m); // buf1 := a*b mod B^m + 1 ZmodF_mul(buf1, buf1, buf2, buf3, m); // reduce inputs mod B^2m - B^m + 1 _ZmodF_mul_threeway_reduce2(buf2, a, m); _ZmodF_mul_threeway_reduce2(buf2 + 2*m, b, m); // multiply mod B^2m - B^m + 1 mpn_mul_n(buf3, buf2, buf2 + 2*m, 2*m); } else { // squaring case // reduce a mod B^m + 1 _ZmodF_mul_threeway_reduce1(buf1, a, m); // buf1 := a*a mod B^m + 1 ZmodF_mul(buf1, buf1, buf1, buf3, m); // reduce a mod B^2m - B^m + 1 _ZmodF_mul_threeway_reduce2(buf2, a, m); // square mod B^2m - B^m + 1 mpn_mul_n(buf3, buf2, buf2, 2*m); } // reduce mod B^3m + 1 (inplace) buf3[3*m] = -mpn_sub(buf3, buf3, 3*m, buf3 + 3*m, m); // Now buf3 (length 3m+1) is congruent to a*b mod B^2m - B^m + 1, // and buf1 (length m+1) is congruent to a*b mod B^m + 1. // Need to adjust buf3 to make it congruent to buf1 mod B^m + 1, // without modifying it mod B^2m - B^m + 1. 
// Strategy is: // Let X = (buf1 - buf3)/3 mod B^m + 1. // Add (B^2m - B^m + 1)*X to buf3. // buf2 := 3*X ZmodF_normalise(buf3, 3*m); if (buf3[3*m]) // special case: buf3 = -1 mod B^3m + 1 mpn_add_1(buf2, buf1, m+1, 1); else { _ZmodF_mul_threeway_reduce1(buf2, buf3, m); ZmodF_sub(buf2, buf1, buf2, m); } // buf2 := X ZmodF_divby3(buf2, buf2, m); ZmodF_normalise(buf2, m); // res := buf3 + (B^2m - B^m + 1)*X. if (buf2[m]) { // special case: X = -1 mod B^m + 1 ZmodF_set(res, buf3, 3*m); mpn_sub_1(res, res, 3*m+1, 1); mpn_add_1(res + m, res + m, 2*m+1, 1); mpn_sub_1(res + 2*m, res + 2*m, m+1, 1); } else { // usual case mp_limb_t carry1 = mpn_add_n(res, buf3, buf2, m); mp_limb_t carry2 = mpn_sub_n(res + m, buf3 + m, buf2, m); res[3*m] = buf3[3*m] + mpn_add_n(res + 2*m, buf3 + 2*m, buf2, m); mpn_add_1(res + m, res + m, 2*m+1, carry1); mpn_sub_1(res + 2*m, res + 2*m, m+1, carry2); } } /****************************************************************************** Main ZmodF_mul_info multiplication routine ******************************************************************************/ void ZmodF_mul_info_mul(ZmodF_mul_info_t info, ZmodF_t res, ZmodF_t a, ZmodF_t b) { // try special cases a = -1 or b = -1 mod p if (a != b) { if (_ZmodF_mul_handle_minus1(res, a, b, info->n)) return; } else { if (_ZmodF_sqr_handle_minus1(res, a, info->n)) return; } if (info->algo == ZMODF_MUL_ALGO_PLAIN) { _ZmodF_mul(res, a, b, info->scratch, info->n); return; } else if (info->algo == ZMODF_MUL_ALGO_THREEWAY) _ZmodF_mul_info_mul_threeway(info, res, a, b); else { FLINT_ASSERT(info->algo == ZMODF_MUL_ALGO_FFT); _ZmodF_mul_info_mul_fft(info, res, a, b); } } // end of file **************************************************************** flint-1.011/longlong_wrapper.h0000644017361200017500000000314511025357254016256 0ustar tabbotttabbott/*============================================================================ Copyright (C) 2007, William Hart, David Harvey This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ #ifndef LONGLONGWRAP_H #define LONGLONGWRAP_H #ifdef __cplusplus extern "C" { #endif #include // todo: I think perhaps UDWtype is not quite right. It needs to be // twice the length. But how to do this on a 64-bit machine? #define UWtype mp_limb_t #define UHWtype mp_limb_t #define UDWtype mp_limb_t #define W_TYPE_SIZE FLINT_BITS #define SItype int32_t #define USItype uint32_t #define DItype int64_t #define UDItype uint64_t #define LONGLONG_STANDALONE // todo: longlong.h requires there to be an ASSERT macro around. // For now I'm killing it, but we should hook this up to our own assertion // code. #define ASSERT(condition) #ifdef __cplusplus } #endif #endif flint-1.011/delta_qexp.c0000644017361200017500000000516511025357254015024 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* Demo FLINT program for computing the q-expansion of the delta function. (C) 2007 David Harvey and William Hart */ #include #include #include #include "flint.h" #include "fmpz_poly.h" int main(int argc, char* argv[]) { if (argc != 2) { printf("Syntax: delta_qexp \n"); printf("where is the number of terms to compute\n"); return 0; } // number of terms to compute long N = atoi(argv[1]); // compute coefficients of F(q)^2 long* values = malloc(sizeof(long) * N); for (long i = 0; i < N; i++) values[i] = 0; long stop = (long) ceil((-1.0 + sqrt(1.0 + 8.0*N)) / 2.0); for (long i = 0; i <= stop; i++) { long index1 = i*(i+1)/2; long value1 = (i & 1) ? (-2*i-1) : (2*i+1); for (long j = 0; j <= stop; j++) { long index2 = j*(j+1)/2; if (index1 + index2 >= N) break; long value2 = (j & 1) ? 
(-2*j-1) : (2*j+1); values[index1 + index2] += value1 * value2; } } // Create some polynomial objects fmpz_poly_t F2, F4, F8; fmpz_poly_init(F2); fmpz_poly_init(F4); fmpz_poly_init(F8); fmpz_poly_fit_length(F2, N); for (long i = 0; i < N; i++) fmpz_poly_set_coeff_si(F2, i, values[i]); free(values); // compute F^4, truncated to length N fmpz_poly_mul_trunc_n(F4, F2, F2, N); // compute F^8, truncated to length N fmpz_poly_mul_trunc_n(F8, F4, F4, N); // print out last coefficient fmpz_t coeff = fmpz_poly_get_coeff_ptr(F8, N-1); printf("coefficient of q^%d is ", N); fmpz_print(coeff); printf("\n"); // clean up fmpz_poly_clear(F8); fmpz_poly_clear(F4); fmpz_poly_clear(F2); return 0; } flint-1.011/NTL-profile.c0000644017361200017500000002324111025357254014764 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** NTL-profile.c Profiling for NTL Copyright (C) 2007, Tomasz Lechowski, William Hart and David Harvey *****************************************************************************/ #include #include #include #include "profiler-main.h" #include "flint.h" #include "memory-manager.h" #include "fmpz_poly.h" #include "mpz_poly.h" #include "test-support.h" #include #include #include #include //============================================================================= NTL_CLIENT // whether to generate signed or unsigned random polys #define SIGNS 0 unsigned long randint(unsigned long randsup) { static unsigned long randval = 4035456057U; randval = ((unsigned long)randval*1025416097U+286824428U)%(unsigned long)4294967291U; return (unsigned long)randval%randsup; } // ============================================================================ /* Calls prof2d_sample(length, bits, NULL) for all length, bits combinations such that length*bits < max_bits, with length and bits spaced out by the given ratio */ void run_triangle(unsigned long max_bits, double ratio) { int max_iter = (int) ceil(log((double) max_bits) / log(ratio)); unsigned long last_length = 0; for (unsigned long i = 0; i <= max_iter; i++) { unsigned long length = (unsigned long) floor(powl(ratio, i)); if (length != last_length) { last_length = length; unsigned long last_bits = 0; for (unsigned long j = 0; j <= max_iter; j++) { unsigned long bits = (unsigned long) floor(powl(ratio, j)); if (bits != last_bits) { last_bits = bits; if (bits * length < max_bits) prof2d_sample(length, bits, NULL); } } } } } // 
============================================================================ void sample_NTL_factor(unsigned long length, unsigned long bits, void* arg, unsigned long count) { ZZX poly1, poly2, poly3; ZZ a, c; vec_pair_ZZX_long factors; poly1.SetMaxLength(length); //poly2.SetMaxLength(length); //poly3.SetMaxLength(2*length-1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { for (unsigned long j = 0; j= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { for (unsigned long j = 0; j= 10000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 4; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { do { for (unsigned long j = 0; j < length; j++) { RandomBits(a,bits); SetCoeff(poly1,j,a); } } while (IsZero(poly1)); for (unsigned long j = 0; j < length; j++) { RandomBits(a,bits); SetCoeff(poly2,j,a); } } mul(poly3, poly1, poly2); prof_start(); for (unsigned long count2 = 0; count2 < r_count; count2++) { divide(poly2, poly3, poly1); } prof_stop(); i += (r_count-1); } } char* profDriverString_NTL_poly_div1(char* params) { return "NTL_poly_div1 over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_NTL_poly_div1() { return "1000000 1.2"; } void profDriver_NTL_poly_div1(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_NTL_poly_div1); run_triangle(max_bits, ratio); test_support_cleanup(); } // 
============================================================================ void sample_NTL_poly_div2(unsigned long length, unsigned long bits, void* arg, unsigned long count) { ZZX poly1; ZZX poly2; ZZX poly3; ZZ a; poly1.SetMaxLength(length); poly2.SetMaxLength(length); poly3.SetMaxLength(2*length-1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { for (unsigned long j = 0; j #include // ============================================================================ /* this function samples multiplying polynomials of lengths len1 and len2 using mpz_poly_mul_karatsuba arg should point to an unsigned long, giving the coefficient bitlengths */ void sample_mpz_poly_mul_karatsuba_mixlengths( unsigned long len1, unsigned long len2, void* arg, unsigned long count) { unsigned long bits = *(unsigned long*) arg; mpz_poly_t poly1, poly2, poly3; mpz_poly_init(poly1); mpz_poly_init(poly2); mpz_poly_init(poly3); mpz_t x; mpz_init(x); for (unsigned long i = 0; i < len1; i++) { mpz_urandomb(x, randstate, bits); if (random_ulong(2)) mpz_neg(x, x); mpz_poly_set_coeff(poly1, i, x); } for (unsigned long i = 0; i < len2; i++) { mpz_urandomb(x, randstate, bits); if (random_ulong(2)) mpz_neg(x, x); mpz_poly_set_coeff(poly2, i, x); } mpz_clear(x); prof_start(); for (unsigned long i = 0; i < count; i++) mpz_poly_mul_karatsuba(poly3, poly1, poly2); prof_stop(); mpz_poly_clear(poly3); mpz_poly_clear(poly2); mpz_poly_clear(poly1); } char* profDriverString_mpz_poly_mul_karatsuba_mixlengths(char* params) { return "mpz_poly_mul_karatubsa for distinct input lengths and fixed\n" "coefficient size. 
Parameters are: max length; length skip; coefficient size (in bits)\n"; } char* profDriverDefaultParams_mpz_poly_mul_karatsuba_mixlengths() { return "50 1 100"; } void profDriver_mpz_poly_mul_karatsuba_mixlengths(char* params) { unsigned long max_length, skip, bits; sscanf(params, "%ld %ld %ld", &max_length, &skip, &bits); prof2d_set_sampler(sample_mpz_poly_mul_karatsuba_mixlengths); test_support_init(); for (unsigned long len1 = skip; len1 <= max_length; len1 += skip) for (unsigned long len2 = skip; len2 <= len1; len2 += skip) prof2d_sample(len1, len2, &bits); test_support_cleanup(); } // ============================================================================ /* this function samples multiplying polynomials of lengths len1 and len2 using mpz_poly_mul_karatsuba arg should point to an unsigned long, giving the coefficient bitlengths */ void sample__mpz_poly_mul_kara_recursive_mixlengths( unsigned long len1, unsigned long len2, void* arg, unsigned long count) { unsigned long bits = *(unsigned long*) arg; mpz_poly_t poly1, poly2, poly3; mpz_poly_init(poly1); mpz_poly_init(poly2); mpz_poly_init(poly3); mpz_poly_ensure_alloc(poly3, len1 + len2); // allocate scratch space unsigned long scratch_len = len1 + len2; mpz_t* scratch = (mpz_t*) malloc(scratch_len * sizeof(mpz_t)); for (unsigned long i = 0; i < scratch_len; i++) mpz_init2(scratch[i], 2*bits + 100); mpz_t x; mpz_init(x); for (unsigned long i = 0; i < len1; i++) { mpz_urandomb(x, randstate, bits); if (random_ulong(2)) mpz_neg(x, x); mpz_poly_set_coeff(poly1, i, x); } for (unsigned long i = 0; i < len2; i++) { mpz_urandomb(x, randstate, bits); if (random_ulong(2)) mpz_neg(x, x); mpz_poly_set_coeff(poly2, i, x); } mpz_clear(x); unsigned long crossover = _mpz_poly_mul_karatsuba_crossover(bits / FLINT_BITS); prof_start(); for (unsigned long i = 0; i < count; i++) _mpz_poly_mul_kara_recursive(poly3->coeffs, poly1->coeffs, len1, poly2->coeffs, len2, scratch, 1, crossover); prof_stop(); for (unsigned long i = 0; i < 
scratch_len; i++) mpz_clear(scratch[i]); free(scratch); mpz_poly_clear(poly3); mpz_poly_clear(poly2); mpz_poly_clear(poly1); } char* profDriverString__mpz_poly_mul_kara_recursive_mixlengths(char* params) { return "_mpz_poly_mul_kara_recursive for distinct input lengths and fixed\n" "coefficient size. Parameters are: max length; length skip; coefficient size (in bits)\n"; } char* profDriverDefaultParams__mpz_poly_mul_kara_recursive_mixlengths() { return "50 1 100"; } void profDriver__mpz_poly_mul_kara_recursive_mixlengths(char* params) { unsigned long max_length, skip, bits; sscanf(params, "%ld %ld %ld", &max_length, &skip, &bits); prof2d_set_sampler(sample__mpz_poly_mul_kara_recursive_mixlengths); test_support_init(); for (unsigned long len2 = skip; len2 <= max_length; len2 += skip) for (unsigned long len1 = skip; len1 <= len2; len1 += skip) prof2d_sample(len1, len2, &bits); test_support_cleanup(); } // end of file **************************************************************** flint-1.011/mpn_extras-test.c0000644017361200017500000002674011025357254016035 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** Z_mpn-test.c: test module for Z_mpn module Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include #include #include #include "flint.h" #include "memory-manager.h" #include "mpn_extras.h" #include "mpz_poly.h" #include "test-support.h" #include "F_mpn_mul-tuning.h" #define VARY_BITS 1 #define SIGNS 1 #define DEBUG 0 // prints debug information #define DEBUG2 1 /**************************************************************************** Test code for Conversion Routines ****************************************************************************/ unsigned long randint(unsigned long randsup) { static unsigned long randval = 4035456057U; randval = ((unsigned long)randval*1025416097U+286824428U)%(unsigned long)4294967291U; return (unsigned long)randval%randsup; } void randpoly(mpz_poly_t pol, unsigned long length, unsigned long maxbits) { unsigned long bits; mpz_t temp; mpz_init(temp); mpz_poly_zero(pol); for (unsigned long i = 0; i < length; i++) { #if VARY_BITS bits = randint(maxbits); #else bits = maxbits; #endif if (bits == 0) mpz_set_ui(temp,0); else { mpz_rrandomb(temp, randstate, bits); #if SIGNS if (randint(2)) mpz_neg(temp,temp); #endif } mpz_poly_set_coeff(pol, i, temp); } mpz_clear(temp); } void randpoly_unsigned(mpz_poly_t pol, unsigned long length, unsigned long maxbits) { unsigned long bits; mpz_t temp; mpz_init(temp); mpz_poly_zero(pol); for (unsigned long i = 0; i < length; i++) { #if VARY_BITS bits = randint(maxbits); #else bits = maxbits; #endif if (bits == 0) mpz_set_ui(temp,0); else { mpz_rrandomb(temp, randstate, bits); } 
mpz_poly_set_coeff(pol, i, temp); } mpz_clear(temp); } int test_F_mpn_splitcombine_bits() { mp_limb_t * int1, * int2; ZmodF_poly_t poly; int result = 1; for (unsigned long count = 0; (count < 30000) && (result == 1); count++) { unsigned long limbs = randint(300)+1; unsigned long bits = randint(500)+1; unsigned long coeff_limbs = randint(100) + (bits-1)/FLINT_BITS + 1; unsigned long length = (FLINT_BITS*limbs - 1)/bits + 1; unsigned long log_length = 0; while ((1L << log_length) < length) log_length++; #if DEBUG printf("limbs = %ld, bits = %ld, coeff_limbs = %ld\n", limbs, bits, coeff_limbs); #endif int1 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs); int2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs); ZmodF_poly_init(poly, log_length, coeff_limbs, 0); mpn_random2(int1, limbs); F_mpn_FFT_split_bits(poly, int1, limbs, bits, coeff_limbs); F_mpn_clear(int2, limbs); F_mpn_FFT_combine_bits(int2, poly, bits, coeff_limbs, limbs); #if DEBUG F_mpn_printx(int1, limbs); printf("\n\n"); for (unsigned long i = 0; i < length; i++) { F_mpn_printx(poly->coeffs[i], coeff_limbs); printf("\n");} printf("\n"); F_mpn_printx(int2, limbs); printf("\n\n"); #endif for (unsigned long j = 0; j < limbs; j++) { if (int1[j] != int2[j]) result = 0; } ZmodF_poly_clear(poly); free(int2); free(int1); } return result; } int test_F_mpn_mul_precomp() { mp_limb_t * int1, * int2, * product, * product2; F_mpn_precomp_t precomp; mp_limb_t msl; int result = 1; for (unsigned long count = 0; (count < 30) && (result == 1); count++) { unsigned long limbs2 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; unsigned long limbs1 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; int1 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs1); mpn_random2(int1, limbs1); F_mpn_mul_precomp_init(precomp, int1, limbs1, limbs2); for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++) { #if DEBUG printf("%ld, %ld\n",limbs1, limbs2); #endif unsigned long limbs3 = randint(limbs2)+1; int2 = (mp_limb_t *) 
malloc(sizeof(mp_limb_t)*limbs3); product = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); product2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); F_mpn_clear(int2, limbs3); mpn_random2(int2, limbs3); F_mpn_mul_precomp(product, int2, limbs3, precomp); if (limbs1 > limbs3) msl = mpn_mul(product2, int1, limbs1, int2, limbs3); else msl = mpn_mul(product2, int2, limbs3, int1, limbs1); for (unsigned long j = 0; j < limbs1+limbs3 - (msl == 0); j++) { if (product[j] != product2[j]) result = 0; } free(product2); free(product); free(int2); } F_mpn_mul_precomp_clear(precomp); free(int1); } return result; } int test_F_mpn_mul_precomp_trunc() { mp_limb_t * int1, * int2, * product, * product2; F_mpn_precomp_t precomp; mp_limb_t msl; int result = 1; for (unsigned long count = 0; (count < 30) && (result == 1); count++) { unsigned long limbs2 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; unsigned long limbs1 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; int1 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs1); mpn_random2(int1, limbs1); F_mpn_mul_precomp_init(precomp, int1, limbs1, limbs2); for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++) { unsigned long limbs3 = randint(limbs2)+1; unsigned long trunc = randint(2*(limbs1+limbs3)); #if DEBUG printf("limbs1 = %ld, limbs3 = %ld, trunc = %ld\n", limbs1, limbs3, trunc); #endif int2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs3); product = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); product2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); F_mpn_clear(int2, limbs3); mpn_random2(int2, limbs3); if (limbs1 > limbs3) F_mpn_mul_trunc(product2, int1, limbs1, int2, limbs3, trunc); else F_mpn_mul_trunc(product2, int2, limbs3, int1, limbs1, trunc); F_mpn_mul_precomp_trunc(product, int2, limbs3, precomp, trunc); for (unsigned long j = 0; j < FLINT_MIN(trunc, limbs1+limbs3); j++) { if (product[j] != product2[j]) { printf("Failure at %ld\n", j); result = 0; } } free(product2); 
free(product); free(int2); } F_mpn_mul_precomp_clear(precomp); free(int1); } return result; } int test_F_mpn_mul() { mp_limb_t * int1, * int2, * product, * product2; mp_limb_t msl, msl2; int result = 1; for (unsigned long count = 0; (count < 30) && (result == 1); count++) { unsigned long limbs2 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; unsigned long limbs1 = limbs2 + randint(1000); int1 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs1); mpn_random2(int1, limbs1); for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++) { #if DEBUG printf("%ld, %ld\n",limbs1, limbs2); #endif int2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs2); product = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); product2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); F_mpn_clear(int2, limbs2); mpn_random2(int2, randint(limbs2-1)+1); msl = F_mpn_mul(product, int1, limbs1, int2, limbs2); msl2 = mpn_mul(product2, int1, limbs1, int2, limbs2); for (unsigned long j = 0; j < limbs1+limbs2 - (msl == 0); j++) { if (product[j] != product2[j]) result = 0; } result &= (msl == msl2); free(product2); free(product); free(int2); } free(int1); } return result; } int test_F_mpn_mul_trunc() { mp_limb_t * int1, * int2, * product, * product2; mp_limb_t msl; int result = 1; for (unsigned long count = 0; (count < 30) && (result == 1); count++) { unsigned long limbs2 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; unsigned long limbs1 = limbs2 + randint(1000); int1 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs1); mpn_random2(int1, limbs1); for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++) { #if DEBUG printf("%ld, %ld\n",limbs1, limbs2); #endif unsigned long trunc = randint(limbs1 + limbs2 - 1)+1; int2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs2); product = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); product2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); F_mpn_clear(int2, limbs2); mpn_random2(int2, limbs2); F_mpn_mul_trunc(product, 
int1, limbs1, int2, limbs2, trunc); mpn_mul(product2, int1, limbs1, int2, limbs2); for (unsigned long j = 0; j < trunc; j++) { if (product[j] != product2[j]) result = 0; } free(product2); free(product); free(int2); } free(int1); } return result; } /**************************************************************************** Main test functions ****************************************************************************/ #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? "ok\n" : "FAIL!\n"); void F_mpn_test_all() { int success, all_success = 1; RUN_TEST(F_mpn_splitcombine_bits); RUN_TEST(F_mpn_mul); RUN_TEST(F_mpn_mul_trunc); RUN_TEST(F_mpn_mul_precomp); RUN_TEST(F_mpn_mul_precomp_trunc); printf(all_success ? "\nAll tests passed\n" : "\nAt least one test FAILED!\n"); } int main() { test_support_init(); F_mpn_test_all(); test_support_cleanup(); flint_stack_cleanup(); return 0; } // end of file **************************************************************** flint-1.011/zmod_poly-profile.c0000644017361200017500000005012311025357254016342 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** zmod_poly-profile.c : Profiling code for zmod_poly Copyright (C) 2007, David Howden *****************************************************************************/ #include "profiler-main.h" #include "zmod_poly.h" #include "long_extras.h" #include "flint.h" #include #include // ============================================================================ unsigned long randint(unsigned long limit) { #if FLINT_BITS == 32 static uint64_t randval = 4035456057U; randval = ((uint64_t)randval*(uint64_t)1025416097U+(uint64_t)286824430U)%(uint64_t)4294967311U; if (limit == 0L) return (unsigned long) randval; return (unsigned long)randval%limit; #else static unsigned long randval = 4035456057U; static unsigned long randval2 = 6748392731U; randval = ((unsigned long)randval*(unsigned long)1025416097U+(unsigned long)286824428U)%(unsigned long)4294967311U; randval2 = ((unsigned long)randval2*(unsigned long)1647637699U+(unsigned long)286824428U)%(unsigned long)4294967357U; if (limit == 0L) return (unsigned long) randval; return (unsigned long)(randval+(randval2<<32))%limit; #endif } /* Generate a random integer with up to the given number of bits [0, FLINT_BITS] */ unsigned long randbits(unsigned long bits) { return randint(l_shift(1L, bits)); } /* Generate a random zmod polynomial with the modulus n of the given length with normalised coefficients */ void randpoly(zmod_poly_t poly, long length, unsigned long n) { if (length == 0) { zmod_poly_fit_length(poly, 1); poly->length = 0; return; } zmod_poly_fit_length(poly, length); for (unsigned long i = 0; i < length; i++) poly->coeffs[i] = randint(n); poly->length = length; 
__zmod_poly_normalise(poly); } void sample_zmod_poly_mul_KS(unsigned long length, unsigned long bits, void* arg, unsigned long count) { zmod_poly_t pol1, pol2, res1; unsigned long modulus; zmod_poly_init(pol1, modulus); zmod_poly_init(pol2, modulus); zmod_poly_init(res1, modulus); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long count2 = 0; count2 < count; count2++) { if (count2 % r_count == 0) { do {modulus = randbits(bits);} while (modulus < 2); zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); zmod_poly_init(pol1, modulus); zmod_poly_init(pol2, modulus); zmod_poly_init(res1, modulus); randpoly(pol1, length, modulus); randpoly(pol2, length, modulus); } #if DEBUG printf("bits = %ld, length = %ld, modulus = %ld\n", bits, length, modulus); #endif prof_start(); zmod_poly_mul_KS(res1, pol1, pol2, 0); prof_stop(); } zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); } char* profDriverString_zmod_poly_mul_KS(char* params) { return "zmod_poly_mul_KS over various lengths and various bit sizes.\n" "Parameters: n_min, n_max, n_ratio.\n"; } char* profDriverDefaultParams_zmod_poly_mul_KS() { return "1 1000 1.2"; } void profDriver_zmod_poly_mul_KS(char* params) { unsigned long n, n_min, n_max; double n_ratio; sscanf(params, "%ld %ld %lf", &n_min, &n_max, &n_ratio); unsigned long last_n = 0; prof2d_set_sampler(sample_zmod_poly_mul_KS); int max_iter = (int) ceil(log((double) n_max) / log(n_ratio)); int min_iter = (int) ceil(log((double) n_min) / log(n_ratio)); for (unsigned long i = min_iter; i < max_iter; i++) { n = (unsigned long) floor(pow(n_ratio, i)); if (n != last_n) { last_n = n; for (unsigned long bits = 2; bits < 64; bits++) { unsigned long log_length = 0; while ((1L<= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 
20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long count2 = 0; count2 < count; count2++) { if (count2 % r_count == 0) { do {modulus = randbits(bits);} while (modulus < 2); zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); zmod_poly_init(pol1, modulus); zmod_poly_init(pol2, modulus); zmod_poly_init(res1, modulus); randpoly(pol1, length, modulus); randpoly(pol2, length, modulus); } #if DEBUG printf("bits = %ld, length = %ld, modulus = %ld\n", bits, length, modulus); #endif prof_start(); zmod_poly_mul_KS_trunc(res1, pol1, pol2, 0, length); prof_stop(); } zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); } char* profDriverString_zmod_poly_mul_KS_trunc(char* params) { return "zmod_poly_mul_KS_trunc over various lengths and various bit sizes.\n" "Parameters: n_min, n_max, n_ratio.\n"; } char* profDriverDefaultParams_zmod_poly_mul_KS_trunc() { return "1 1000 1.2"; } void profDriver_zmod_poly_mul_KS_trunc(char* params) { unsigned long n, n_min, n_max; double n_ratio; sscanf(params, "%ld %ld %lf", &n_min, &n_max, &n_ratio); unsigned long last_n = 0; prof2d_set_sampler(sample_zmod_poly_mul_KS_trunc); int max_iter = (int) ceil(log((double) n_max) / log(n_ratio)); int min_iter = (int) ceil(log((double) n_min) / log(n_ratio)); for (unsigned long i = min_iter; i < max_iter; i++) { n = (unsigned long) floor(pow(n_ratio, i)); if (n != last_n) { last_n = n; for (unsigned long bits = 2; bits < 64; bits++) { unsigned long log_length = 0; while ((1L<= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long count2 = 0; count2 < count; count2++) { if (count2 % r_count == 0) { do {modulus = randbits(bits);} while (modulus < 2); zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); zmod_poly_init(pol1, modulus); zmod_poly_init(pol2, modulus); zmod_poly_init(res1, modulus); randpoly(pol1, 
length, modulus); randpoly(pol2, length, modulus); } #if DEBUG printf("bits = %ld, length = %ld, modulus = %ld\n", bits, length, modulus); #endif prof_start(); zmod_poly_mul_classical(res1, pol1, pol2); prof_stop(); } zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); } char* profDriverString_zmod_poly_mul_classical(char* params) { return "zmod_poly_mul_classical over various lengths and various bit sizes.\n" "Parameters: n_min, n_max, n_ratio.\n"; } char* profDriverDefaultParams_zmod_poly_mul_classical() { return "1 1000 1.2"; } void profDriver_zmod_poly_mul_classical(char* params) { unsigned long n, n_min, n_max; double n_ratio; sscanf(params, "%ld %ld %lf", &n_min, &n_max, &n_ratio); unsigned long last_n = 0; prof2d_set_sampler(sample_zmod_poly_mul_classical); int max_iter = (int) ceil(log((double) n_max) / log(n_ratio)); int min_iter = (int) ceil(log((double) n_min) / log(n_ratio)); for (unsigned long i = min_iter; i < max_iter; i++) { n = (unsigned long) floor(pow(n_ratio, i)); if (n != last_n) { last_n = n; for (unsigned long bits = 2; bits < 64; bits++) { unsigned long log_length = 0; while ((1L<= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long count2 = 0; count2 < count; count2++) { if (count2 % r_count == 0) { do {modulus = randbits(bits);} while (modulus < 2); zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); zmod_poly_init(pol1, modulus); zmod_poly_init(pol2, modulus); zmod_poly_init(res1, modulus); randpoly(pol1, length, modulus); randpoly(pol2, length, modulus); } #if DEBUG printf("bits = %ld, length = %ld, modulus = %ld\n", bits, length, modulus); #endif prof_start(); zmod_poly_mul_classical_trunc(res1, pol1, pol2, length); prof_stop(); } zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); } char* profDriverString_zmod_poly_mul_classical_trunc(char* params) { return 
"zmod_poly_mul_classical_trunc over various lengths and various bit sizes.\n" "Parameters: n_min, n_max, n_ratio.\n"; } char* profDriverDefaultParams_zmod_poly_mul_classical_trunc() { return "1 1000 1.2"; } void profDriver_zmod_poly_mul_classical_trunc(char* params) { unsigned long n, n_min, n_max; double n_ratio; sscanf(params, "%ld %ld %lf", &n_min, &n_max, &n_ratio); unsigned long last_n = 0; prof2d_set_sampler(sample_zmod_poly_mul_classical_trunc); int max_iter = (int) ceil(log((double) n_max) / log(n_ratio)); int min_iter = (int) ceil(log((double) n_min) / log(n_ratio)); for (unsigned long i = min_iter; i < max_iter; i++) { n = (unsigned long) floor(pow(n_ratio, i)); if (n != last_n) { last_n = n; for (unsigned long bits = 2; bits < 64; bits++) { unsigned long log_length = 0; while ((1L<= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long count2 = 0; count2 < count; count2++) { if (count2 % r_count == 0) { do {modulus = randbits(bits);} while (modulus < 2); zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); zmod_poly_init(pol1, modulus); zmod_poly_init(pol2, modulus); zmod_poly_init(res1, modulus); randpoly(pol1, length, modulus); randpoly(pol2, length, modulus); } #if DEBUG printf("bits = %ld, length = %ld, modulus = %ld\n", bits, length, modulus); #endif prof_start(); zmod_poly_mul_classical_trunc_left(res1, pol1, pol2, length); prof_stop(); } zmod_poly_clear(pol1); zmod_poly_clear(pol2); zmod_poly_clear(res1); } char* profDriverString_zmod_poly_mul_classical_trunc_left(char* params) { return "zmod_poly_mul_classical_trunc_left over various lengths and various bit sizes.\n" "Parameters: n_min, n_max, n_ratio.\n"; } char* profDriverDefaultParams_zmod_poly_mul_classical_trunc_left() { return "1 1000 1.2"; } void profDriver_zmod_poly_mul_classical_trunc_left(char* params) { unsigned long n, n_min, n_max; double n_ratio; 
sscanf(params, "%ld %ld %lf", &n_min, &n_max, &n_ratio); unsigned long last_n = 0; prof2d_set_sampler(sample_zmod_poly_mul_classical_trunc_left); int max_iter = (int) ceil(log((double) n_max) / log(n_ratio)); int min_iter = (int) ceil(log((double) n_min) / log(n_ratio)); for (unsigned long i = min_iter; i < max_iter; i++) { n = (unsigned long) floor(pow(n_ratio, i)); if (n != last_n) { last_n = n; for (unsigned long bits = 2; bits < 64; bits++) { unsigned long log_length = 0; while ((1L<= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long count2 = 0; count2 < count; count2++) { if (count2 % r_count == 0) { do {modulus = randbits(bits);} while (modulus < 2); zmod_poly_clear(pol1); zmod_poly_clear(res1); zmod_poly_init(pol1, modulus); zmod_poly_init(res1, modulus); randpoly(pol1, length, modulus); } #if DEBUG printf("bits = %ld, length = %ld, modulus = %ld\n", bits, length, modulus); #endif prof_start(); zmod_poly_mul_KS(res1, pol1, pol1, 0); prof_stop(); } zmod_poly_clear(pol1); zmod_poly_clear(res1); } char* profDriverString_zmod_poly_sqr_KS(char* params) { return "zmod_poly_mul_KS squaring over various lengths and various bit sizes.\n" "Parameters: n_min, n_max, n_ratio.\n"; } char* profDriverDefaultParams_zmod_poly_sqr_KS() { return "1 1000 1.2"; } void profDriver_zmod_poly_sqr_KS(char* params) { unsigned long n, n_min, n_max; double n_ratio; sscanf(params, "%ld %ld %lf", &n_min, &n_max, &n_ratio); unsigned long last_n = 0; prof2d_set_sampler(sample_zmod_poly_sqr_KS); int max_iter = (int) ceil(log((double) n_max) / log(n_ratio)); int min_iter = (int) ceil(log((double) n_min) / log(n_ratio)); for (unsigned long i = min_iter; i < max_iter; i++) { n = (unsigned long) floor(pow(n_ratio, i)); if (n != last_n) { last_n = n; for (unsigned long bits = 2; bits < 64; bits++) { unsigned long log_length = 0; while ((1L<= 1000) r_count = 100; else if (count >= 100) 
r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long count2 = 0; count2 < count; count2++) { if (count2 % r_count == 0) { do {modulus = randbits(bits);} while (modulus < 2); zmod_poly_clear(pol1); zmod_poly_clear(res1); zmod_poly_init(pol1, modulus); zmod_poly_init(res1, modulus); randpoly(pol1, length, modulus); } #if DEBUG printf("bits = %ld, length = %ld, modulus = %ld\n", bits, length, modulus); #endif prof_start(); zmod_poly_sqr_classical(res1, pol1); prof_stop(); } zmod_poly_clear(pol1); zmod_poly_clear(res1); } char* profDriverString_zmod_poly_sqr_classical(char* params) { return "zmod_poly_sqr_classical over various lengths and various bit sizes.\n" "Parameters: n_min, n_max, n_ratio.\n"; } char* profDriverDefaultParams_zmod_poly_sqr_classical() { return "1 1000 1.2"; } void profDriver_zmod_poly_sqr_classical(char* params) { unsigned long n, n_min, n_max; double n_ratio; sscanf(params, "%ld %ld %lf", &n_min, &n_max, &n_ratio); unsigned long last_n = 0; prof2d_set_sampler(sample_zmod_poly_sqr_classical); int max_iter = (int) ceil(log((double) n_max) / log(n_ratio)); int min_iter = (int) ceil(log((double) n_min) / log(n_ratio)); for (unsigned long i = min_iter; i < max_iter; i++) { n = (unsigned long) floor(pow(n_ratio, i)); if (n != last_n) { last_n = n; for (unsigned long bits = 2; bits < 64; bits++) { unsigned long log_length = 0; while ((1L< #include "tinyQS.h" #include "common.h" #define KSMAX 100 static const unsigned long prime_tab_small[][2] = { {32, 30}, {40, 50}, {50, 80}, {60, 100}, {70, 150}, {80, 200}, {90, 200}, {100, 250}, {110, 300}, {120, 500}, {130, 550} }; #define PTABSIZE_SMALL (sizeof(prime_tab_small)/(2*sizeof(unsigned long))) unsigned long num_FB_primes(unsigned long bits); void sqrts_init(QS_t * qs_inf); void sqrts_clear(void); void compute_sizes(QS_t * qs_inf); void sizes_clear(void); //Knuth-Schroeppel multipliers and a macro to count them static const unsigned long 
multipliers[] = {1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17, 19, 21, 22, 23, 26, 29, 30, 31, 33, 34, 35, 37, 38, 41, 42, 43, 47}; #define NUMMULTS (sizeof(multipliers)/sizeof(unsigned long)) #define max_mult_size 6 // number of bits of maximum multiplier void primes_clear(void); void primes_init(QS_t * qs_inf); unsigned long knuth_schroeppel(QS_t * qs_inf); unsigned long compute_factor_base(QS_t * qs_inf); #endif flint-1.011/QS/mp_linear_algebra.c0000644017361200017500000003145111025357253016640 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mp_linear_algebra.c Routines for dealing with building and handling the final F_2 matrix (C) 2006 William Hart ******************************************************************************/ #include #include #include #include "../flint.h" #include "../memory-manager.h" #include "common.h" #include "mp_poly.h" #include "mp_linear_algebra.h" #include "block_lanczos.h" #include "mp_lprels.h" /*========================================================================= linear_algebra_init: Function: Allocate space for the various linear algebra structures ==========================================================================*/ void linear_algebra_init(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf) { la_col_t * matrix; mpz_t * Y_arr; unsigned long prec = qs_inf->prec+1; unsigned long small_primes = qs_inf->small_primes; const unsigned long buffer_size = 2*(qs_inf->num_primes + EXTRA_RELS + 200); // Allows for 1/2 of relations to be duplicates la_inf->small = (unsigned long *) flint_stack_alloc(small_primes); la_inf->factor = (fac_t *) flint_stack_alloc_bytes(sizeof(fac_t)*MAX_FACS); matrix = la_inf->matrix = (la_col_t *) flint_stack_alloc_bytes(sizeof(la_col_t)*(qs_inf->num_primes + EXTRA_RELS + 400)); la_inf->unmerged = la_inf->matrix + qs_inf->num_primes + EXTRA_RELS + 200; Y_arr = la_inf->Y_arr = (mpz_t *) flint_stack_alloc_bytes(sizeof(mpz_t)*buffer_size); la_inf->curr_rel = la_inf->relation = (unsigned long *) flint_stack_alloc(buffer_size*MAX_FACS*2); la_inf->qsort_arr = (la_col_t **) flint_stack_alloc(200); la_inf->rel_str = (char *) flint_stack_alloc(MPQS_STRING_LENGTH); la_inf->lpnew = 
flint_fopen("lpnew", "w"); FILE * lprels = flint_fopen("lprels","w"); fclose(lprels); for (unsigned long i = 0; i < buffer_size; i++) { mpz_init2(Y_arr[i], prec); } for (unsigned long i = 0; i < qs_inf->num_primes + EXTRA_RELS + 400; i++) { matrix[i].weight = 0; } la_inf->num_unmerged = 0; la_inf->num_lp_unmerged = 0; la_inf->columns = 0; la_inf->num_relations = 0; } void linear_algebra_clear(linalg_t * la_inf, QS_t * qs_inf) { la_col_t * matrix = la_inf->matrix; la_col_t * unmerged = la_inf->unmerged; mpz_t * Y_arr = la_inf->Y_arr; const unsigned long buffer_size = 4*(qs_inf->num_primes + EXTRA_RELS + 200)/2; for (unsigned long i = 0; i < buffer_size; i++) { mpz_clear(Y_arr[i]); } for (unsigned long i = 0; i < la_inf->columns; i++) // Clear all used columns { free_col(matrix + i); } for (unsigned long i = 0; i < la_inf->num_unmerged; i++) // Clear all used columns { free_col(unmerged + i); } fclose(la_inf->lpnew); flint_stack_release(); // Clear rel_str flint_stack_release(); // Clear qsort_array flint_stack_release(); // Clear relation flint_stack_release(); // Clear Y_arr flint_stack_release(); // Clear matrix and unmerged flint_stack_release(); // Clear factor flint_stack_release(); // Clear small } /*========================================================================= Compare relations: Function: Compare two relations; used by qsort ==========================================================================*/ int relations_cmp(const void *a, const void *b) { la_col_t * ra = *((la_col_t **) a); la_col_t * rb = *((la_col_t **) b); long point; if (ra->weight > rb->weight) return 1; else if (ra->weight < rb->weight) return -1; for (point = ra->weight-1; (ra->data[point] == rb->data[point]) && (point >= 0); point--) { ; } if (point == -1L) return 0; if (ra->data[point] > rb->data[point]) return 1; else if (ra->data[point] < rb->data[point]) return -1; } int relations_cmp2(const void *a, const void *b) { la_col_t * ra = (la_col_t *) a; la_col_t * rb = (la_col_t 
*) b; long point; if (ra->weight > rb->weight) return 1; else if (ra->weight < rb->weight) return -1; for (point = ra->weight-1; (ra->data[point] == rb->data[point]) && (point >= 0); point--) { ; } if (point == -1L) return 0; if (ra->data[point] > rb->data[point]) return 1; else if (ra->data[point] < rb->data[point]) return -1; } /*========================================================================== Merge sort: Function: Merge a list of sorted new relations into a list of existing sorted relations. Sort is done using a merge sort algorithm with a short stack. ===========================================================================*/ unsigned long merge_sort(linalg_t * la_inf) { la_col_t * matrix = la_inf->matrix; long columns = la_inf->columns; la_col_t ** qsort_arr = la_inf->qsort_arr; long num_unmerged = la_inf->num_unmerged; long dups = 0; int comp; for (long i = columns + num_unmerged - 1L; i >= dups; i--) { if (!columns) comp = -1; else if (!num_unmerged) comp = 1; else { comp = relations_cmp2(matrix + columns - 1L, qsort_arr[num_unmerged - 1L]); } switch (comp) { case -1: { copy_col(matrix + i, qsort_arr[num_unmerged - 1L]); clear_col(qsort_arr[num_unmerged - 1L]); num_unmerged--; break; } case 1 : { copy_col(matrix + i, matrix + columns - 1L); columns--; break; } case 0 : { free_col(qsort_arr[num_unmerged - 1L]); clear_col(qsort_arr[num_unmerged - 1L]); num_unmerged--; copy_col(matrix + i, matrix + columns - 1L); columns--; dups++; break; } } } columns = la_inf->columns + la_inf->num_unmerged - dups; if (dups) { for (unsigned long i = 0; i < columns; i++) { copy_col(matrix + i, matrix + i + dups); } } la_inf->columns = columns; columns = la_inf->num_unmerged - dups; la_inf->num_unmerged = 0; #if DUPS printf("%ld new, %ld dups\n", columns, dups); #endif return columns; } /*========================================================================== Merge relations: Function: Merge unmerged relations into the matrix 
===========================================================================*/ unsigned long merge_relations(linalg_t * la_inf) { const unsigned long num_unmerged = la_inf->num_unmerged; la_col_t * unmerged = la_inf->unmerged; la_col_t ** qsort_arr = la_inf->qsort_arr; if (num_unmerged) { for (unsigned long i = 0; i < num_unmerged; i++) { qsort_arr[i] = unmerged + i; } qsort(qsort_arr, num_unmerged, sizeof(la_col_t *), relations_cmp); if ((la_inf->num_relations & 7) == 0) printf("%ld relations found\n", la_inf->num_relations); return merge_sort(la_inf); } return 0; } unsigned long merge_lp_relations(QS_t * qs_inf, poly_t * poly_inf, linalg_t * la_inf) { FILE * comb; unsigned long combined; fclose(la_inf->lpnew); sort_lp_file("lpnew"); comb = flint_fopen("comb","w"); mergesort_lp_file("lprels", "lpnew", "tmp", comb); fclose(comb); la_inf->lpnew = flint_fopen("lpnew","w"); mpz_t factor; mpz_init(factor); comb = flint_fopen("comb", "r"); combined = combine_large_primes(qs_inf, la_inf, poly_inf, comb, factor); mpz_clear(factor); fclose(comb); la_inf->num_lp_unmerged = 0; return combined; } /*========================================================================== Insert large prime partial relation: Function: Insert the partial relation into the lprels file, return the number of full relations obtained after any sorting and merging ===========================================================================*/ unsigned long insert_lp_relation(QS_t * qs_inf, linalg_t * la_inf, poly_t * poly_inf, mpz_t Y, mpz_t res) { char * rel_str = la_inf->rel_str; char * rel_ptr = rel_str; char Q_str[200]; char Y_str[200]; FILE * LPNEW = la_inf->lpnew; unsigned long small_primes = qs_inf->small_primes; unsigned long * small = la_inf->small; const unsigned long num_factors = la_inf->num_factors; fac_t * factor = la_inf->factor; unsigned long fac_num = 0; for (unsigned long i = 0; i < small_primes; i++) { if (small[i]) add_factor(&rel_ptr, (unsigned long) small[i], (unsigned long) i); } 
for (unsigned long i = 0; i < num_factors; i++) { add_factor(&rel_ptr, (unsigned long) factor[i].exp, (unsigned long) factor[i].ind); } add_0(&rel_ptr); gmp_sprintf(Y_str, "%Zd\0", Y); gmp_sprintf(Q_str, "%Zd\0", res); fprintf(LPNEW, "%s @ %s :%s\n", Q_str, Y_str, rel_str); la_inf->num_lp_unmerged++; if ((la_inf->num_lp_unmerged %256) == 0) printf("%ld partials\n", la_inf->num_lp_unmerged); if (la_inf->num_lp_unmerged == 500) { return merge_lp_relations(qs_inf, poly_inf, la_inf); } return 0; } /*========================================================================== Insert relation: Function: Insert the relation into the matrix and store the Y value ===========================================================================*/ unsigned long insert_relation(QS_t * qs_inf, linalg_t * la_inf, poly_t * poly_inf, mpz_t Y) { la_col_t * unmerged = la_inf->unmerged; unsigned long num_unmerged = la_inf->num_unmerged; unsigned long small_primes = qs_inf->small_primes; unsigned long * small = la_inf->small; const unsigned long num_factors = la_inf->num_factors; fac_t * factor = la_inf->factor; unsigned long * curr_rel = la_inf->curr_rel; unsigned long fac_num = 0; clear_col(unmerged + num_unmerged); for (unsigned long i = 0; i < small_primes; i++) { if (small[i] & 1) insert_col_entry(unmerged + num_unmerged, i); if (small[i]) { curr_rel[2*fac_num + 1] = i; curr_rel[2*fac_num + 2] = small[i]; fac_num++; } } for (unsigned long i = 0; i < num_factors; i++) { if (factor[i].exp & 1) insert_col_entry(unmerged + num_unmerged, factor[i].ind); curr_rel[2*fac_num + 1] = factor[i].ind; curr_rel[2*fac_num + 2] = factor[i].exp; fac_num++; } curr_rel[0] = fac_num; unmerged[num_unmerged].orig = la_inf->num_relations; mpz_set(la_inf->Y_arr[la_inf->num_relations], Y); #if TEST3 mpz_t X, temp, temp2; mpz_init(X); mpz_init(temp); mpz_init(temp2); mpz_set_ui(X, 1); for (unsigned long j = 0; j < curr_rel[0]; j++) { mpz_set_ui(temp, qs_inf->factor_base[curr_rel[2*j + 1]].p); mpz_pow_ui(temp, 
temp, curr_rel[2*j + 2]); mpz_mul(X, X, temp); } mpz_mod(X, X, qs_inf->mpz_n); mpz_mul(temp, Y, Y); mpz_mod(temp, temp, qs_inf->mpz_n); if (mpz_cmp(X, temp) != 0) { mpz_add(temp2, temp, X); if (mpz_cmp(temp2, qs_inf->mpz_n) != 0) { gmp_printf("X = %Zd (mod N) != \nY^2 = %Zd (mod N)\n\n", X, temp); gmp_printf("n = %Zd\n", qs_inf->mpz_n); } } mpz_clear(X); mpz_clear(temp); mpz_clear(temp2); #endif la_inf->curr_rel += MAX_FACS*2; la_inf->num_unmerged++; la_inf->num_relations++; if (la_inf->num_unmerged == 100) { return merge_relations(la_inf); } return 0; } flint-1.011/QS/poly.c0000644017361200017500000003227611025357253014206 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** poly.c Routines for managing polynomials (C) 2006 William Hart ******************************************************************************/ #include #include #include #include "../flint.h" #include "../memory-manager.h" #include "../long_extras.h" #include "../longlong_wrapper.h" #include "../longlong.h" #include "poly.h" #include "common.h" /*========================================================================= poly_init: Function: computes parameters for the polynomials and initialises the various structures required ==========================================================================*/ void poly_init(QS_t * qs_inf, poly_t * poly_inf, mpz_t N) { unsigned long num_primes = qs_inf->num_primes; unsigned long s = (qs_inf->bits-1)/28+1; prime_t * factor_base = qs_inf->factor_base; unsigned long fact_approx, fact, span; long min; poly_inf->s = s; poly_inf->B_terms = (unsigned long*) flint_stack_alloc(s); poly_inf->A_ind = (unsigned long*) flint_stack_alloc(s); poly_inf->A_modp = (unsigned long*) flint_stack_alloc(s); poly_inf->A_inv2B = (unsigned long**) flint_stack_alloc(s); poly_inf->inv_p2 = (double*) flint_stack_alloc_bytes(s*sizeof(double)); poly_inf->A_inv = (unsigned long*) flint_stack_alloc(num_primes); poly_inf->soln1 = (unsigned long*) flint_stack_alloc(num_primes); poly_inf->soln2 = (unsigned long*) flint_stack_alloc(num_primes); unsigned long ** A_inv2B = poly_inf->A_inv2B; A_inv2B[0] = (unsigned long *) flint_stack_alloc(num_primes*s); mpz_init(poly_inf->C); for (unsigned long i = 1; i < s; i++) { A_inv2B[i] = A_inv2B[i-1] + num_primes; } mpz_t temp; mpz_init(temp); mpz_mul_ui(temp, N, 
2*qs_inf->k); mpz_sqrt(temp, temp); mpz_div_ui(temp, temp, 300); poly_inf->target_A = mpz_get_ui(temp); mpz_root(temp, temp, s); fact_approx = mpz_get_ui(temp); for (fact = 0; fact_approx >= factor_base[fact].p; fact++); span = num_primes/s/s/2; if (span < 4*s) span = 4*s; min = fact - span/2; if (min < SMALL_PRIMES) min = SMALL_PRIMES; if (min + span >= qs_inf->num_primes) span = num_primes - min - 1; fact = min + span/2; #if POLY_PARAMS printf("min = FB[%ld], span = %ld, number of factors = %ld\n", min, span, s); #endif poly_inf->min = min; poly_inf->fact = fact; poly_inf->span = span; mpz_clear(temp); } void poly_clear(poly_t * poly_inf) { mpz_clear(poly_inf->C); flint_stack_release(); // release all A_inv2B[i] flint_stack_release(); // release soln1 flint_stack_release(); // release soln2 flint_stack_release(); // release A_inv flint_stack_release(); // release inv_p2 flint_stack_release(); // release A_inv2B flint_stack_release(); // release A_modp flint_stack_release(); // release A_ind flint_stack_release(); // release B_terms } /*========================================================================= compute_A: Function: Compute a new polynomial A value The function attempts to pick A near to an optimal size ==========================================================================*/ void compute_A(QS_t * qs_inf, poly_t * poly_inf) { unsigned long min = poly_inf->min; unsigned long span = poly_inf->span; unsigned long s = poly_inf->s; unsigned long * A_ind = poly_inf->A_ind; prime_t * factor_base = qs_inf->factor_base; unsigned long factor, i, p; unsigned long diff, best_diff, best1, best2; unsigned long A; if (s <= 4) { A_ind[0] = z_randint(span) + min; do { A_ind[1] = z_randint(span) + min; } while (A_ind[0] == A_ind[1]); } if (s == 2) A = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p; if ((s == 3) || (s == 4)) { do { A_ind[2] = z_randint(span) + min; } while ((A_ind[0] == A_ind[2]) || (A_ind[1] == A_ind[2])); A = factor_base[A_ind[0]].p * 
factor_base[A_ind[1]].p * factor_base[A_ind[2]].p; } if (s == 4) { factor = (poly_inf->target_A - 1) / A + 1; for (i = min; i < min+span; i++) { if ((factor_base[i].p > factor) && (i != A_ind[0]) && (i != A_ind[1]) && (i != A_ind[2])) break; } if (i == min + span) { i--; while ((i == A_ind[0]) || (i == A_ind[1]) || (i == A_ind[2])) i--; } A_ind[3] = i; A *= factor_base[A_ind[3]].p; } if (s == 5) { A_ind[0] = ((z_randint(span) + min) | 1); if (A_ind[0] == min + span) A_ind[0] -= 2; do { A_ind[1] = ((z_randint(span) + min) | 1); if (A_ind[1] == min + span) A_ind[1] -= 2; } while (A_ind[0] == A_ind[1]); do { A_ind[2] = ((z_randint(span) + min) | 1); if (A_ind[2] == min + span) A_ind[2] -= 2; } while ((A_ind[0] == A_ind[2]) || (A_ind[1] == A_ind[2])); A = factor_base[A_ind[0]].p * factor_base[A_ind[1]].p * factor_base[A_ind[2]].p; factor = poly_inf->target_A / A; for (i = 0; i < 8; i++) { A_ind[3] = ((z_randint(span) + min) & -2L); if (A_ind[3] < min) A_ind[3]+=2; do { A_ind[4] = ((z_randint(span) + min) & -2L); if (A_ind[4] < min) A_ind[4]+=2; } while (A_ind[3] == A_ind[4]); if (i == 0) { best_diff = FLINT_ABS(factor_base[A_ind[3]].p * factor_base[A_ind[4]].p - factor); best1 = A_ind[3]; best2 = A_ind[4]; continue; } diff = FLINT_ABS(factor_base[A_ind[3]].p * factor_base[A_ind[4]].p - factor); if (diff < best_diff) { best_diff = diff; best1 = A_ind[3]; best2 = A_ind[4]; } } A_ind[3] = best1; A_ind[4] = best2; A = A * factor_base[A_ind[3]].p * factor_base[A_ind[4]].p; } poly_inf->A = A; #if POLY_A if ((s == 4) || (s == 5)) printf("A = %ld, target A = %ld\n", A, poly_inf->target_A); #endif for (i = 0; i < s; i++) { p = factor_base[A_ind[i]].p; poly_inf->inv_p2[i] = z_precompute_inverse(p*p); } } /*========================================================================= compute B terms: Function: Compute the terms from which the B values of the polynomials are constructed and compute the starting B coefficient 
==========================================================================*/ void compute_B_terms(QS_t * qs_inf, poly_t * poly_inf) { unsigned long s = poly_inf->s; unsigned long * A_ind = poly_inf->A_ind; unsigned long * A_modp = poly_inf->A_modp; unsigned long * B_terms = poly_inf->B_terms; prime_t * factor_base = qs_inf->factor_base; unsigned long A = poly_inf->A; unsigned long B; unsigned long p, temp, temp2, i; double pinv; for (i = 0; i < s; i++) { p = factor_base[A_ind[i]].p; pinv = factor_base[A_ind[i]].pinv; temp2 = (temp = z_div_64_precomp(A, p, pinv)); A_modp[i] = (temp = z_mod_64_precomp(temp, p, pinv)); temp = z_invert(temp, p); temp = z_mulmod_precomp(temp, qs_inf->sqrts[A_ind[i]], p, pinv); if (temp > p/2) temp = p - temp; B_terms[i] = temp*temp2; } B = B_terms[0]; for (i = 1; i < s; i++) { B += B_terms[i]; } poly_inf->B = B; } /*========================================================================= Compute offsets and hypercube polynomial correction factors: Function: Compute the starting offsets in the sieve for each prime and the polynomial correction factors used by the hypercube method ==========================================================================*/ void compute_off_adj(QS_t * qs_inf, poly_t * poly_inf) { unsigned long num_primes = qs_inf->num_primes; unsigned long A = poly_inf->A; unsigned long B = poly_inf->B; unsigned long * A_inv = poly_inf->A_inv; unsigned long ** A_inv2B = poly_inf->A_inv2B; unsigned long * B_terms = poly_inf->B_terms; unsigned long * soln1 = poly_inf->soln1; unsigned long * soln2 = poly_inf->soln2; uint32_t * sqrts = qs_inf->sqrts; prime_t * factor_base = qs_inf->factor_base; unsigned long s = poly_inf->s; unsigned long p, temp; double pinv; for (unsigned long i = 2; i < num_primes; i++) // skip k and 2 { p = factor_base[i].p; pinv = factor_base[i].pinv; A_inv[i] = z_invert(z_mod_64_precomp(A, p, pinv), p); for (unsigned long j = 0; j < s; j++) { temp = z_mod_64_precomp(B_terms[j], p, pinv); temp = 
z_mulmod_precomp(temp, A_inv[i], p, pinv); temp *= 2; if (temp >= p) temp -= p; A_inv2B[j][i] = temp; } temp = z_mod_64_precomp(B, p, pinv); temp = sqrts[i] + p - temp; temp *= A_inv[i]; temp += SIEVE_SIZE/2; soln1[i] = z_mod_64_precomp(temp, p, pinv); // Consider using z_mod_precomp temp = p - sqrts[i]; if (temp == p) temp -= p; temp = z_mulmod_precomp(temp, A_inv[i], p, pinv); temp *= 2; if (temp >= p) temp -= p; soln2[i] = temp+soln1[i]; if (soln2[i] >= p) soln2[i] -= p; } } /*========================================================================= Compute offsets and hypercube polynomial correction factors: Function: Compute the starting offsets in the sieve for each prime and the polynomial correction factors used by the hypercube method ==========================================================================*/ void compute_A_factor_offsets(QS_t * qs_inf, poly_t * poly_inf) { unsigned long s = poly_inf->s; unsigned long * A_ind = poly_inf->A_ind; unsigned long * A_modp = poly_inf->A_modp; unsigned long * soln1 = poly_inf->soln1; unsigned long * soln2 = poly_inf->soln2; unsigned long p, D; unsigned long * n = qs_inf->n; unsigned long B = poly_inf->B; unsigned long temp, temp2, B_modp2, index, p2; prime_t * factor_base = qs_inf->factor_base; double * inv_p2 = poly_inf->inv_p2; double pinv; for (unsigned long j = 0; j < s; j++) { index = A_ind[j]; p = factor_base[index].p; p2 = p*p; pinv = factor_base[index].pinv; D = z_ll_mod_precomp(n[2], n[1], p*p, inv_p2[j]); if ((long) B < 0) { B_modp2 = z_mod_64_precomp(-B, p2, inv_p2[j]); B_modp2 = p2 - B_modp2; if (B_modp2 == p2) B_modp2 = 0; } else B_modp2 = z_mod_64_precomp(B, p2, inv_p2[j]); temp = B_modp2*A_modp[j]; temp = z_mod_64_precomp(temp, p, pinv); temp2 = z_invert(temp, p); D -= (B_modp2*B_modp2); if ((long) D < 0) temp = -z_div_64_precomp(-D, p, pinv); else temp = -z_div_64_precomp(-D, p, pinv); temp *= temp2; temp += SIEVE_SIZE/2; if ((long) temp < 0) { temp = p - z_mod_64_precomp(-temp, p, pinv); if 
(temp == p) temp = 0; } else temp = z_mod_64_precomp(temp, p, pinv); soln1[index] = temp; soln2[index] = -1L; } } /*========================================================================= Compute C: Function: Compute the C coefficient of the polynomial with the current A and B values ==========================================================================*/ void compute_C(QS_t * qs_inf, poly_t * poly_inf) { unsigned long A = poly_inf->A; unsigned long B = poly_inf->B; mpz_t * C = &poly_inf->C; mpz_t * mpz_n = &qs_inf->mpz_n; if ((long) B < 0L) B = -B; mpz_set_ui(*C, B); mpz_mul_ui(*C, *C, B); mpz_sub(*C, *C, *mpz_n); mpz_divexact_ui(*C, *C, A); } flint-1.011/QS/mp_sieve.c0000644017361200017500000003431211025357253015023 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mp_sieve.c Routines for doing and managing sieving (C) 2006 William Hart ******************************************************************************/ #include #include #include #include #include "../flint.h" #include "../long_extras.h" #include "common.h" #include "mp_poly.h" #include "mp_linear_algebra.h" #include "mp_sieve.h" void get_sieve_params(QS_t * qs_inf) { unsigned long bits = qs_inf->bits; unsigned long i; prime_t * factor_base = qs_inf->factor_base; unsigned long num_primes = qs_inf->num_primes; unsigned long mult = qs_inf->num_primes; for (i = 0; i < PTABSIZE; i++) { if (prime_tab[i][0] > bits) break; } qs_inf->sieve_size = prime_tab[i-1][2]; qs_inf->small_primes = prime_tab[i-1][3]; qs_inf->large_prime = prime_tab[i-1][4]*factor_base[num_primes-1].p; qs_inf->error_bits = round(log(qs_inf->large_prime)/log(2.0))+3; // 2, 5, 6 printf("Error bits = %ld\n", qs_inf->error_bits); } void do_sieving2(QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve) { unsigned long num_primes = qs_inf->num_primes; uint32_t * soln1 = poly_inf->soln1; uint32_t * soln2 = poly_inf->soln2; prime_t * factor_base = qs_inf->factor_base; unsigned long sieve_size = qs_inf->sieve_size; unsigned char * end = sieve + sieve_size; unsigned char * sizes = qs_inf->sizes; unsigned long sieve_fill = qs_inf->sieve_fill; unsigned long small_primes = qs_inf->small_primes; unsigned char * bound; unsigned char * pos1; unsigned char * pos2; unsigned long size; unsigned long p; memset(sieve, sieve_fill, sieve_size); *end = 255; const unsigned long second_prime = FLINT_MIN(SECOND_PRIME, num_primes); for (unsigned long prime = small_primes; prime 
< second_prime; prime++) { if (soln2[prime] == -1) continue; p = factor_base[prime].p; size = sizes[prime]; pos1 = sieve + soln1[prime]; pos2 = sieve + soln2[prime]; bound = end - p; while (bound - pos1 > 0) { (*pos1)+=size, pos1+=p, (*pos2)+=size, pos2+=p; } if ((end - pos1 > 0) && (end - pos2 > 0)) { (*pos1)+=size, pos1+=p, (*pos2)+=size, pos2+=p; } if (end - pos2 > 0) { (*pos2)+=size; } if (end - pos1 > 0) { (*pos1)+=size; } } for (unsigned long prime = second_prime; prime < num_primes; prime++) { p = factor_base[prime].p; size = sizes[prime]; pos1 = sieve + soln1[prime]; pos2 = sieve + soln2[prime]; if (end - pos2 > 0) { (*pos2)+=size; } if (end - pos1 > 0) { (*pos1)+=size; } } } void update_offsets(unsigned long poly_add, uint32_t * poly_corr, QS_t * qs_inf, poly_t * poly_inf) { unsigned long num_primes = qs_inf->num_primes; uint32_t * soln1 = poly_inf->soln1; uint32_t * soln2 = poly_inf->soln2; prime_t * factor_base = qs_inf->factor_base; unsigned long p, correction; for (unsigned long prime = 2; prime < num_primes; prime++) { if (soln2[prime] == -1) continue; p = factor_base[prime].p; correction = (poly_add ? 
p - poly_corr[prime] : poly_corr[prime]); soln1[prime] += correction; if (soln1[prime] >= p) soln1[prime] -= p; soln2[prime] += correction; if (soln2[prime] >= p) soln2[prime] -= p; } } void do_sieving(QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve, unsigned long first_prime, unsigned long second_prime, unsigned long M, int first, int last) { unsigned long num_primes = qs_inf->num_primes; uint32_t * soln1 = poly_inf->soln1; uint32_t * soln2 = poly_inf->soln2; uint32_t * posn1 = poly_inf->posn1; uint32_t * posn2 = poly_inf->posn2; prime_t * factor_base = qs_inf->factor_base; unsigned long small_primes = qs_inf->small_primes; unsigned long p, correction; register unsigned char * position; unsigned char * end = sieve + M; unsigned char * sizes = qs_inf->sizes; register unsigned char * pos1; unsigned char * pos2; register unsigned char * bound; unsigned long size; long diff; for (unsigned long prime = first_prime; prime < second_prime; prime++) { if (soln2[prime] == -1) continue; p = factor_base[prime].p; size = sizes[prime]; if (first) { pos1 = sieve + soln1[prime]; pos2 = sieve + soln2[prime]; } else { pos1 = sieve + posn1[prime]; pos2 = sieve + posn2[prime]; } bound = end - p; while (bound - pos1 > 0) { (*pos1)+=size, pos1+=p, (*pos2)+=size, pos2+=p; } if ((end - pos1 > 0) && (end - pos2 > 0)) { (*pos1)+=size, pos1+=p, (*pos2)+=size, pos2+=p; } if (end - pos2 > 0) { (*pos2)+=size, pos2+=p; } if (end - pos1 > 0) { (*pos1)+=size, pos1+=p; } if (!last) { posn1[prime] = pos1 - sieve; posn2[prime] = pos2 - sieve; } } } void do_sieving3(QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve, unsigned long first_prime, unsigned long second_prime, unsigned long M) { uint32_t * soln1 = poly_inf->soln1; uint32_t * soln2 = poly_inf->soln2; prime_t * factor_base = qs_inf->factor_base; unsigned long p; unsigned char * end = sieve + M; unsigned char * sizes = qs_inf->sizes; register unsigned char * pos1; unsigned char * pos2; unsigned long size; for (unsigned long prime 
= first_prime; prime < second_prime; prime++) { if (soln2[prime] == -1) continue; p = factor_base[prime].p; size = sizes[prime]; pos1 = sieve + soln1[prime]; pos2 = sieve + soln2[prime]; while (end - pos2 > 0) { (*pos2)+=size, pos2+=p; } while (end - pos1 > 0) { (*pos1)+=size, pos1+=p; } } } /*========================================================================== evaluate_candidate: Function: determine whether a given sieve entry is a relation ===========================================================================*/ unsigned long evaluate_candidate(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned long i, unsigned char * sieve) { unsigned long bits, exp, extra_bits, modp, prime; unsigned long num_primes = qs_inf->num_primes; prime_t * factor_base = qs_inf->factor_base; uint32_t * soln1 = poly_inf->soln1; uint32_t * soln2 = poly_inf->soln2; unsigned long * small = la_inf->small; unsigned long sieve_fill = qs_inf->sieve_fill; unsigned long sieve_size = qs_inf->sieve_size; fac_t * factor = la_inf->factor; mpz_t * A = &poly_inf->A_mpz; mpz_t * B = &poly_inf->B_mpz; unsigned long error_bits = qs_inf->error_bits; unsigned long small_primes = qs_inf->small_primes; unsigned long large_prime = qs_inf->large_prime; unsigned long num_factors = 0; unsigned long j; mpz_t * C = &poly_inf->C; unsigned long relations = 0; double pinv; const unsigned long second_prime = FLINT_MIN(SECOND_PRIME, num_primes); mpz_t X, Y, res, p; mpz_init(X); mpz_init(Y); mpz_init(res); mpz_init(p); #if POLYS printf("X = %ld\n", i); gmp_printf("%ZdX^2+2*%ZdX+%Zd\n", A, B, C); #endif mpz_set_ui(X, i); mpz_sub_ui(X, X, sieve_size/2); //X mpz_mul(Y, X, *A); mpz_add(Y, Y, *B); // Y = AX+B mpz_add(res, Y, *B); mpz_mul(res, res, X); mpz_add(res, res, *C); // res = AX^2+2BX+C bits = mpz_sizeinbase(res, 2); bits -= error_bits; extra_bits = 0; mpz_set_ui(p, 2); // divide out by powers of 2 exp = mpz_remove(res, res, p); #if RELATIONS if (exp) printf("2^%ld ", exp); #endif extra_bits += exp; 
small[1] = exp; if (factor_base[0].p != 1) // divide out powers of the multiplier { mpz_set_ui(p, factor_base[0].p); exp = mpz_remove(res, res, p); if (exp) extra_bits += qs_inf->sizes[0]; small[0] = exp; #if RELATIONS if (exp) printf("%ld^%ld ", factor_base[0].p, exp); #endif } else small[0] = 0; for (unsigned long j = 2; j < small_primes; j++) // pull out small primes { prime = factor_base[j].p; pinv = factor_base[j].pinv; modp = z_mod2_precomp(i, prime, pinv); if ((modp == soln1[j]) || (modp == soln2[j])) { mpz_set_ui(p, prime); exp = mpz_remove(res, res, p); extra_bits += qs_inf->sizes[j]; small[j] = exp; #if RELATIONS gmp_printf("%Zd^%ld ", p, exp); #endif } else small[j] = 0; } if (extra_bits + sieve[i] > bits+sieve_fill) { sieve[i] += extra_bits - sieve_fill; for (j = small_primes; (j < second_prime) && (extra_bits < sieve[i]); j++) // pull out remaining primes { prime = factor_base[j].p; pinv = factor_base[j].pinv; modp = z_mod2_precomp(i, prime, pinv); if (soln2[j] != -1) { if ((modp == soln1[j]) || (modp == soln2[j])) { mpz_set_ui(p, prime); exp = mpz_remove(res, res, p); #if RELATIONS gmp_printf("%Zd^%ld ", p, exp); #endif extra_bits += qs_inf->sizes[j]; factor[num_factors].ind = j; factor[num_factors++].exp = exp; } } else { mpz_set_ui(p, prime); exp = mpz_remove(res, res, p); factor[num_factors].ind = j; factor[num_factors++].exp = exp+1; #if RELATIONS if (exp) gmp_printf("%Zd^%ld ", p, exp); #endif } } for ( ; (j < num_primes) && (extra_bits < sieve[i]); j++) // pull out remaining primes { if ((i == soln1[j]) || (i == soln2[j])) { prime = factor_base[j].p; mpz_set_ui(p, prime); exp = mpz_remove(res, res, p); #if RELATIONS gmp_printf("%Zd^%ld ", p, exp); #endif extra_bits += qs_inf->sizes[j]; factor[num_factors].ind = j; factor[num_factors++].exp = exp; } } if (mpz_cmpabs_ui(res, 1) == 0) // We've found a relation { unsigned long * A_ind = poly_inf->A_ind; for (unsigned long i = 0; i < poly_inf->s; i++) // Commit any outstanding A factors { if 
(A_ind[i] >= j) { factor[num_factors].ind = A_ind[i]; factor[num_factors++].exp = 1; } } la_inf->num_factors = num_factors; relations += insert_relation(qs_inf, la_inf, poly_inf, Y); // Insert the relation in the matrix if (la_inf->num_relations >= qs_inf->num_primes + EXTRA_RELS + 200) { printf("Error: too many duplicate relations!\n"); abort(); } goto cleanup; } else if(mpz_cmpabs_ui(res, large_prime) < 0) { if (mpz_sgn(res) < 0) mpz_neg(res, res); unsigned long * A_ind = poly_inf->A_ind; for (unsigned long i = 0; i < poly_inf->s; i++) // Commit any outstanding A factors { if (A_ind[i] >= j) { factor[num_factors].ind = A_ind[i]; factor[num_factors++].exp = 1; } } la_inf->num_factors = num_factors; relations += insert_lp_relation(qs_inf, la_inf, poly_inf, Y, res); // Insert the relation in the matrix goto cleanup; } } cleanup: #if RELATIONS printf("\n"); #endif mpz_clear(X); mpz_clear(Y); mpz_clear(res); mpz_clear(p); return relations; } /*========================================================================== evaluateSieve: Function: searches sieve for relations and sticks them into a matrix ===========================================================================*/ unsigned long evaluate_sieve(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve) { unsigned long i = 0; unsigned long j = 0; unsigned long * sieve2 = (unsigned long *) sieve; unsigned long sieve_size = qs_inf->sieve_size; unsigned long rels = 0; while (j < sieve_size/sizeof(unsigned long)) { #if FLINT_BITS == 64 while (!(sieve2[j] & 0x8080808080808080U)) j++; #else while (!(sieve2[j] & 0x80808080U)) j++; #endif i = j*sizeof(unsigned long); while ((i < (j+1)*sizeof(unsigned long)) && (i < sieve_size)) { if (sieve[i] > 128) { rels += evaluate_candidate(la_inf, qs_inf, poly_inf, i, sieve); } i++; } j++; } return rels; } flint-1.011/QS/mp_poly.h0000644017361200017500000000446011025357253014701 0ustar 
tabbotttabbott/*============================================================================

    This file is part of FLINT.

    FLINT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    FLINT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with FLINT; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

===============================================================================*/
/******************************************************************************

 mp_poly.h
 Header file for poly.c.

 (C) 2006 William Hart

******************************************************************************/

#ifndef MPPOLY_H
#define MPPOLY_H

/* NOTE(review): the name of the system header below was lost when this text
   was extracted; restore it from a pristine copy of mp_poly.h. */
#include

#include "common.h"

#define POLY_PARAMS 1 // Print the parameters being used to choose polynomials
#define POLY_A 0 // Print target A and actual A
#define TEST_C 0 // Test polynomial coefficients
#define B_TERMS 0 // Print out the B_terms

/*
   State describing the current sieving polynomial. The multiprecision
   sieve reads the coefficients from A_mpz, B_mpz and C, and the per-prime
   sieve offsets from soln1/soln2.
*/
typedef struct poly_s
{
   unsigned long s;                 /* number of entries of A_ind that the sieve iterates over */
   unsigned long fact, span, min;
   unsigned long * target_A;
   unsigned long * A;
   unsigned long * B;
   mpz_t A_mpz;                     /* A coefficient used when evaluating candidates */
   mpz_t B_mpz;                     /* B coefficient used when evaluating candidates */
   mpz_t C;                         /* C coefficient used when evaluating candidates */
   unsigned long * A_ind;           /* factor base indices, s of them */
   unsigned long * A_modp;
   uint32_t * A_inv;
   uint32_t * soln1;                /* first sieve offset, indexed by factor base index */
   uint32_t * soln2;                /* second sieve offset; the sieve skips a prime whose entry is -1 */
   uint32_t * posn1;                /* sieve position saved by do_sieving between calls */
   uint32_t * posn2;                /* sieve position saved by do_sieving between calls */
   uint32_t ** A_inv2B;
   double * inv_p2;
   unsigned long * B_terms;
} poly_t;

void poly_init(QS_t * qs_inf, poly_t * poly_inf, mpz_t N);

void poly_clear(poly_t * poly_inf);

void compute_A(QS_t * qs_inf, poly_t * poly_inf);

void compute_B_terms(QS_t * qs_inf, poly_t * poly_inf);

void compute_off_adj(QS_t * qs_inf, poly_t * poly_inf);

void compute_A_factor_offsets(QS_t * qs_inf, poly_t * poly_inf);

void compute_B_C(QS_t * qs_inf, poly_t * poly_inf);

#endif
flint-1.011/QS/sieve.h0000644017361200017500000000363311025357253014336 0ustar tabbotttabbott/*============================================================================

    This file is part of FLINT.

    FLINT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    FLINT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with FLINT; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

===============================================================================*/
/******************************************************************************

 sieve.h
 Header file for sieve.c.

(C) 2006 William Hart ******************************************************************************/ #ifndef SIEVE_H #define SIEVE_H #include #include "linear_algebra.h" #include "common.h" #define RELATIONS 1 // Print out relations as they are generated #define POLYS 0 // Print out polynomials and offsets in candidate evaluation void do_sieving(QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve); void update_offsets(unsigned long poly_add, unsigned long * poly_corr, QS_t * qs_inf, poly_t * poly_inf); unsigned long evaluate_sieve(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve); unsigned long evaluate_candidate(QS_t * qs_inf, poly_t * poly_inf, unsigned long i, unsigned char * sieve); #endif flint-1.011/QS/mp_factor_base.c0000644017361200017500000001766611025357253016175 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mp_factor_base.c Routines for generating and maintaining the factor base primes including the multiplier (C) 2006 William Hart ******************************************************************************/ #include #include #include #include "../flint.h" #include "../memory-manager.h" #include "../long_extras.h" #include "mp_factor_base.h" /*========================================================================= num_FB_primes: Function: retrieve the number of factor base primes to use from table ==========================================================================*/ unsigned long num_FB_primes(unsigned long bits) { unsigned long i; for (i = 0; i < PTABSIZE; i++) { if (prime_tab[i][0] > bits) break; } return prime_tab[i-1][1]; } /*========================================================================= sqrts_init: Function: allocate space for the factorbase primes and associated info ==========================================================================*/ void sqrts_init(QS_t * qs_inf) { qs_inf->sqrts = (uint32_t *) flint_stack_alloc_bytes(qs_inf->num_primes*sizeof(uint32_t)); } void sqrts_clear(void) { flint_stack_release(); } /*========================================================================= primes_init: Function: allocate space for the factorbase primes and associated info ==========================================================================*/ void primes_init(QS_t * qs_inf) { unsigned long bits = qs_inf->bits; // set bits to the number of bits of kn qs_inf->num_primes = num_FB_primes(bits); qs_inf->factor_base = (prime_t *) flint_stack_alloc_bytes(qs_inf->num_primes*sizeof(prime_t)); } 
void primes_clear(void)
{
   /* hands the most recent FLINT stack allocation back */
   flint_stack_release();
}

/*===========================================================================
   Compute Prime Sizes:
 
   Function: Computes the size in bits of each prime in the factor base
   
   Allocates qs_inf->sizes (one byte per factor base entry) on the FLINT
   stack and fills entry i with log2 of factor_base[i].p, rounded to the
   nearest integer.
 
===========================================================================*/

void compute_sizes(QS_t * qs_inf)
{
   const unsigned long count = qs_inf->num_primes;
   unsigned char * const out = (unsigned char *) flint_stack_alloc_bytes(count);
   prime_t * const primes = qs_inf->factor_base;
   unsigned long idx = 0;
   
   qs_inf->sizes = out;
   
   while (idx < count)
   {
      out[idx] = (unsigned char) round(log(primes[idx].p)/log(2.0));
      idx++;
   }
}

void sizes_clear(void)
{
   /* hands the most recent FLINT stack allocation back */
   flint_stack_release();
}

/*=========================================================================
   Knuth-Schroeppel algorithm:
 
   Function: Find the best multiplier to use (allows 2 as a multiplier).
             The general idea is to find a multiplier k such that kn will
             be faster to factor. This is achieved by making kn a square 
             modulo lots of small primes. These primes will then be factor
             base primes, and the more small factor base primes, the faster
             relations will accumulate, since they hit the sieving interval
             more often.
Also computes approximate inverses and modular square roots primes that are suitable as factor base primes ==========================================================================*/ unsigned long knuth_schroeppel(QS_t * qs_inf) { float best_factor = -10.0f; unsigned long multiplier = 1; unsigned long nmod8, mod8, multindex, prime, nmod, mult; const unsigned long max_fb_primes = qs_inf->num_primes; unsigned long fb_prime = 2; // leave space for the multiplier and 2 float factors[NUMMULTS]; float logpdivp; double pinv; int kron; uint32_t * sqrts = qs_inf->sqrts; fmpz_t n = qs_inf->n; nmod8 = n[1]%8; mpz_t r; for (multindex = 0; multindex < NUMMULTS; multindex++) { mod8 = ((nmod8*multipliers[multindex])%8); factors[multindex] = 0.34657359; // ln2/2 if (mod8 == 1) factors[multindex] *= 4.0; if (mod8 == 5) factors[multindex] *= 2.0; factors[multindex] -= (log((float) multipliers[multindex]) / 2.0); } prime = 3; while ((prime < KSMAX) && (fb_prime < max_fb_primes)) { pinv = z_precompute_inverse(prime); logpdivp = log((float)prime) / (float)prime; // log p / p nmod = mpn_mod_1(n + 1, n[0], prime); if (nmod == 0) return prime; kron = z_jacobi_precomp(nmod, prime, pinv); for (multindex = 0; multindex < NUMMULTS; multindex++) { mult = multipliers[multindex]; if (mult >= prime) { if (mult >= prime*prime) mult = mult%prime; else mult = z_mod_precomp(mult, prime, pinv); } if (mult == 0) factors[multindex] += logpdivp; else if (kron*z_jacobi_precomp(mult, prime, pinv) == 1) factors[multindex] += 2.0*logpdivp; } prime = z_nextprime(prime); } for (multindex=0; multindex best_factor) { best_factor = factors[multindex]; multiplier = multipliers[multindex]; } } qs_inf->k = multiplier; return 0; } /*========================================================================= Compute Factor Base: Function: Compute all the primes p for which n is a quadratic residue mod p. Compute square roots of n modulo each p. 
==========================================================================*/

/*
   Fills qs_inf->factor_base and qs_inf->sqrts.

   Slot 0 holds the multiplier qs_inf->k (with its precomputed inverse) and
   slot 1 holds the prime 2. From slot 2 on, successive primes p > 2 are
   appended whenever the Jacobi symbol of (n mod p) is 1, together with
   their precomputed inverse and a square root of n modulo p. The scan stops
   once num_FB_primes(qs_inf->bits) slots are filled, after which
   qs_inf->num_primes is set to the number of slots used.

   Returns: a prime p that divides n but not the multiplier, if one is met
            (qs_inf->num_primes is then not updated), otherwise 0.
*/
unsigned long compute_factor_base(QS_t * qs_inf)
{
   unsigned long fb_prime = 2;
   unsigned long multiplier = qs_inf->k;
   prime_t * factor_base = qs_inf->factor_base;
   uint32_t * sqrts = qs_inf->sqrts;
   unsigned long num_primes = num_FB_primes(qs_inf->bits);
   unsigned long prime, nmod;
   double pinv;
   fmpz_t n = qs_inf->n;
   long kron;
   
   factor_base[0].p = multiplier;
   factor_base[0].pinv = z_precompute_inverse(multiplier);
   factor_base[1].p = 2;
   prime = 2;
   
   while (fb_prime < num_primes)
   {
      prime = z_nextprime(prime);
      pinv = z_precompute_inverse(prime);
      nmod = mpn_mod_1(n + 1, n[0], prime);
      if (nmod == 0) 
      {
         /* p divides n; it is a genuine factor unless it came from k */
         if (z_mod_precomp(multiplier, prime, pinv) != 0) return prime;
      }
      kron = z_jacobi_precomp(nmod, prime, pinv);
      if (kron == 1)
      {
         factor_base[fb_prime].p = prime;
         factor_base[fb_prime].pinv = pinv;
         sqrts[fb_prime] = z_sqrtmod(nmod, prime);
         fb_prime++;
      }
   }
   
   /* prime is unsigned long, so it must be printed with %lu */
   printf("Largest prime = %lu\n", prime);
   
   qs_inf->num_primes = fb_prime;

   return 0;
}
flint-1.011/QS/mp_lprels.h0000644017361200017500000000417211025357253015217 0ustar  tabbotttabbott
/*============================================================================
    Copyright 2006 William Hart    

    This file is part of FLINT.

    FLINT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    FLINT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ #ifndef LPRELS_H #define LPRELS_H #include "block_lanczos.h" #include "common.h" #include "mp_poly.h" #include "mp_linear_algebra.h" #define MPQS_STRING_LENGTH (4 * 1024UL) typedef struct { long q; char Y[MPQS_STRING_LENGTH]; char E[MPQS_STRING_LENGTH]; } mpqs_lp_entry; char * get_filename(char *dir, char *s); int mpqs_relations_cmp(const void *a, const void *b); void flint_fputs(char *s, FILE *file); long sort_lp_file(char *filename); long append_file(FILE *fp, FILE *fp1); long mpqs_mergesort_lp_file_internal(FILE *LPREL, FILE *LPNEW, FILE *COMB, FILE *TMP); long mergesort_lp_file(char *REL_str, char *NEW_str, char *TMP_str, FILE *COMB); void add_factor(char **last, unsigned long ei, unsigned long pi); void add_0(char **last); void set_exponents(unsigned long *ei, char *r); void set_lp_entry(mpqs_lp_entry *e, char *buf); unsigned long combine_large_primes(QS_t * qs_inf, linalg_t * la_inf, poly_t * poly_inf, FILE * COMB, mpz_t factor); FILE * flint_fopen(char * name, char * mode); char * unique_filename(char *s); #endif flint-1.011/QS/factor_base.c0000644017361200017500000001766011025357253015473 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** factor_base.c Routines for generating and maintaining the factor base primes including the multiplier (C) 2006 William Hart ******************************************************************************/ #include #include #include #include "../flint.h" #include "../memory-manager.h" #include "../long_extras.h" #include "factor_base.h" /*========================================================================= num_FB_primes: Function: retrieve the number of factor base primes to use from table ==========================================================================*/ unsigned long num_FB_primes(unsigned long bits) { unsigned long i; for (i = 0; i < PTABSIZE_SMALL; i++) { if (prime_tab_small[i][0] > bits) break; } return prime_tab_small[i-1][1]; } /*========================================================================= sqrts_init: Function: allocate space for the factorbase primes and associated info ==========================================================================*/ void sqrts_init(QS_t * qs_inf) { qs_inf->sqrts = (uint32_t *) flint_stack_alloc_bytes(sizeof(uint32_t)*qs_inf->num_primes); } void sqrts_clear(void) { flint_stack_release(); } /*========================================================================= primes_init: Function: allocate space for the factorbase primes and associated info ==========================================================================*/ void primes_init(QS_t * qs_inf) { unsigned long bits = qs_inf->bits; // set bits to the number of bits of kn qs_inf->num_primes = num_FB_primes(bits); qs_inf->factor_base = (prime_t *) 
flint_stack_alloc_bytes(qs_inf->num_primes*sizeof(prime_t)); } void primes_clear(void) { flint_stack_release(); } /*=========================================================================== Compute Prime Sizes: Function: Computes the size in bits of each prime in the factor base ===========================================================================*/ void compute_sizes(QS_t * qs_inf) { unsigned long num_primes = qs_inf->num_primes; qs_inf->sizes = (unsigned char *) flint_stack_alloc_bytes(num_primes); unsigned char * sizes = qs_inf->sizes; prime_t * factor_base = qs_inf->factor_base; for (unsigned long i = 0; i < num_primes; i++) { sizes[i] = (unsigned char) round(log(factor_base[i].p)/log(2.0)); } return; } void sizes_clear(void) { flint_stack_release(); } /*========================================================================= Knuth-Schroeppel algorithm: Function: Find the best multiplier to use (allows 2 as a multiplier). The general idea is to find a multiplier k such that kn will be faster to factor. This is achieved by making kn a square modulo lots of small primes. These primes will then be factor base primes, and the more small factor base primes, the faster relations will accumulate, since they hit the sieving interval more often. 
Also computes approximate inverses and modular square roots primes that are suitable as factor base primes ==========================================================================*/ unsigned long knuth_schroeppel(QS_t * qs_inf) { float best_factor = -10.0f; unsigned long multiplier = 1; unsigned long nmod8, mod8, multindex, prime, nmod, mult; const unsigned long max_fb_primes = qs_inf->num_primes; unsigned long fb_prime = 2; // leave space for the multiplier and 2 float factors[NUMMULTS]; float logpdivp; double pinv; int kron; uint32_t * sqrts = qs_inf->sqrts; fmpz_t n = qs_inf->n; nmod8 = n[1]%8; mpz_t r; for (multindex = 0; multindex < NUMMULTS; multindex++) { mod8 = ((nmod8*multipliers[multindex])%8); factors[multindex] = 0.34657359; // ln2/2 if (mod8 == 1) factors[multindex] *= 4.0; if (mod8 == 5) factors[multindex] *= 2.0; factors[multindex] -= (log((float) multipliers[multindex]) / 2.0); } prime = 3; while ((prime < KSMAX) && (fb_prime < max_fb_primes)) { pinv = z_precompute_inverse(prime); logpdivp = log((float)prime) / (float)prime; // log p / p nmod = z_ll_mod_precomp(n[2], n[1], prime, pinv); if (nmod == 0) return prime; kron = z_jacobi_precomp(nmod, prime, pinv); for (multindex = 0; multindex < NUMMULTS; multindex++) { mult = multipliers[multindex]; if (mult >= prime) { if (mult >= prime*prime) mult = mult%prime; else mult = z_mod_precomp(mult, prime, pinv); } if (mult == 0) factors[multindex] += logpdivp; else if (kron*z_jacobi_precomp(mult, prime, pinv) == 1) factors[multindex] += 2.0*logpdivp; } prime = z_nextprime(prime); } for (multindex=0; multindex best_factor) { best_factor = factors[multindex]; multiplier = multipliers[multindex]; } } qs_inf->k = multiplier; return 0; } /*========================================================================= Compute Factor Base: Function: Compute all the primes p for which n is a quadratic residue mod p. Compute square roots of n modulo each p. 
==========================================================================*/

/*
   Populates qs_inf->factor_base and qs_inf->sqrts.

   Entry 0 is the multiplier qs_inf->k with its precomputed inverse, entry 1
   is the prime 2. Every later prime p whose Jacobi symbol for (n mod p) is
   1 is appended with its inverse and a square root of n modulo p, until
   num_FB_primes(qs_inf->bits) entries exist. qs_inf->num_primes is then
   set to the number of entries.

   Returns: a prime that divides n but not the multiplier if one turns up
            (qs_inf->num_primes is then not updated), otherwise 0.
*/
unsigned long compute_factor_base(QS_t * qs_inf)
{
   const unsigned long k = qs_inf->k;
   prime_t * const fb = qs_inf->factor_base;
   uint32_t * const roots = qs_inf->sqrts;
   const unsigned long target = num_FB_primes(qs_inf->bits);
   fmpz_t n = qs_inf->n;
   unsigned long filled;
   unsigned long p = 2;
   
   fb[0].p = k;
   fb[0].pinv = z_precompute_inverse(k);
   fb[1].p = 2;
   
   for (filled = 2; filled < target; )
   {
      double inverse;
      unsigned long residue;
      
      p = z_nextprime(p);
      inverse = z_precompute_inverse(p);
      residue = z_ll_mod_precomp(n[2], n[1], p, inverse);
      
      /* p divides n but not the multiplier: hand the factor back */
      if ((residue == 0) && (z_mod_precomp(k, p, inverse) != 0)) return p;
      
      if (z_jacobi_precomp(residue, p, inverse) == 1)
      {
         fb[filled].p = p;
         fb[filled].pinv = inverse;
         roots[filled] = z_sqrtmod(residue, p);
         filled++;
      }
   }
   
   qs_inf->num_primes = filled;

   return 0;
}
flint-1.011/QS/block_lanczos.h0000644017361200017500000000744011025357253016046 0ustar  tabbotttabbott
/*============================================================================
    Copyright 2006 William Hart    

    This file is part of FLINT.

    FLINT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    FLINT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ============================================================================*/ #ifndef LANCZOS_H #define LANCZOS_H #define DISPLAY 0 // Display some info about the linear algebra phase #include #include #include "../memory-manager.h" typedef struct { unsigned long *data; /* The list of occupied rows in this column */ unsigned long weight; /* Number of nonzero entries in this column */ unsigned long orig; /* Original relation number */ } la_col_t; uint64_t get_null_entry(uint64_t *, long, long); void reduce_matrix(unsigned long *, unsigned long *, la_col_t *); uint64_t * block_lanczos(unsigned long, unsigned long, unsigned long, la_col_t*); /*========================================================================== insert_col_entry: Function: insert an entry into a column of the matrix, reallocating the space for the column if necessary ===========================================================================*/ static inline void insert_col_entry(la_col_t* col, unsigned long entry) { unsigned long* temp; if (((col->weight >> 4) << 4) == col->weight) //need more space { if (col->weight != 0) col->data = (unsigned long *) flint_heap_realloc(col->data, col->weight+16); else col->data = (unsigned long*) flint_heap_alloc(16); } col->data[col->weight] = entry; col->weight++; } /*========================================================================== copy_col: Function: clear a column ===========================================================================*/ static inline void copy_col(la_col_t* col2, la_col_t* col1) { col2->weight = col1->weight; col2->data = col1->data; col2->orig = col1->orig; } /*========================================================================== swap_cols: Function: swap two columns 
===========================================================================*/ static inline void swap_cols(la_col_t* col2, la_col_t* col1) { la_col_t temp; temp.weight = col1->weight; temp.data = col1->data; temp.orig = col1->orig; col1->weight = col2->weight; col1->data = col2->data; col1->orig = col2->orig; col2->weight = temp.weight; col2->data = temp.data; col2->orig = temp.orig; } /*========================================================================== clear_col: Function: clear a column ===========================================================================*/ static inline void clear_col(la_col_t* col) { col->weight = 0; } /*========================================================================== free_col: Function: free the memory used by a column ===========================================================================*/ static inline void free_col(la_col_t* col) { if (col->weight) flint_heap_free(col->data); } #endif flint-1.011/QS/tinyQS.h0000644017361200017500000000300311025357253014441 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** tinyQS.h Header file for tinyQS.c. (C) 2006 William Hart ******************************************************************************/ #ifndef TINYQS_H #define TINYQS_H #include #include "common.h" #define QS_INFO 0 // Print some info about what is being factored, etc #define MAXBITS 128 // Largest bits including multiplier that can be factored #define TEST 0 #define PRINT_FACTORS 1 typedef struct F_mpz_fact_s { mpz_t * fact; unsigned long num; } F_mpz_factor_t; #endif flint-1.011/QS/mp_lprels.c0000644017361200017500000005100111025357253015203 0ustar tabbotttabbott/*============================================================================ Copyright 2006 William Hart This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* This code has been adapted for FLINT from mpqs.c in the Pari/GP package. 
See http://pari.math.u-bordeaux.fr/ */ #include #include #include #include #include #include "mp_lprels.h" #define min_bufspace 120UL /* use new buffer when < min_bufspace left */ #define buflist_size 4096UL /* size of list-of-buffers blocks */ #define sort_table_size 100000UL /********************************************************************* File based large prime routines *********************************************************************/ /* Concatenates a filename and directory name to give a full path */ char * get_filename(char *dir, char *s) { char *buf = (char *) malloc(strlen(dir) + strlen(s) + 2); #if defined(__EMX__) || defined(WINCE) sprintf(buf, "%s\\%s", dir,s); #else sprintf(buf, "%s/%s", dir,s); #endif return buf; } char * unique_filename(char *s) { char *buf, suf[64]; size_t lsuf; sprintf(suf,".%ld.%ld", (long)getuid(), (long)getpid()); lsuf = strlen(suf); /* room for s + suffix '\0' */ buf = (char*) malloc(8 + lsuf + 1); sprintf(buf, "%.8s%s", s, suf); return buf; } FILE * flint_fopen(char * name, char * mode) { #if defined(WINCE) || defined(macintosh) char * tmp_dir = NULL; #else char * tmp_dir = getenv("TMPDIR"); #endif if (tmp_dir == NULL) tmp_dir = "./"; FILE * temp_file = fopen(get_filename(tmp_dir,unique_filename(name)),mode); if (!temp_file) { printf("Unable to open temporary file\n"); abort(); } return temp_file; } /* Compares two large prime relations according to their first element (the large prime). Used by qsort. 
*/ int lp_relations_cmp(const void *a, const void *b) { char **sa = (char**) a; char **sb = (char**) b; long qa = strtol(*sa, NULL, 10); long qb = strtol(*sb, NULL, 10); if (qa < qb) return -1; else if (qa > qb) return 1; else return strcmp(*sa, *sb); } /* Writes the given string to the given file and aborts upon error */ void flint_fputs(char *s, FILE *file) { if (fputs(s, file) < 0) { printf("Error whilst writing to large prime file!"); abort(); } } /* Given a file "filename" containing full or large prime relations, rearrange the file so that relations are sorted by their first elements. Works in memory, discards duplicate lines, and overwrites the original file. Returns the number of relations after sorting and discarding. */ long sort_lp_file(char *filename) { FILE *TMP; char *old_s, *buf, *cur_line; char **s_table, **sort_table, **buflist, **buflist_head; long i, j, count; size_t length, bufspace; buflist_head = (char**) malloc(buflist_size * sizeof(char*)); buflist = buflist_head; *buflist++ = NULL; /* flag this as last and only buflist block */ TMP = flint_fopen(filename, "r"); /* allocate first buffer and read first line, if any, into it */ buf = (char*) malloc(MPQS_STRING_LENGTH * sizeof(char)); cur_line = buf; bufspace = MPQS_STRING_LENGTH; if (fgets(cur_line, bufspace, TMP) == NULL) { /* file empty */ free(buf); free(buflist_head); fclose(TMP); return 0; } /* enter first buffer into buflist */ *buflist++ = buf; /* can't overflow the buflist block */ length = strlen(cur_line) + 1; /* count the \0 byte as well */ bufspace -= length; s_table = (char**) malloc(sort_table_size*sizeof(char*)); sort_table = s_table+sort_table_size; /* at start of loop, one line from the file is sitting in cur_line inside buf, the next will go into cur_line + length, and there's room for bufspace further characters in buf. 
The loop reads another line if one exists, and if this overruns the current buffer, it allocates a fresh one --GN */ for (i = 0, sort_table--; /* until end of file */; i++, sort_table--) { *sort_table = cur_line; cur_line += length; /* if little room is left, allocate a fresh buffer before attempting to * read a line, and remember to free it if no further line is forthcoming. * This avoids some copying of partial lines --GN */ if (bufspace < min_bufspace) { buf = (char*) malloc(MPQS_STRING_LENGTH * sizeof(char)); cur_line = buf; bufspace = MPQS_STRING_LENGTH; if (fgets(cur_line, bufspace, TMP) == NULL) { free(buf); break; } if (buflist - buflist_head >= buflist_size) abort(); /* remember buffer for later deallocation */ *buflist++ = buf; length = strlen(cur_line) + 1; bufspace -= length; continue; } /* normal case: try fitting another line into the current buffer */ if (fgets(cur_line, bufspace, TMP) == NULL) break; /* none exists */ length = strlen(cur_line) + 1; bufspace -= length; /* check whether we got the entire line or only part of it */ if (bufspace == 0 && cur_line[length-2] != '\n') { size_t lg1; buf = (char*) malloc(MPQS_STRING_LENGTH * sizeof(char)); if (buflist - buflist_head >= buflist_size) abort(); /* remember buffer for later deallocation */ *buflist++ = buf; /* copy what we've got to the new buffer */ (void)strcpy(buf, cur_line); /* cannot overflow */ cur_line = buf + length - 1; /* point at the \0 byte */ bufspace = MPQS_STRING_LENGTH - length + 1; /* read remainder of line */ if (fgets(cur_line, bufspace, TMP) == NULL) { printf("MPQS: relations file truncated?!\n"); abort(); } lg1 = strlen(cur_line); length += lg1; /* we already counted the \0 once */ bufspace -= (lg1 + 1); /* but here we must take it into account */ cur_line = buf; /* back up to the beginning of the line */ } } /* for */ fclose(TMP); /* sort the whole lot in place by swapping pointers */ qsort(sort_table, i, sizeof(char*), lp_relations_cmp); /* copy results back to the original 
file, skipping exact duplicates */ TMP = flint_fopen(filename, "w"); old_s = sort_table[0]; flint_fputs(sort_table[0], TMP); count = 1; for(j = 1; j < i; j++) { if (strcmp(old_s, sort_table[j])) { flint_fputs(sort_table[j], TMP); count++; } old_s = sort_table[j]; } fflush(TMP); fclose(TMP); /* deallocate buffers */ while (*--buflist) { if (buflist != buflist_head) free(*buflist); /* free a buffer */ } free(buflist_head); free(s_table); return count; } /* Appends contents of file fp1 to fp (auxiliary routine for merge sort) and returns number of lines copied. Closes fp afterwards. */ long append_file(FILE *fp, FILE *fp1) { char line[MPQS_STRING_LENGTH]; long c = 0; while (fgets(line, MPQS_STRING_LENGTH, fp1)) { flint_fputs(line, fp); c++; } if (fflush(fp)) { printf("Error while flushing file.\n"); abort(); } fclose(fp); return c; } /* Merge-sort on the files LPREL and LPNEW; assumes that LPREL and LPNEW are already sorted. Creates/truncates the TMP file, writes result to it and closes it (via append_file()). Instead of LPREL, LPNEW we may also call this with FREL, FNEW. In the latter case COMB should be NULL (and we return the count of all full relations), in the former case it should be non-NULL (and we return the count of frels we expect to be able to combine out of the present lprels). If COMB is non-NULL, the combinable lprels are written out to this separate file. We retain only one occurrence of each large prime in TMP (i.e. in the future LPREL file). --GN */ #define swap_lines() { char *line_tmp;\ line_tmp = line_new_old; \ line_new_old = line_new; \ line_new = line_tmp; } long mergesort_lp_file_internal(FILE *LPREL, FILE *LPNEW, FILE *COMB, FILE *TMP) { char line1[MPQS_STRING_LENGTH], line2[MPQS_STRING_LENGTH]; char line[MPQS_STRING_LENGTH]; char *line_new = line1, *line_new_old = line2; long q_new, q_new_old = -1, q, i = 0, c = 0; long comb_in_progress; if ( !fgets(line_new, MPQS_STRING_LENGTH, LPNEW) ) { /* LPNEW is empty: copy LPREL to TMP. 
Could be done by a rename if we didn't want to count the lines (again)... however, this case will not normally happen */ i = append_file(TMP, LPREL); return COMB ? 0 : i; } /* we now have a line_new from LPNEW */ if (!fgets(line, MPQS_STRING_LENGTH, LPREL)) { /* LPREL is empty: copy LPNEW to TMP... almost. */ flint_fputs(line_new, TMP); if (!COMB) { /* full relations mode */ i = append_file(TMP, LPNEW); return i + 1; } /* LP mode: check for combinable relations */ q_new_old = atol(line_new); /* we need to retain a copy of the old line just for a moment, because we may yet have to write it to COMB. Do this by swapping the two buffers */ swap_lines(); comb_in_progress = 0; i = 0; while (fgets(line_new, MPQS_STRING_LENGTH, LPNEW)) { q_new = atol(line_new); if (q_new_old == q_new) { /* found combinables, check whether we're already busy on this particular large prime */ if (!comb_in_progress) { /* if not, write first line to COMB, creating and opening the file first if it isn't open yet */ flint_fputs(line_new_old, COMB); comb_in_progress = 1; } /* in any case, write the current line, and count it */ flint_fputs(line_new, COMB); i++; } else { /* not combinable */ q_new_old = q_new; comb_in_progress = 0; /* and dump it to the TMP file */ flint_fputs(line_new, TMP); /* and stash it away for a moment */ swap_lines(); comb_in_progress = 0; } } /* while */ fclose(TMP); return i; } /* normal case: both LPNEW and LPREL are not empty */ q_new = atol(line_new); q = atol(line); for(;;) { /* main merging loop */ i = comb_in_progress = 0; /* first the harder case: let LPNEW catch up with LPREL, and possibly overtake it, checking for combinables coming from LPNEW alone */ while (q > q_new) { if (!COMB || !comb_in_progress) flint_fputs(line_new, TMP); if (!COMB) c++; /* in FREL mode, count lines written */ else if (!comb_in_progress) { q_new_old = q_new; swap_lines(); } if (!fgets(line_new, MPQS_STRING_LENGTH, LPNEW)) { flint_fputs(line, TMP); if (!COMB) c++; else c += i; i = 
append_file(TMP, LPREL); return COMB ? c : c + i; } q_new = atol(line_new); if (!COMB) continue; /* LP mode only: */ if (q_new_old != q_new) /* not combinable */ comb_in_progress = 0; /* next loop will deal with it, or loop may end */ else { /* found combinables, check whether we're already busy on this large prime */ if (!comb_in_progress) { flint_fputs(line_new_old, COMB); comb_in_progress = 1; } /* in any case, write the current line, and count it */ flint_fputs(line_new, COMB); i++; } } /* while q > q_new */ /* q <= q_new */ if (COMB) c += i; /* accumulate count of combinables */ i = 0; /* and clear it */ comb_in_progress = 0;/* redundant */ /* now let LPREL catch up with LPNEW, and possibly overtake it */ while (q < q_new) { flint_fputs(line, TMP); if (!COMB) c++; if (!fgets(line, MPQS_STRING_LENGTH, LPREL)) { flint_fputs(line_new, TMP); i = append_file(TMP, LPNEW); return COMB ? c : c + i + 1; } else q = atol(line); } /* q >= q_new */ /* Finally, it may happen that q == q_new, indicating combinables whose large prime is already in LPREL, and appears now one or more times in LPNEW. Thus in this sub-loop we advance LPNEW. The `line' from LPREL is left alone, and will be written to TMP the next time around the main for loop; we only write it to COMB here -- unless all we find is an exact duplicate of the line we already have, that is. (There can be at most one such, and if so it is simply discarded.) 
*/ while (q == q_new) { if (!strcmp(line_new, line)) { /* duplicate -- move right ahead to the next LPNEW line */ ;/* do nothing here */ } else if (!COMB) { /* full relations mode: write line_new out first, keep line */ flint_fputs(line_new, TMP); c++; } else { /* LP mode, and combinable relation */ if (!comb_in_progress) { flint_fputs(line, COMB); comb_in_progress = 1; } flint_fputs(line_new, COMB); i++; } /* NB comb_in_progress is cleared by q_new becoming bigger than q, thus the current while loop terminating, the next time through the main for loop */ /* common ending: get another line_new, if any */ if (!fgets(line_new, MPQS_STRING_LENGTH, LPNEW)) { flint_fputs(line, TMP); if (!COMB) c++; else c += i; i = append_file(TMP, LPREL); return COMB ? c : c + i; } else q_new = atol(line_new); } /* while */ if (COMB) c += i; /* accumulate count of combinables */ } }

/*
   Perform mergesort of large prime files.

   Opens NEW_str for reading, opens the relation file (REL_str) for reading
   and a temporary file (TMP_str) for writing, both under $TMPDIR (or "./"
   when TMPDIR is unset or on WINCE/macintosh builds), merges them with
   mergesort_lp_file_internal(), then renames the temporary file over the
   relation file.  Aborts if a file cannot be opened or renamed.

   Returns the count reported by mergesort_lp_file_internal().

   NOTE(review): TMP is not closed here before rename(); presumably the
   internal merge closes it (e.g. via append_file) -- confirm.
*/
long mergesort_lp_file(char *REL_str, char *NEW_str, char *TMP_str, FILE *COMB)
{
    FILE *NEW = flint_fopen(NEW_str, "r");

#if defined(WINCE) || defined(macintosh)
    char * tmp_dir = NULL;
#else
    char * tmp_dir = getenv("TMPDIR");
#endif
    if (tmp_dir == NULL) tmp_dir = "./";

    char * TMP_name = get_filename(tmp_dir,unique_filename(TMP_str));
    char * REL_name = get_filename(tmp_dir,unique_filename(REL_str));
    FILE * TMP = fopen(TMP_name,"w");
    FILE * REL = fopen(REL_name,"r");
    if ((!TMP) || (!REL))
    {
        printf("Unable to open temporary file\n");
        abort();
    }

    long tp = mergesort_lp_file_internal(REL, NEW, COMB, TMP);
    fclose(REL);
    fclose(NEW);

    if (rename(TMP_name,REL_name))
    {
        printf("Cannot rename file %s to %s", TMP_str, REL_str);
        abort();
    }
    return tp;
}

/*********************************************************************

    Routines for writing relations as strings

*********************************************************************/

/*
   Writes a factor pi^ei into a string as " ei pi" and advances *last to
   the terminating NUL so that further text can be appended.
   Both values are unsigned long, hence the %lu conversions.
*/
void add_factor(char **last, unsigned long ei, unsigned long pi)
{
    sprintf(*last, " %lu %lu", ei, pi);
    *last += strlen(*last);
}

/*
   Concatenate " 0" to string.
   Note: *last is left pointing one past the written NUL terminator.
*/
void add_0(char **last)
{
    char *s = *last;
    *s++ = ' ';
    *s++ = '0';
    *s++ = 0;
    *last = s;
}

/*********************************************************************

    Large prime relation combining

*********************************************************************/

/*
   Add to an array of unsigned longs the exponents from a relation string.

   The string r is a whitespace separated list of "exponent index" pairs;
   parsing stops at the first exponent that converts to 0 or at the end of
   the string.  r itself is not modified (a local copy is tokenised).
   The caller must make sure ei is large enough for every index in r.
*/
void set_exponents(unsigned long *ei, char *r)
{
    char *s, b[MPQS_STRING_LENGTH];
    long e;

    strcpy(b, r);
    s = strtok(b, " \n");
    while (s != NULL)
    {
        e = atol(s);
        if (!e) break;
        s = strtok(NULL, " \n");
        if (s == NULL) break; /* truncated pair: exponent without an index */
        ei[atol(s)] += e;
        s = strtok(NULL, " \n");
    }
}

/*
   Writes an lp_entry from a string.

   buf is split in place into three fields: the large prime q (up to the
   first space), Y (starting three characters after that space, up to the
   next space) and E (starting three characters after that space, up to
   the newline).
   NOTE(review): the three-character skips presumably step over separators
   such as " @ " and " : " -- confirm against the writer.  buf must be well
   formed; a missing space or newline is not detected here.
*/
void set_lp_entry(mpqs_lp_entry *e, char *buf)
{
    char *s1, *s2;

    s1 = buf;
    s2 = strchr(s1, ' ');
    *s2 = '\0';
    e->q = atol(s1);

    s1 = s2 + 3;
    s2 = strchr(s1, ' ');
    *s2 = '\0';
    strcpy(e->Y, s1);

    s1 = s2 + 3;
    s2 = strchr(s1, '\n');
    *s2 = '\0';
    strcpy(e->E, s1);
}

/*
   Combines the large prime relations in COMB to full relations and
   inserts them as relations into the matrix, etc.

   Consecutive lines of COMB sharing the same large prime q are combined
   pairwise: the exponent vectors are added, Y is set to Y1*Y2/q mod N
   (reduced to the smaller of the value and N minus the value) and the
   result is handed to insert_relation().

   If some q is not invertible mod N, gcd(q, N) is computed.  When it is a
   proper divisor it is stored in `factor` and the function returns at once
   with the number of combinations made so far; when it equals N the
   relation is discarded.

   Returns the sum of the insert_relation() results in the normal case.
   NOTE(review): the factor-found path returns the combination count `c`
   rather than that sum, as the original code did -- confirm this is what
   callers expect.
*/
unsigned long combine_large_primes(QS_t * qs_inf, linalg_t * la_inf, poly_t * poly_inf,
                                   FILE *COMB, mpz_t factor)
{
    char buf[MPQS_STRING_LENGTH];
    mpqs_lp_entry e[2]; /* we'll use the two alternatingly */
    unsigned long *ei = NULL;
    long ei_size = qs_inf->num_primes;
    mpz_t * N = &qs_inf->mpz_n;
    long old_q;
    long i;
    long c = 0;
    unsigned long newrels = 0;
    unsigned long result = 0;
    mpz_t inv_q, Y1, Y2, new_Y, new_Y1;

    mpz_init(inv_q);
    mpz_init(Y1);
    mpz_init(Y2);
    mpz_init(new_Y);
    mpz_init(new_Y1);

    if (!fgets(buf, MPQS_STRING_LENGTH, COMB)) goto cleanup; /* should not happen */

    ei = (unsigned long *) malloc(sizeof(unsigned long)*ei_size);
    if (ei == NULL)
    {
        printf("Unable to allocate memory for exponent array\n");
        abort();
    }

    /* put first lp relation in row 0 of e */
    set_lp_entry(&e[0], buf);
    i = 1; /* second relation will go into row 1 */
    old_q = e[0].q;
    mpz_set_ui(inv_q, old_q);
    while (!mpz_invert(inv_q, inv_q, *N)) /* can happen */
    {
        /* We have found a factor. It could be N when N is quite small;
           or we might just have found a divisor by sheer luck. */
        mpz_gcd_ui(inv_q, *N, old_q);
        if (!mpz_cmp(inv_q, *N)) /* pity */
        {
            /* useless relation: replace it by the next line, if any */
            if (!fgets(buf, MPQS_STRING_LENGTH, COMB)) goto cleanup;
            set_lp_entry(&e[0], buf);
            old_q = e[0].q;
            mpz_set_ui(inv_q, old_q);
            continue;
        }
        mpz_set(factor, inv_q);
        result = c;
        goto cleanup;
    }
    gmp_sscanf(e[0].Y, "%Zd", Y1);

    while (fgets(buf, MPQS_STRING_LENGTH, COMB))
    {
        set_lp_entry(&e[i], buf);
        if (e[i].q != old_q)
        {
            /* switch to combining a new bunch, swapping the rows */
            old_q = e[i].q;
            mpz_set_ui(inv_q, old_q);
            /* A single test is intended here: on failure we either report
               the factor or skip to the next input line.  (Looping on the
               same failed inversion could never terminate correctly.) */
            if (!mpz_invert(inv_q, inv_q, *N)) /* can happen */
            {
                mpz_gcd_ui(inv_q, *N, old_q);
                if (!mpz_cmp(inv_q, *N)) /* pity */
                {
                    old_q = -1; /* sentinel */
                    continue;   /* discard this combination */
                }
                mpz_set(factor, inv_q);
                result = c;
                goto cleanup;
            }
            gmp_sscanf(e[i].Y, "%Zd", Y1);
            i = 1 - i; /* subsequent relations go to other row */
            continue;
        }

        /* count and combine the two we've got, and continue in the same row */
        memset((void *)ei, 0, ei_size * sizeof(*ei));
        set_exponents(ei, e[0].E);
        set_exponents(ei, e[1].E);
        gmp_sscanf(e[i].Y, "%Zd", Y2);

        if (mpz_cmpabs(Y1,Y2)!=0)
        {
            unsigned long * small = la_inf->small;
            fac_t * fac = la_inf->factor;
            unsigned long num_factors = 0;
            unsigned long small_primes = qs_inf->small_primes;
            unsigned long num_primes = qs_inf->num_primes;
            unsigned long l;

            c++;
            mpz_mul(new_Y, Y1, Y2);
            mpz_mul(new_Y, new_Y, inv_q);
            mpz_mod(new_Y, new_Y, *N);
            mpz_sub(new_Y1, *N, new_Y);
            if (mpz_cmpabs(new_Y1, new_Y) < 0) mpz_set(new_Y, new_Y1);

            for (l = 0; l < small_primes; l++)
            {
                small[l] = ei[l];
            }
            for (l = small_primes; l < num_primes; l++)
            {
                if (ei[l])
                {
                    fac[num_factors].ind = l;
                    fac[num_factors].exp = ei[l];
                    num_factors++;
                }
            }
            la_inf->num_factors = num_factors;
            newrels += insert_relation(qs_inf, la_inf, poly_inf, new_Y);
        }
    } /* while */

    result = newrels;

cleanup:
    free(ei);
    mpz_clear(inv_q);
    mpz_clear(Y1);
    mpz_clear(Y2);
    mpz_clear(new_Y);
    mpz_clear(new_Y1);
    return result;
}
flint-1.011/QS/poly.h0000644017361200017500000000417611025357253014211 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** poly.h Header file for poly.c. 
(C) 2006 William Hart ******************************************************************************/ #ifndef POLY_H #define POLY_H #include #include "common.h" #define POLY_PARAMS 0 // Print the parameters being used to choose polynomials #define POLY_A 0 // Print target A and actual A typedef struct poly_s { unsigned long s; unsigned long fact, span, min; unsigned long target_A; unsigned long A; unsigned long B; mpz_t C; unsigned long * A_ind; unsigned long * A_modp; unsigned long * A_inv; unsigned long * soln1; unsigned long * soln2; unsigned long ** A_inv2B; double * inv_p2; unsigned long * B_terms; } poly_t; void poly_init(QS_t * qs_inf, poly_t * poly_inf, mpz_t N); void poly_clear(poly_t * poly_inf); void compute_A(QS_t * qs_inf, poly_t * poly_inf); void compute_B_terms(QS_t * qs_inf, poly_t * poly_inf); void compute_C(QS_t * qs_inf, poly_t * poly_inf); void compute_off_adj(QS_t * qs_inf, poly_t * poly_inf); void compute_A_factor_offsets(QS_t * qs_inf, poly_t * poly_inf); #endif flint-1.011/QS/common.h0000644017361200017500000000662211025357253014514 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** common.h Common header file for tinyQS (C) 2006 William Hart ******************************************************************************/ #ifndef QS_COMMON_H #define QS_COMMON_H #include #include "../fmpz.h" // For each bitsize, this table stores, in order: // bitsize, num_primes, sieve_size, small_primes and large_prime/factor_base[num_primes-1] static const unsigned long prime_tab[][5] = { {32, 30, 2500, 4, 1}, {40, 50, 3000, 4, 1}, {50, 80, 3500, 5, 1}, {60, 100, 4000, 5, 1}, {70, 300, 6000, 6, 1}, {80, 400, 8000, 6, 1}, {90, 500, 10000, 7, 1}, {100, 650, 13000, 7, 1}, {110, 800, 15000, 7, 1}, // 31 digits {120, 1000, 20000, 7, 2}, {130, 800, 32000, 9, 1}, // 41 digits {140, 1200, 28000, 8, 10}, {150, 1800, 32000, 8, 10}, {160, 2000, 40000, 8, 30}, {170, 2200, 64000, 9, 1}, // 50 digits {180, 2400, 64000, 9, 35}, {190, 2700, 64000, 10, 40}, {200, 3600, 64000, 10, 60}, // 60 digits 5200 {210, 6000, 64000, 12, 60}, {220, 11000, 64000, 15, 70}, {230, 8500, 64000, 17, 80}, // 70 digits {240, 24000, 64000, 19, 80}, {250, 24000, 64000, 19, 80}, {260, 55000, 128000, 25, 100}, {270, 55000, 128000, 27, 100} }; #define PTABSIZE (sizeof(prime_tab)/(5*sizeof(unsigned long))) #define SIEVE_BLOCK 64000 #define SECOND_PRIME 3000 // 3000 6400 #define EXTRA_RELS 64L // number of additional relations to find above the number of primes #define MAX_FACS 60 // Maximum number of different prime factors // a relation can have 25, 30 #define SMALL_PRIMES 8 // Used by tinyQS Todo: make this a variable #define SIEVE_SIZE 64000 // Used by tinyQS Todo: make this a variable typedef struct prime_s { uint32_t p; // prime double pinv; // 
precomputed inverse } prime_t; typedef struct QS_s { fmpz_t n; // Number to factor = kn when multiplier is found mpz_t mpz_n; unsigned long k; // Multiplier unsigned long bits; unsigned long prec; // Number of limbs required to hold B unsigned long num_primes; unsigned long sieve_size; unsigned long error_bits; unsigned long sieve_fill; unsigned long small_primes; unsigned long large_prime; prime_t * factor_base; uint32_t * sqrts; unsigned char * sizes; unsigned long * prime_count; } QS_t; #endif flint-1.011/QS/linear_algebra.c0000644017361200017500000002241611025357253016145 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** linear_algebra.c Routines for dealing with building and handling the final F_2 matrix (C) 2006 William Hart ******************************************************************************/ #include #include #include #include "../flint.h" #include "../memory-manager.h" #include "common.h" #include "poly.h" #include "linear_algebra.h" #include "block_lanczos.h" /*========================================================================= linear_algebra_init: Function: Allocate space for the various linear algebra structures ==========================================================================*/ void linear_algebra_init(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf) { la_col_t * matrix; mpz_t * Y_arr; const unsigned long buffer_size = 3*(qs_inf->num_primes + EXTRA_RELS + 100)/2; // Allows for 1/3 of relations to be duplicates la_inf->small = (unsigned long *) flint_stack_alloc(SMALL_PRIMES); la_inf->factor = (fac_t *) flint_stack_alloc_bytes(sizeof(fac_t)*MAX_FACS); matrix = la_inf->matrix = (la_col_t *) flint_stack_alloc_bytes(sizeof(la_col_t)*(qs_inf->num_primes + EXTRA_RELS + 200)); la_inf->unmerged = la_inf->matrix + qs_inf->num_primes + EXTRA_RELS + 100; Y_arr = la_inf->Y_arr = (mpz_t *) flint_stack_alloc_bytes(sizeof(mpz_t)*buffer_size); la_inf->curr_rel = la_inf->relation = (unsigned long *) flint_stack_alloc(buffer_size*MAX_FACS*2); la_inf->qsort_arr = (la_col_t **) flint_stack_alloc(100); for (unsigned long i = 0; i < buffer_size; i++) { mpz_init2(Y_arr[i], 128); } for (unsigned long i = 0; i < qs_inf->num_primes + EXTRA_RELS + 200; i++) { matrix[i].weight = 0; } la_inf->num_unmerged = 0; la_inf->columns 
= 0; la_inf->num_relations = 0; } void linear_algebra_clear(linalg_t * la_inf, QS_t * qs_inf) { la_col_t * matrix = la_inf->matrix; la_col_t * unmerged = la_inf->unmerged; mpz_t * Y_arr = la_inf->Y_arr; const unsigned long buffer_size = 3*(qs_inf->num_primes + EXTRA_RELS + 100)/2; for (unsigned long i = 0; i < buffer_size; i++) { mpz_clear(Y_arr[i]); } for (unsigned long i = 0; i < la_inf->columns; i++) // Clear all used columns { free_col(matrix + i); } for (unsigned long i = 0; i < la_inf->num_unmerged; i++) // Clear all used columns { free_col(unmerged + i); } flint_stack_release(); // Clear qsort_array flint_stack_release(); // Clear relation flint_stack_release(); // Clear Y_arr flint_stack_release(); // Clear matrix and unmerged flint_stack_release(); // Clear factor flint_stack_release(); // Clear small } /*========================================================================= Compare relations: Function: Compare two relations; used by qsort ==========================================================================*/ int relations_cmp(const void *a, const void *b) { la_col_t * ra = *((la_col_t **) a); la_col_t * rb = *((la_col_t **) b); long point; if (ra->weight > rb->weight) return 1; else if (ra->weight < rb->weight) return -1; for (point = ra->weight-1; (ra->data[point] == rb->data[point]) && (point >= 0); point--) { ; } if (point == -1L) return 0; if (ra->data[point] > rb->data[point]) return 1; else if (ra->data[point] < rb->data[point]) return -1; } int relations_cmp2(const void *a, const void *b) { la_col_t * ra = (la_col_t *) a; la_col_t * rb = (la_col_t *) b; if (!ra->weight) printf("ra error\n"); long point; if (ra->weight > rb->weight) return 1; else if (ra->weight < rb->weight) return -1; for (point = ra->weight-1; (ra->data[point] == rb->data[point]) && (point >= 0); point--) { ; } if (point == -1L) return 0; if (ra->data[point] > rb->data[point]) return 1; else if (ra->data[point] < rb->data[point]) return -1; } 
/*========================================================================== Merge sort: Function: Merge a list of sorted new relations into a list of existing sorted relations. Sort is done using a merge sort algorithm with a short stack. ===========================================================================*/ unsigned long merge_sort(linalg_t * la_inf) { la_col_t * matrix = la_inf->matrix; long columns = la_inf->columns; la_col_t ** qsort_arr = la_inf->qsort_arr; long num_unmerged = la_inf->num_unmerged; long dups = 0; int comp; for (long i = columns + num_unmerged - 1L; i >= dups; i--) { if (!columns) comp = -1; else if (!num_unmerged) comp = 1; else { comp = relations_cmp2(matrix + columns - 1L, qsort_arr[num_unmerged - 1L]); } switch (comp) { case -1: { copy_col(matrix + i, qsort_arr[num_unmerged - 1L]); clear_col(qsort_arr[num_unmerged - 1L]); num_unmerged--; break; } case 1 : { copy_col(matrix + i, matrix + columns - 1L); columns--; break; } case 0 : { free_col(qsort_arr[num_unmerged - 1L]); clear_col(qsort_arr[num_unmerged - 1L]); num_unmerged--; copy_col(matrix + i, matrix + columns - 1L); columns--; dups++; break; } } } columns = la_inf->columns + la_inf->num_unmerged - dups; if (dups) { for (unsigned long i = 0; i < columns; i++) { copy_col(matrix + i, matrix + i + dups); } } la_inf->columns = columns; columns = la_inf->num_unmerged - dups; la_inf->num_unmerged = 0; #if DUPS printf("%ld new, %ld dups\n", columns, dups); #endif return columns; } /*========================================================================== Merge relations: Function: Merge unmerged relations into the matrix ===========================================================================*/ unsigned long merge_relations(linalg_t * la_inf) { const unsigned long num_unmerged = la_inf->num_unmerged; la_col_t * unmerged = la_inf->unmerged; la_col_t ** qsort_arr = la_inf->qsort_arr; if (num_unmerged) { for (unsigned long i = 0; i < num_unmerged; i++) { qsort_arr[i] = unmerged + i; 
} qsort(qsort_arr, num_unmerged, sizeof(la_col_t *), relations_cmp); return merge_sort(la_inf); } return 0; } /*========================================================================== Insert relation: Function: Insert the relation into the matrix and store the Y value ===========================================================================*/ unsigned long insert_relation(linalg_t * la_inf, poly_t * poly_inf, mpz_t Y) { la_col_t * unmerged = la_inf->unmerged; unsigned long num_unmerged = la_inf->num_unmerged; unsigned long * small = la_inf->small; const unsigned long num_factors = la_inf->num_factors; fac_t * factor = la_inf->factor; unsigned long * curr_rel = la_inf->curr_rel; unsigned long fac_num = 0; clear_col(unmerged + num_unmerged); for (unsigned long i = 0; i < SMALL_PRIMES; i++) { if (small[i] & 1) insert_col_entry(unmerged + num_unmerged, i); if (small[i]) { curr_rel[2*fac_num + 1] = i; curr_rel[2*fac_num + 2] = small[i]; fac_num++; } } for (unsigned long i = 0; i < num_factors; i++) { if (factor[i].exp & 1) insert_col_entry(unmerged + num_unmerged, factor[i].ind); curr_rel[2*fac_num + 1] = factor[i].ind; curr_rel[2*fac_num + 2] = factor[i].exp; fac_num++; } curr_rel[0] = fac_num; unmerged[num_unmerged].orig = la_inf->num_relations; mpz_set(la_inf->Y_arr[la_inf->num_relations], Y); la_inf->curr_rel += MAX_FACS*2; la_inf->num_unmerged++; la_inf->num_relations++; if (la_inf->num_unmerged == 100) { return merge_relations(la_inf); } return 0; } flint-1.011/QS/mp_sieve.h0000644017361200017500000000464111025357253015032 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mp_sieve.h Header file for sieve.c. (C) 2006 William Hart ******************************************************************************/ #ifndef MPSIEVE_H #define MPSIEVE_H #include #include "mp_linear_algebra.h" #include "common.h" #define RELATIONS 0 // Print out relations as they are generated #define POLYS 0 // Print out polynomials and offsets in candidate evaluation void get_sieve_params(QS_t * qs_inf); void do_sieving(QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve, unsigned long first_prime, unsigned long second_prime, unsigned long M, int first, int last); void do_sieving2(QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve); void do_sieving3(QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve, unsigned long first_prime, unsigned long second_prime, unsigned long M); void update_offsets(unsigned long poly_add, uint32_t * poly_corr, QS_t * qs_inf, poly_t * poly_inf); unsigned long evaluate_sieve(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve); unsigned long evaluate_candidate(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned long i, unsigned char * sieve); #endif flint-1.011/QS/mp_linear_algebra.h0000644017361200017500000000636311025357253016651 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mp_linear_algebra.h Header file for mp_linear_algebra.c. (C) 2006 William Hart ******************************************************************************/ #ifndef MPLINALG_H #define MPLINALG_H #include #include #include "common.h" #include "mp_poly.h" #include "block_lanczos.h" #define DUPS 0 // Print info about number of duplicate relations #define TEST3 0 // Checks if X = Y^2 mod N immediately after storing each relation typedef struct fac_s { unsigned long ind; unsigned long exp; } fac_t; typedef struct linalg_s { unsigned long * small; // Exponents of small primes in currently evaluated candidate fac_t * factor; // An array of factors with exponents corresponding to the current candidate unsigned long num_factors; //The length of the factor array for the current candidate la_col_t * matrix; // The final sorted F_2 matrix plus possibly some empty columns at the start unsigned long columns; // The number of columns in the matrix so far la_col_t * unmerged; // A new list of unmerged F_2 columns unsigned long num_unmerged; // The current number of unmerged F_2 relations unsigned long num_lp_unmerged; // The current number of unmerged partial relations mpz_t * 
Y_arr; // The Y values corresponding to all relations found unsigned long * relation; // The list of all relations found unsigned long * curr_rel; // Pointer to where we have got up to in the list of relations found unsigned long num_relations; // Total number of relations found so far la_col_t ** qsort_arr; // An array of pointers to the unmerged relations for quicksort char * rel_str; FILE * lpnew; } linalg_t; void linear_algebra_init(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf); void linear_algebra_clear(linalg_t * la_inf, QS_t * qs_inf); unsigned long merge_sort(linalg_t * la_inf); unsigned long merge_lp_relations(QS_t * qs_inf, poly_t * poly_inf, linalg_t * la_inf); unsigned long merge_relations(linalg_t * la_inf); unsigned long insert_lp_relation(QS_t * qs_inf, linalg_t * la_inf, poly_t * poly_inf, mpz_t Y, mpz_t res); unsigned long insert_relation(QS_t * qs_inf, linalg_t * la_inf, poly_t * poly_inf, mpz_t Y); #endif flint-1.011/QS/sieve.c0000644017361200017500000002176511025357253014337 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** sieve.c Routines for doing and managing sieving (C) 2006 William Hart ******************************************************************************/ #include #include #include #include #include "../flint.h" #include "../long_extras.h" #include "common.h" #include "poly.h" #include "linear_algebra.h" void do_sieving(QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve) { unsigned long num_primes = qs_inf->num_primes; unsigned long * soln1 = poly_inf->soln1; unsigned long * soln2 = poly_inf->soln2; prime_t * factor_base = qs_inf->factor_base; unsigned long p, correction; register unsigned char * position; unsigned char * end = sieve + SIEVE_SIZE; unsigned char * sizes = qs_inf->sizes; register unsigned char * pos1; unsigned char * pos2; register unsigned char * bound; unsigned long size; long diff; memset(sieve, 0, SIEVE_SIZE); *end = 255; for (unsigned long prime = SMALL_PRIMES; prime < num_primes; prime++) { if (soln2[prime] == -1L) continue; p = factor_base[prime].p; size = sizes[prime]; pos1 = sieve + soln1[prime]; pos2 = sieve + soln2[prime]; diff = pos2 - pos1; bound = end - 2*p; while (bound - pos1 > 0) { (*pos1)+=size, (*(pos1+diff))+=size, pos1+=p; (*pos1)+=size, (*(pos1+diff))+=size, pos1+=p; } while ((end - pos1 > 0) && (end - pos1 - diff > 0)) { (*pos1)+=size, (*(pos1+diff))+=size, pos1+=p; } pos2 = pos1+diff; if (end - pos2 > 0) { (*pos2)+=size; } if (end - pos1 > 0) { (*pos1)+=size; } } } void update_offsets(unsigned long poly_add, unsigned long * poly_corr, QS_t * qs_inf, poly_t * poly_inf) { unsigned long num_primes = qs_inf->num_primes; unsigned long * soln1 = poly_inf->soln1; unsigned long * soln2 
= poly_inf->soln2; prime_t * factor_base = qs_inf->factor_base; unsigned long p, correction; for (unsigned long prime = 2; prime < num_primes; prime++) { p = factor_base[prime].p; correction = (poly_add ? p - poly_corr[prime] : poly_corr[prime]); soln1[prime] += correction; if (soln1[prime] >= p) soln1[prime] -= p; if (soln2[prime] == -1L) continue; soln2[prime] += correction; if (soln2[prime] >= p) soln2[prime] -= p; } } /*========================================================================== evaluate_candidate: Function: determine whether a given sieve entry is a relation ===========================================================================*/ unsigned long evaluate_candidate(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned long i, unsigned char * sieve) { unsigned long bits, exp, extra_bits, modp, prime; unsigned long num_primes = qs_inf->num_primes; prime_t * factor_base = qs_inf->factor_base; unsigned long * soln1 = poly_inf->soln1; unsigned long * soln2 = poly_inf->soln2; unsigned long * small = la_inf->small; fac_t * factor = la_inf->factor; unsigned long A = poly_inf->A; unsigned long B = poly_inf->B; unsigned long num_factors = 0; unsigned long j; mpz_t * C = &poly_inf->C; unsigned long relations = 0; double pinv; mpz_t X, Y, res, p; mpz_init(X); mpz_init(Y); mpz_init(res); mpz_init(p); #if POLYS printf("X = %ld\n", i); printf("%ldX^2+2*%ldX+%ld\n", A, B, C); #endif mpz_set_ui(X, i); mpz_sub_ui(X, X, SIEVE_SIZE/2); //X mpz_mul_ui(Y, X, A); if ((long) B < 0) { mpz_sub_ui(Y, Y, -B); // Y = AX+B mpz_sub_ui(res, Y, -B); } else { mpz_add_ui(Y, Y, B); mpz_add_ui(res, Y, B); } mpz_mul(res, res, X); mpz_add(res, res, *C); // res = AX^2+2BX+C bits = mpz_sizeinbase(res, 2); bits -= 10; extra_bits = 0; mpz_set_ui(p, 2); // divide out by powers of 2 exp = mpz_remove(res, res, p); #if RELATIONS if (exp) printf("2^%ld ", exp); #endif extra_bits += exp; small[1] = exp; if (factor_base[0].p != 1) // divide out powers of the multiplier { mpz_set_ui(p, 
factor_base[0].p); exp = mpz_remove(res, res, p); if (exp) extra_bits += exp*qs_inf->sizes[0]; small[0] = exp; #if RELATIONS if (exp) printf("%ld^%ld ", factor_base[0].p, exp); #endif } else small[0] = 0; for (unsigned long j = 2; j < SMALL_PRIMES; j++) // pull out small primes { prime = factor_base[j].p; pinv = factor_base[j].pinv; modp = z_mod_64_precomp(i, prime, pinv); if ((modp == soln1[j]) || (modp == soln2[j])) { mpz_set_ui(p, prime); exp = mpz_remove(res, res, p); if (exp) extra_bits += qs_inf->sizes[j]; small[j] = exp; #if RELATIONS if (exp) gmp_printf("%Zd^%ld ", p, exp); #endif } else small[j] = 0; } if (extra_bits + sieve[i] > bits) { sieve[i] += extra_bits; for (j = SMALL_PRIMES; (j < num_primes) && (extra_bits < sieve[i]); j++) // pull out remaining primes { prime = factor_base[j].p; pinv = factor_base[j].pinv; modp = z_mod_64_precomp(i, prime, pinv); if (soln2[j] != -1L) { if ((modp == soln1[j]) || (modp == soln2[j])) { mpz_set_ui(p, prime); exp = mpz_remove(res, res, p); #if RELATIONS if (exp) gmp_printf("%Zd^%ld ", p, exp); #endif if (exp) { extra_bits += qs_inf->sizes[j]; factor[num_factors].ind = j; factor[num_factors++].exp = exp; } } } else { mpz_set_ui(p, prime); exp = mpz_remove(res, res, p); factor[num_factors].ind = j; factor[num_factors++].exp = exp+1; #if RELATIONS if (exp) gmp_printf("%Zd^%ld ", p, exp); #endif } } if (mpz_cmpabs_ui(res, 1) == 0) // We've found a relation { unsigned long * A_ind = poly_inf->A_ind; for (unsigned long i = 0; i < poly_inf->s; i++) // Commit any outstanding A factors { if (A_ind[i] >= j) { factor[num_factors].ind = A_ind[i]; factor[num_factors++].exp = 1; } } la_inf->num_factors = num_factors; relations += insert_relation(la_inf, poly_inf, Y); // Insert the relation in the matrix if (la_inf->num_relations >= qs_inf->num_primes + EXTRA_RELS + 100) { printf("Error: too many duplicate relations!\n"); abort(); } goto cleanup; } } #if RELATIONS printf("\n"); #endif cleanup: mpz_clear(X); mpz_clear(Y); 
mpz_clear(res); mpz_clear(p); return relations; } /*========================================================================== evaluateSieve: Function: searches sieve for relations and sticks them into a matrix ===========================================================================*/ unsigned long evaluate_sieve(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve) { unsigned long i = 0; unsigned long j=0; unsigned long * sieve2 = (unsigned long *) sieve; unsigned long rels = 0; while (j < SIEVE_SIZE/sizeof(unsigned long)) { while (!(sieve2[j] & 0xE0E0E0E0E0E0E0E0U)) j++; i = j*sizeof(unsigned long); while ((i < (j+1)*sizeof(unsigned long)) && (i < SIEVE_SIZE)) { if (sieve[i] > 38) { rels += evaluate_candidate(la_inf, qs_inf, poly_inf, i, sieve); } i++; } j++; } return rels; } flint-1.011/QS/mpQS.h0000644017361200017500000000330311025357253014075 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** tinyQS.h Header file for mpQS.c. 
(C) 2006 William Hart ******************************************************************************/ #ifndef MPQS_H #define MPQS_H #include #include "mp_factor_base.h" #include "common.h" #define QS_INFO 1 // Print some info about what is being factored, etc #define MINBITS 64 // Smallest bits including multiplier that can be factored #define TEST 0 // Test that squares have come out of the linear algebra phase #define TEST2 0 // When the large prime variant is not used we can X^2 = Y^2 mod N #define CURVES 1 #define PRINT_FACTORS 1 typedef struct F_mpz_fact_s { mpz_t * fact; unsigned long num; } F_mpz_factor_t; #endif flint-1.011/QS/block_lanczos.c0000644017361200017500000006173711025357253016052 0ustar tabbotttabbott/*============================================================================ Copyright 2006 Jason Papadopoulos This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA =============================================================================== Optionally, please be nice and tell me if you find this source to be useful. Again optionally, if you add to the functionality present here please consider making those additions public too, so that others may benefit from your work. 
--jasonp@boo.net 9/8/06 The following modifications were made by William Hart: -addition of a random generator and max function -added the utility function get_null_entry -reformatted original code so it would operate as a standalone filter and block Lanczos module --------------------------------------------------------------------*/ #include #include #include #include #include "block_lanczos.h" #include "../flint.h" #include "../memory-manager.h" #define NUM_EXTRA_RELATIONS 64 #define BIT(x) (((uint64_t)(1)) << (x)) static const uint64_t bitmask[64] = { BIT( 0), BIT( 1), BIT( 2), BIT( 3), BIT( 4), BIT( 5), BIT( 6), BIT( 7), BIT( 8), BIT( 9), BIT(10), BIT(11), BIT(12), BIT(13), BIT(14), BIT(15), BIT(16), BIT(17), BIT(18), BIT(19), BIT(20), BIT(21), BIT(22), BIT(23), BIT(24), BIT(25), BIT(26), BIT(27), BIT(28), BIT(29), BIT(30), BIT(31), BIT(32), BIT(33), BIT(34), BIT(35), BIT(36), BIT(37), BIT(38), BIT(39), BIT(40), BIT(41), BIT(42), BIT(43), BIT(44), BIT(45), BIT(46), BIT(47), BIT(48), BIT(49), BIT(50), BIT(51), BIT(52), BIT(53), BIT(54), BIT(55), BIT(56), BIT(57), BIT(58), BIT(59), BIT(60), BIT(61), BIT(62), BIT(63), }; /*--------------------------------------------------------------------*/ uint64_t get_null_entry(uint64_t * nullrows, long i, long l) { /* Returns true if the entry with indices i,l is 1 in the supplied 64xN matrix. This is used to read the nullspace vectors which are output by the Lanczos routine */ return nullrows[i]&bitmask[l]; } uint64_t random32(void) { /* Poor man's random number generator. 
It satisfies no particularly good randomness properties, but is good enough for this application */ static unsigned long randval = 4035456057U; randval = ((uint64_t)randval*1025416097U+286824428U)%(uint64_t)4294967291U; return (unsigned long)randval; } /*--------------------------------------------------------------------*/ void reduce_matrix(unsigned long *nrows, unsigned long *ncols, la_col_t *cols) { /* Perform light filtering on the nrows x ncols matrix specified by cols[]. The processing here is limited to deleting columns that contain a singleton row, then resizing the matrix to have a few more columns than rows. Because deleting a column reduces the counts in several different rows, the process must iterate to convergence. Note that this step is not intended to make the Lanczos iteration run any faster (though it will); it's just that if we don't go to this trouble then there are factorizations for which the matrix step will fail outright */ unsigned long r, c, i, j, k; unsigned long passes; unsigned long *counts; unsigned long reduced_rows; unsigned long reduced_cols; /* count the number of nonzero entries in each row */ counts = (unsigned long *)calloc((size_t)*nrows, sizeof(unsigned long)); for (i = 0; i < *ncols; i++) { for (j = 0; j < cols[i].weight; j++) counts[cols[i].data[j]]++; } reduced_rows = *nrows; reduced_cols = *ncols; passes = 0; do { r = reduced_rows; /* remove any columns that contain the only entry in one or more rows, then update the row counts to reflect the missing column. 
Iterate until no more columns can be deleted */ do { c = reduced_cols; for (i = j = 0; i < reduced_cols; i++) { la_col_t *col = cols + i; for (k = 0; k < col->weight; k++) { if (counts[col->data[k]] < 2) break; } if (k < col->weight) { for (k = 0; k < col->weight; k++) { counts[col->data[k]]--; } free_col(col); clear_col(col); } else { cols[j++] = cols[i]; if (j-1 != i) clear_col(col); } } reduced_cols = j; } while (c != reduced_cols); /* count the number of rows that contain a nonzero entry */ for (i = reduced_rows = 0; i < *nrows; i++) { if (counts[i]) reduced_rows++; } /* Because deleting a column reduces the weight of many rows, the number of nonzero rows may be much less than the number of columns. Delete more columns until the matrix has the correct aspect ratio. Columns at the end of cols[] are the heaviest, so delete those (and update the row counts again) */ if (reduced_cols > reduced_rows + NUM_EXTRA_RELATIONS) { for (i = reduced_rows + NUM_EXTRA_RELATIONS; i < reduced_cols; i++) { la_col_t *col = cols + i; for (j = 0; j < col->weight; j++) { counts[col->data[j]]--; } free_col(col); clear_col(col); } reduced_cols = reduced_rows + NUM_EXTRA_RELATIONS; } /* if any columns were deleted in the previous step, then the matrix is less dense and more columns can be deleted; iterate until no further deletions are possible */ passes++; } while (r != reduced_rows); #if DISPLAY printf("reduce to %ld x %ld in %ld passes\n", reduced_rows, reduced_cols, passes); #endif free(counts); /* record the final matrix size. Note that we can't touch nrows because all the column data (and the sieving relations that produced it) would have to be updated */ *ncols = reduced_cols; } /*-------------------------------------------------------------------*/ static void mul_64x64_64x64(uint64_t *a, uint64_t *b, uint64_t *c ) { /* c[][] = x[][] * y[][], where all operands are 64 x 64 (i.e. contain 64 words of 64 bits each). The result may overwrite a or b. 
*/ uint64_t ai, bj, accum; uint64_t tmp[64]; unsigned long i, j; for (i = 0; i < 64; i++) { j = 0; accum = 0; ai = a[i]; while (ai) { bj = b[j]; if( ai & 1 ) accum ^= bj; ai >>= 1; j++; } tmp[i] = accum; } memcpy(c, tmp, sizeof(tmp)); } /*-----------------------------------------------------------------------*/ static void precompute_Nx64_64x64(uint64_t *x, uint64_t *c) { /* Let x[][] be a 64 x 64 matrix in GF(2), represented as 64 words of 64 bits each. Let c[][] be an 8 x 256 matrix of 64-bit words. This code fills c[][] with a bunch of "partial matrix multiplies". For 0<=i<256, the j_th row of c[][] contains the matrix product ( i << (8*j) ) * x[][] where the quantity in parentheses is considered a 1 x 64 vector of elements in GF(2). The resulting table can dramatically speed up matrix multiplies by x[][]. */ uint64_t accum, xk; unsigned long i, j, k, index; for (j = 0; j < 8; j++) { for (i = 0; i < 256; i++) { k = 0; index = i; accum = 0; while (index) { xk = x[k]; if (index & 1) accum ^= xk; index >>= 1; k++; } c[i] = accum; } x += 8; c += 256; } } /*-------------------------------------------------------------------*/ static void mul_Nx64_64x64_acc(uint64_t *v, uint64_t *x, uint64_t *c, uint64_t *y, unsigned long n) { /* let v[][] be a n x 64 matrix with elements in GF(2), represented as an array of n 64-bit words. Let c[][] be an 8 x 256 scratch matrix of 64-bit words. 
This code multiplies v[][] by the 64x64 matrix x[][], then XORs the n x 64 result into y[][] */ unsigned long i; uint64_t word; precompute_Nx64_64x64(x, c); for (i = 0; i < n; i++) { word = v[i]; y[i] ^= c[ 0*256 + ((word>> 0) & 0xff) ] ^ c[ 1*256 + ((word>> 8) & 0xff) ] ^ c[ 2*256 + ((word>>16) & 0xff) ] ^ c[ 3*256 + ((word>>24) & 0xff) ] ^ c[ 4*256 + ((word>>32) & 0xff) ] ^ c[ 5*256 + ((word>>40) & 0xff) ] ^ c[ 6*256 + ((word>>48) & 0xff) ] ^ c[ 7*256 + ((word>>56) ) ]; } } /*-------------------------------------------------------------------*/ static void mul_64xN_Nx64(uint64_t *x, uint64_t *y, uint64_t *c, uint64_t *xy, unsigned long n) { /* Let x and y be n x 64 matrices. This routine computes the 64 x 64 matrix xy[][] given by transpose(x) * y. c[][] is a 256 x 8 scratch matrix of 64-bit words. */ unsigned long i; memset(c, 0, 256 * 8 * sizeof(uint64_t)); memset(xy, 0, 64 * sizeof(uint64_t)); for (i = 0; i < n; i++) { uint64_t xi = x[i]; uint64_t yi = y[i]; c[ 0*256 + ( xi & 0xff) ] ^= yi; c[ 1*256 + ((xi >> 8) & 0xff) ] ^= yi; c[ 2*256 + ((xi >> 16) & 0xff) ] ^= yi; c[ 3*256 + ((xi >> 24) & 0xff) ] ^= yi; c[ 4*256 + ((xi >> 32) & 0xff) ] ^= yi; c[ 5*256 + ((xi >> 40) & 0xff) ] ^= yi; c[ 6*256 + ((xi >> 48) & 0xff) ] ^= yi; c[ 7*256 + ((xi >> 56) ) ] ^= yi; } for(i = 0; i < 8; i++) { unsigned long j; uint64_t a0, a1, a2, a3, a4, a5, a6, a7; a0 = a1 = a2 = a3 = 0; a4 = a5 = a6 = a7 = 0; for (j = 0; j < 256; j++) { if ((j >> i) & 1) { a0 ^= c[0*256 + j]; a1 ^= c[1*256 + j]; a2 ^= c[2*256 + j]; a3 ^= c[3*256 + j]; a4 ^= c[4*256 + j]; a5 ^= c[5*256 + j]; a6 ^= c[6*256 + j]; a7 ^= c[7*256 + j]; } } xy[ 0] = a0; xy[ 8] = a1; xy[16] = a2; xy[24] = a3; xy[32] = a4; xy[40] = a5; xy[48] = a6; xy[56] = a7; xy++; } } /*-------------------------------------------------------------------*/ static unsigned long find_nonsingular_sub(uint64_t *t, unsigned long *s, unsigned long *last_s, unsigned long last_dim, uint64_t *w) { /* given a 64x64 matrix t[][] (i.e. 
sixty-four 64-bit words) and a list of 'last_dim' column indices enumerated in last_s[]: - find a submatrix of t that is invertible - invert it and copy to w[][] - enumerate in s[] the columns represented in w[][] */ unsigned long i, j; unsigned long dim; unsigned long cols[64]; uint64_t M[64][2]; uint64_t mask, *row_i, *row_j; uint64_t m0, m1; /* M = [t | I] for I the 64x64 identity matrix */ for (i = 0; i < 64; i++) { M[i][0] = t[i]; M[i][1] = bitmask[i]; } /* put the column indices from last_s[] into the back of cols[], and copy to the beginning of cols[] any column indices not in last_s[] */ mask = 0; for (i = 0; i < last_dim; i++) { cols[63 - i] = last_s[i]; mask |= bitmask[last_s[i]]; } for (i = j = 0; i < 64; i++) { if (!(mask & bitmask[i])) cols[j++] = i; } /* compute the inverse of t[][] */ for (i = dim = 0; i < 64; i++) { /* find the next pivot row and put in row i */ mask = bitmask[cols[i]]; row_i = M[cols[i]]; for (j = i; j < 64; j++) { row_j = M[cols[j]]; if (row_j[0] & mask) { m0 = row_j[0]; m1 = row_j[1]; row_j[0] = row_i[0]; row_j[1] = row_i[1]; row_i[0] = m0; row_i[1] = m1; break; } } /* if a pivot row was found, eliminate the pivot column from all other rows */ if (j < 64) { for (j = 0; j < 64; j++) { row_j = M[cols[j]]; if ((row_i != row_j) && (row_j[0] & mask)) { row_j[0] ^= row_i[0]; row_j[1] ^= row_i[1]; } } /* add the pivot column to the list of accepted columns */ s[dim++] = cols[i]; continue; } /* otherwise, use the right-hand half of M[] to compensate for the absence of a pivot column */ for (j = i; j < 64; j++) { row_j = M[cols[j]]; if (row_j[1] & mask) { m0 = row_j[0]; m1 = row_j[1]; row_j[0] = row_i[0]; row_j[1] = row_i[1]; row_i[0] = m0; row_i[1] = m1; break; } } if (j == 64) { #ifdef ERRORS printf("lanczos error: submatrix " "is not invertible\n"); #endif return 0; } /* eliminate the pivot column from the other rows of the inverse */ for (j = 0; j < 64; j++) { row_j = M[cols[j]]; if ((row_i != row_j) && (row_j[1] & mask)) { row_j[0] 
^= row_i[0]; row_j[1] ^= row_i[1]; } } /* wipe out the pivot row */ row_i[0] = row_i[1] = 0; } /* the right-hand half of M[] is the desired inverse */ for (i = 0; i < 64; i++) w[i] = M[i][1]; /* The block Lanczos recurrence depends on all columns of t[][] appearing in s[] and/or last_s[]. Verify that condition here */ mask = 0; for (i = 0; i < dim; i++) mask |= bitmask[s[i]]; for (i = 0; i < last_dim; i++) mask |= bitmask[last_s[i]]; if (mask != (uint64_t)(-1)) { #ifdef ERRORS printf("lanczos error: not all columns used\n"); #endif return 0; } return dim; } /*-------------------------------------------------------------------*/ void mul_MxN_Nx64(unsigned long vsize, unsigned long dense_rows, unsigned long ncols, la_col_t *A, uint64_t *x, uint64_t *b) { /* Multiply the vector x[] by the matrix A (stored columnwise) and put the result in b[]. vsize refers to the number of uint64_t's allocated for x[] and b[]; vsize is probably different from ncols */ unsigned long i, j; memset(b, 0, vsize * sizeof(uint64_t)); for (i = 0; i < ncols; i++) { la_col_t *col = A + i; unsigned long *row_entries = col->data; uint64_t tmp = x[i]; for (j = 0; j < col->weight; j++) { b[row_entries[j]] ^= tmp; } } if (dense_rows) { for (i = 0; i < ncols; i++) { la_col_t *col = A + i; unsigned long *row_entries = col->data + col->weight; uint64_t tmp = x[i]; for (j = 0; j < dense_rows; j++) { if (row_entries[j / 32] & ((unsigned long)1 << (j % 32))) { b[j] ^= tmp; } } } } } /*-------------------------------------------------------------------*/ void mul_trans_MxN_Nx64(unsigned long dense_rows, unsigned long ncols, la_col_t *A, uint64_t *x, uint64_t *b) { /* Multiply the vector x[] by the transpose of the matrix A and put the result in b[]. 
Since A is stored by columns, this is just a matrix-vector product */ unsigned long i, j; for (i = 0; i < ncols; i++) { la_col_t *col = A + i; unsigned long *row_entries = col->data; uint64_t accum = 0; for (j = 0; j < col->weight; j++) { accum ^= x[row_entries[j]]; } b[i] = accum; } if (dense_rows) { for (i = 0; i < ncols; i++) { la_col_t *col = A + i; unsigned long *row_entries = col->data + col->weight; uint64_t accum = b[i]; for (j = 0; j < dense_rows; j++) { if (row_entries[j / 32] & ((unsigned long)1 << (j % 32))) { accum ^= x[j]; } } b[i] = accum; } } } /*-----------------------------------------------------------------------*/ static void transpose_vector(unsigned long ncols, uint64_t *v, uint64_t **trans) { /* Hideously inefficent routine to transpose a vector v[] of 64-bit words into a 2-D array trans[][] of 64-bit words */ unsigned long i, j; unsigned long col; uint64_t mask, word; for (i = 0; i < ncols; i++) { col = i / 64; mask = bitmask[i % 64]; word = v[i]; j = 0; while (word) { if (word & 1) trans[j][col] |= mask; word = word >> 1; j++; } } } /*-----------------------------------------------------------------------*/ void combine_cols(unsigned long ncols, uint64_t *x, uint64_t *v, uint64_t *ax, uint64_t *av) { /* Once the block Lanczos iteration has finished, x[] and v[] will contain mostly nullspace vectors between them, as well as possibly some columns that are linear combinations of nullspace vectors. Given vectors ax[] and av[] that are the result of multiplying x[] and v[] by the matrix, this routine will use Gauss elimination on the columns of [ax | av] to find all of the linearly dependent columns. The column operations needed to accomplish this are mir- rored in [x | v] and the columns that are independent are skipped. Finally, the dependent columns are copied back into x[] and represent the nullspace vector output of the block Lanczos code. 
v[] and av[] can be NULL, in which case the elimination process assumes 64 dependencies instead of 128 */ unsigned long i, j, k, bitpos, col, col_words, num_deps; uint64_t mask; uint64_t *matrix[128], *amatrix[128], *tmp; num_deps = 128; if (v == NULL || av == NULL) num_deps = 64; col_words = (ncols + 63) / 64; for (i = 0; i < num_deps; i++) { matrix[i] = (uint64_t *)calloc((size_t)col_words, sizeof(uint64_t)); amatrix[i] = (uint64_t *)calloc((size_t)col_words, sizeof(uint64_t)); } /* operations on columns can more conveniently become operations on rows if all the vectors are first transposed */ transpose_vector(ncols, x, matrix); transpose_vector(ncols, ax, amatrix); if (num_deps == 128) { transpose_vector(ncols, v, matrix + 64); transpose_vector(ncols, av, amatrix + 64); } /* Keep eliminating rows until the unprocessed part of amatrix[][] is all zero. The rows where this happens correspond to linearly dependent vectors in the nullspace */ for (i = bitpos = 0; i < num_deps && bitpos < ncols; bitpos++) { /* find the next pivot row */ mask = bitmask[bitpos % 64]; col = bitpos / 64; for (j = i; j < num_deps; j++) { if (amatrix[j][col] & mask) { tmp = matrix[i]; matrix[i] = matrix[j]; matrix[j] = tmp; tmp = amatrix[i]; amatrix[i] = amatrix[j]; amatrix[j] = tmp; break; } } if (j == num_deps) continue; /* a pivot was found; eliminate it from the remaining rows */ for (j++; j < num_deps; j++) { if (amatrix[j][col] & mask) { /* Note that the entire row, *not* just the nonzero part of it, must be eliminated; this is because the corresponding (dense) row of matrix[][] must have the same operation applied */ for (k = 0; k < col_words; k++) { amatrix[j][k] ^= amatrix[i][k]; matrix[j][k] ^= matrix[i][k]; } } } i++; } /* transpose rows i to 64 back into x[] */ for (j = 0; j < ncols; j++) { uint64_t word = 0; col = j / 64; mask = bitmask[j % 64]; for (k = i; k < 64; k++) { if (matrix[k][col] & mask) word |= bitmask[k]; } x[j] = word; } for (i = 0; i < num_deps; i++) { 
free(matrix[i]); free(amatrix[i]); } } /*-----------------------------------------------------------------------*/ uint64_t * block_lanczos(unsigned long nrows, unsigned long dense_rows, unsigned long ncols, la_col_t *B) { /* Solve Bx = 0 for some nonzero x; the computed solution, containing up to 64 of these nullspace vectors, is returned */ uint64_t *vnext, *v[3], *x, *v0; uint64_t *winv[3]; uint64_t *vt_a_v[2], *vt_a2_v[2]; uint64_t *scratch; uint64_t *d, *e, *f, *f2; uint64_t *tmp; unsigned long s[2][64]; unsigned long i, iter; unsigned long n = ncols; unsigned long dim0, dim1; uint64_t mask0, mask1; unsigned long vsize; /* allocate all of the size-n variables. Note that because B has been preprocessed to ignore singleton rows, the number of rows may really be less than nrows and may be greater than ncols. vsize is the maximum of these two numbers */ vsize = FLINT_MAX(nrows, ncols); v[0] = (uint64_t *)malloc(vsize * sizeof(uint64_t)); v[1] = (uint64_t *)malloc(vsize * sizeof(uint64_t)); v[2] = (uint64_t *)malloc(vsize * sizeof(uint64_t)); vnext = (uint64_t *)malloc(vsize * sizeof(uint64_t)); x = (uint64_t *)malloc(vsize * sizeof(uint64_t)); v0 = (uint64_t *)malloc(vsize * sizeof(uint64_t)); scratch = (uint64_t *)malloc(FLINT_MAX(vsize, 256 * 8) * sizeof(uint64_t)); /* allocate all the 64x64 variables */ winv[0] = (uint64_t *)malloc(64 * sizeof(uint64_t)); winv[1] = (uint64_t *)malloc(64 * sizeof(uint64_t)); winv[2] = (uint64_t *)malloc(64 * sizeof(uint64_t)); vt_a_v[0] = (uint64_t *)malloc(64 * sizeof(uint64_t)); vt_a_v[1] = (uint64_t *)malloc(64 * sizeof(uint64_t)); vt_a2_v[0] = (uint64_t *)malloc(64 * sizeof(uint64_t)); vt_a2_v[1] = (uint64_t *)malloc(64 * sizeof(uint64_t)); d = (uint64_t *)malloc(64 * sizeof(uint64_t)); e = (uint64_t *)malloc(64 * sizeof(uint64_t)); f = (uint64_t *)malloc(64 * sizeof(uint64_t)); f2 = (uint64_t *)malloc(64 * sizeof(uint64_t)); /* The iterations computes v[0], vt_a_v[0], vt_a2_v[0], s[0] and winv[0]. 
Subscripts larger than zero represent past versions of these quantities, which start off empty (except for the past version of s[], which contains all the column indices */ memset(v[1], 0, vsize * sizeof(uint64_t)); memset(v[2], 0, vsize * sizeof(uint64_t)); for (i = 0; i < 64; i++) { s[1][i] = i; vt_a_v[1][i] = 0; vt_a2_v[1][i] = 0; winv[1][i] = 0; winv[2][i] = 0; } dim0 = 0; dim1 = 64; mask1 = (uint64_t)(-1); iter = 0; /* The computed solution 'x' starts off random, and v[0] starts off as B*x. This initial copy of v[0] must be saved off separately */ for (i = 0; i < n; i++) v[0][i] = (uint64_t)(random32()) << 32 | (uint64_t)(random32()); memcpy(x, v[0], vsize * sizeof(uint64_t)); mul_MxN_Nx64(vsize, dense_rows, ncols, B, v[0], scratch); mul_trans_MxN_Nx64(dense_rows, ncols, B, scratch, v[0]); memcpy(v0, v[0], vsize * sizeof(uint64_t)); /* perform the iteration */ while (1) { iter++; /* multiply the current v[0] by a symmetrized version of B, or B'B (apostrophe means transpose). Use "A" to refer to B'B */ mul_MxN_Nx64(vsize, dense_rows, ncols, B, v[0], scratch); mul_trans_MxN_Nx64(dense_rows, ncols, B, scratch, vnext); /* compute v0'*A*v0 and (A*v0)'(A*v0) */ mul_64xN_Nx64(v[0], vnext, scratch, vt_a_v[0], n); mul_64xN_Nx64(vnext, vnext, scratch, vt_a2_v[0], n); /* if the former is orthogonal to itself, then the iteration has finished */ for (i = 0; i < 64; i++) { if (vt_a_v[0][i] != 0) break; } if (i == 64) { break; } /* Find the size-'dim0' nonsingular submatrix of v0'*A*v0, invert it, and list the column indices present in the submatrix */ dim0 = find_nonsingular_sub(vt_a_v[0], s[0], s[1], dim1, winv[0]); if (dim0 == 0) break; /* mask0 contains one set bit for every column that participates in the inverted submatrix computed above */ mask0 = 0; for (i = 0; i < dim0; i++) mask0 |= bitmask[s[0][i]]; /* compute d */ for (i = 0; i < 64; i++) d[i] = (vt_a2_v[0][i] & mask0) ^ vt_a_v[0][i]; mul_64x64_64x64(winv[0], d, d); for (i = 0; i < 64; i++) d[i] = d[i] ^ 
bitmask[i]; /* compute e */ mul_64x64_64x64(winv[1], vt_a_v[0], e); for (i = 0; i < 64; i++) e[i] = e[i] & mask0; /* compute f */ mul_64x64_64x64(vt_a_v[1], winv[1], f); for (i = 0; i < 64; i++) f[i] = f[i] ^ bitmask[i]; mul_64x64_64x64(winv[2], f, f); for (i = 0; i < 64; i++) f2[i] = ((vt_a2_v[1][i] & mask1) ^ vt_a_v[1][i]) & mask0; mul_64x64_64x64(f, f2, f); /* compute the next v */ for (i = 0; i < n; i++) vnext[i] = vnext[i] & mask0; mul_Nx64_64x64_acc(v[0], d, scratch, vnext, n); mul_Nx64_64x64_acc(v[1], e, scratch, vnext, n); mul_Nx64_64x64_acc(v[2], f, scratch, vnext, n); /* update the computed solution 'x' */ mul_64xN_Nx64(v[0], v0, scratch, d, n); mul_64x64_64x64(winv[0], d, d); mul_Nx64_64x64_acc(v[0], d, scratch, x, n); /* rotate all the variables */ tmp = v[2]; v[2] = v[1]; v[1] = v[0]; v[0] = vnext; vnext = tmp; tmp = winv[2]; winv[2] = winv[1]; winv[1] = winv[0]; winv[0] = tmp; tmp = vt_a_v[1]; vt_a_v[1] = vt_a_v[0]; vt_a_v[0] = tmp; tmp = vt_a2_v[1]; vt_a2_v[1] = vt_a2_v[0]; vt_a2_v[0] = tmp; memcpy(s[1], s[0], 64 * sizeof(unsigned long)); mask1 = mask0; dim1 = dim0; } #if DISPLAY printf("lanczos halted after %ld iterations\n", iter); #endif /* free unneeded storage */ free(vnext); free(scratch); free(v0); free(vt_a_v[0]); free(vt_a_v[1]); free(vt_a2_v[0]); free(vt_a2_v[1]); free(winv[0]); free(winv[1]); free(winv[2]); free(d); free(e); free(f); free(f2); /* if a recoverable failure occurred, start everything over again */ if (dim0 == 0) { #ifdef ERRORS printf("linear algebra failed; retrying...\n"); #endif free(x); free(v[0]); free(v[1]); free(v[2]); return NULL; } /* convert the output of the iteration to an actual collection of nullspace vectors */ mul_MxN_Nx64(vsize, dense_rows, ncols, B, x, v[1]); mul_MxN_Nx64(vsize, dense_rows, ncols, B, v[0], v[2]); combine_cols(ncols, x, v[0], v[1], v[2]); /* verify that these really are linear dependencies of B */ mul_MxN_Nx64(vsize, dense_rows, ncols, B, x, v[0]); for (i = 0; i < ncols; i++) { if (v[0][i] != 
0) break; } if (i < ncols) { printf("lanczos error: dependencies don't work %ld\n",i); abort(); } free(v[0]); free(v[1]); free(v[2]); return x; } flint-1.011/QS/mpQS.c0000644017361200017500000003366111025357253014102 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mpQS.c A full implementation of the Quadratic Sieve using multi-precision arithmetic (C) 2006 William Hart ******************************************************************************/ #include #include #include #include #include #include "../fmpz.h" #include "../long_extras.h" #include "../memory-manager.h" #include "../flint.h" #include "../mpn_extras.h" #include "mpQS.h" #include "mp_factor_base.h" #include "mp_poly.h" #include "mp_sieve.h" #include "mp_linear_algebra.h" #include "block_lanczos.h" /*=========================================================================== Square Root: Function: Compute the square root of the product of all the partial relations and take it mod p ===========================================================================*/ static inline void square_root(mpz_t X, mpz_t Y, QS_t * qs_inf, 
linalg_t * la_inf, uint64_t * nullrows, unsigned long ncols, unsigned long l, mpz_t N) { unsigned long position; unsigned long * relation = la_inf->relation; prime_t * factor_base = qs_inf->factor_base; unsigned long * prime_count = qs_inf->prime_count; unsigned long num_primes = qs_inf->num_primes; mpz_t * Y_arr = la_inf->Y_arr; mpz_t pow; mpz_init(pow); memset(prime_count, 0, num_primes*sizeof(unsigned long)); mpz_set_ui(X, 1); mpz_set_ui(Y, 1); for (unsigned long i = 0; i < ncols; i++) { if (get_null_entry(nullrows, i, l)) { position = la_inf->matrix[i].orig*2*MAX_FACS; for (unsigned long j = 0; j < relation[position]; j++) { prime_count[relation[position+2*j+1]] += (relation[position+2*j+2]); } mpz_mul(Y, Y, Y_arr[la_inf->matrix[i].orig]); if (i % 10 == 0) mpz_mod(Y, Y, N); } } mpz_mod(Y, Y, N); for (unsigned long i = 0; i < num_primes; i++) { if (prime_count[i]) { mpz_set_ui(pow, factor_base[i].p); mpz_powm_ui(pow, pow, prime_count[i]/2, N); mpz_mul(X, X, pow); } if (i%10 == 0) mpz_mod(X, X, N); } mpz_mod(X, X, N); #if TEST for (unsigned long i = 0; i < num_primes; i++) { if ((prime_count[i] %2) != 0) printf("Error %ld, %ld, %ld\n", l, i, prime_count[i]); } #endif #if TEST2 mpz_t temp, temp2; mpz_init(temp); mpz_init(temp2); mpz_mul(temp, Y, Y); mpz_mod(temp, temp, N); mpz_mul(temp2, X, X); mpz_mod(temp2, temp2, N); if (mpz_cmp(temp, temp2) != 0) gmp_printf("Y^2 = %Zd (mod N) != \nX^2 = %Zd (mod N)\n\n", temp, temp2); mpz_clear(temp); mpz_clear(temp2); #endif mpz_clear(pow); } /*=========================================================================== Collect relations: Function: Sets up batches of polynomials Do the sieving Evaluate candidates Returns: The number of relations found with this batch of polynomials ===========================================================================*/ unsigned long collect_relations(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve) { unsigned long s = poly_inf->s; uint32_t * poly_corr; unsigned 
long relations = 0; uint32_t ** A_inv2B = poly_inf->A_inv2B; unsigned long poly_index, j; unsigned long poly_add; unsigned long * B = poly_inf->B; unsigned long * B_terms = poly_inf->B_terms; unsigned long sieve_size = qs_inf->sieve_size; unsigned long small_primes = qs_inf->small_primes; unsigned long limbs = qs_inf->prec+1; unsigned long limbs2; mp_limb_t msl; #if CURVES static unsigned long count = 0; count++; if ((count & 7) == 0) printf("%ld curves\n", count*((1<<(s-1))-1)); #endif compute_A(qs_inf, poly_inf); compute_B_terms(qs_inf, poly_inf); compute_off_adj(qs_inf, poly_inf); compute_A_factor_offsets(qs_inf, poly_inf); compute_B_C(qs_inf, poly_inf); for (poly_index = 1; poly_index < (1<<(s-1)); poly_index++) { for (j = 0; j < s; j++) { if (((poly_index >> j) & 1UL) != 0UL) break; } poly_add = (((poly_index >> j) & 2UL) != 0UL); poly_corr = A_inv2B[j]; if (sieve_size <= SIEVE_BLOCK) { do_sieving2(qs_inf, poly_inf, sieve); } else { unsigned long blocks = sieve_size/SIEVE_BLOCK; unsigned long offset = SIEVE_BLOCK; unsigned long sieve_fill = qs_inf->sieve_fill; unsigned long second_prime = FLINT_MIN(SECOND_PRIME, qs_inf->num_primes); memset(sieve, sieve_fill, sieve_size); *(sieve+sieve_size) = 255; do_sieving(qs_inf, poly_inf, sieve, small_primes, second_prime, SIEVE_BLOCK, 1, 0); for (long i = 1; i < blocks - 1; i++, offset += SIEVE_BLOCK) do_sieving(qs_inf, poly_inf, sieve, small_primes, second_prime, offset+SIEVE_BLOCK, 0, 0); do_sieving(qs_inf, poly_inf, sieve, small_primes, second_prime, sieve_size, 0, 1); do_sieving3(qs_inf, poly_inf, sieve, second_prime, qs_inf->num_primes, sieve_size); } relations += evaluate_sieve(la_inf, qs_inf, poly_inf, sieve); update_offsets(poly_add, poly_corr, qs_inf, poly_inf); limbs2 = B_terms[j*limbs]; if (((poly_add) && ((long)(limbs2 ^ B[0]) >= 0L)) || ((!poly_add) && ((long)(limbs2 ^ B[0]) < 0L))) { msl = mpn_add_n(B+1, B+1, B_terms + j*limbs + 1, limbs2); msl += mpn_add_n(B+1, B+1, B_terms + j*limbs + 1, limbs2); if (msl) 
mpn_add_1(B + limbs2 + 1, B + limbs2 + 1, limbs - limbs2 - 1, msl); } else { msl = mpn_sub_n(B+1, B+1, B_terms + j*limbs + 1, limbs2); msl += mpn_sub_n(B+1, B+1, B_terms + j*limbs + 1, limbs2); if ((msl) && (limbs2 < limbs - 1)) msl = mpn_sub_1(B + limbs2 + 1, B + limbs2 + 1, limbs - limbs2 - 1, msl); if (msl) { F_mpn_negate(B+1, B+1, limbs - 1); B[0] = -B[0]; } } if ((long) B[0] < 0) { B[0] = 1L - limbs; while (!B[FLINT_ABS(B[0])] && B[0]) B[0]++; } else { B[0] = limbs - 1L; while (!B[B[0]] && B[0]) B[0]--; } compute_B_C(qs_inf, poly_inf); //compute_A_factor_offsets(qs_inf, poly_inf); } relations += merge_relations(la_inf); return relations; } /*=========================================================================== Main Quadratic Sieve Factoring Routine: Function: Finds the factors of a number using the quadratic sieve Assume n is odd, not a perfect power, positive and not a prime Returns 0 if factorisation was unsuccessful Returns 1 if factorisation was successful If a small factor is found, it is returned and the QS is not run ===========================================================================*/ int F_mpz_factor_mpQS(F_mpz_factor_t factors, mpz_t N) { unsigned long small_factor; unsigned long rels_found = 0; unsigned long prec; QS_t qs_inf; poly_t poly_inf; linalg_t la_inf; qs_inf.bits = mpz_sizeinbase(N,2); if (qs_inf.bits < MINBITS) return 0; // Number too small for mpQS prec = (qs_inf.bits + max_mult_size - 1)/FLINT_BITS + 1; qs_inf.n = (fmpz_t) flint_stack_alloc(prec+1); qs_inf.n[prec] = 0; mpz_to_fmpz(qs_inf.n, N); // set n to the number to be factored qs_inf.prec = (prec + 1)/2; small_factor = knuth_schroeppel(&qs_inf); // Compute multiplier and some FB primes if (small_factor) goto cleanup_2; #if QS_INFO printf("Multiplier = %ld\n", qs_inf.k); #endif mpz_init(qs_inf.mpz_n); mpz_set(qs_inf.mpz_n, N); mpz_mul_ui(qs_inf.mpz_n, qs_inf.mpz_n, qs_inf.k); qs_inf.bits = mpz_sizeinbase(qs_inf.mpz_n, 2); if (qs_inf.bits < MINBITS) { small_factor = 0; 
// Number too small for mpQS goto cleanup_1a; } mpz_to_fmpz(qs_inf.n, qs_inf.mpz_n); // set n to the number to be factored times k primes_init(&qs_inf); sqrts_init(&qs_inf); if (qs_inf.bits < MINBITS) { small_factor = 0; // kn too big for tinyQS goto cleanup_1; } small_factor = compute_factor_base(&qs_inf); // Computes the factor base primes and modular square roots if (small_factor) goto cleanup_1; compute_sizes(&qs_inf); get_sieve_params(&qs_inf); poly_init(&qs_inf, &poly_inf, N); linear_algebra_init(&la_inf, &qs_inf, &poly_inf); unsigned char * sieve = (unsigned char *) flint_stack_alloc_bytes(qs_inf.sieve_size+1); while (rels_found < qs_inf.num_primes + EXTRA_RELS) { rels_found += collect_relations(&la_inf, &qs_inf, &poly_inf, sieve); } flint_stack_release(); // release sieve la_col_t * matrix = la_inf.matrix; unsigned long ncols = qs_inf.num_primes + EXTRA_RELS; unsigned long nrows = qs_inf.num_primes; reduce_matrix(&nrows, &ncols, matrix); // Do some filtering on the matrix uint64_t* nullrows; do { nullrows = block_lanczos(nrows, 0, ncols, matrix); // Linear algebra (block Lanczos) } while (nullrows == NULL); unsigned long i, j; uint64_t mask; for (i = 0, mask = 0; i < ncols; i++) mask |= nullrows[i]; for (i = j = 0; i < 64; i++) { if (mask & (((uint64_t)(1)) << i)) j++; } #if QS_INFO printf("%ld nullspace vectors found\n", j); #endif qs_inf.prime_count = (unsigned long *) flint_stack_alloc(qs_inf.num_primes); mpz_t X, Y, F, Q, R; mpz_init(X); mpz_init(Y); mpz_init(F); mpz_init(Q); mpz_init(R); mpz_set(F, N); #if PRINT_FACTORS gmp_printf("Factors of %Zd:\n", N); #endif for (unsigned long l = 0; l < 64; l++) { if (mask & ((uint64_t)(1) << l)) { square_root(X, Y, &qs_inf, &la_inf, nullrows, ncols, l, N); mpz_sub(X, X, Y); mpz_gcd(X, X, N); if ((mpz_cmp(X, N) != 0) && (mpz_cmp_ui(X, 1) != 0)) { #if PRINT_FACTORS gmp_printf("%Zd\n", X); #endif if (mpz_probab_prime_p(X, 10)) { mpz_fdiv_qr(Q, R, F, X); if (mpz_cmp_ui(R, 0) == 0) mpz_set(F, Q); } if (mpz_cmp_ui(F, 
1) == 0) break; } } } small_factor = 1; // sieve was successful mpz_clear(Q); mpz_clear(R); mpz_clear(F); mpz_clear(X); mpz_clear(Y); flint_stack_release(); // release prime_count linear_algebra_clear(&la_inf, &qs_inf); poly_clear(&poly_inf); sizes_clear(); cleanup_1: sqrts_clear(); // release modular square roots primes_clear(); // release factor_base cleanup_1a: mpz_clear(qs_inf.mpz_n); cleanup_2: flint_stack_release(); // release n return small_factor; } /*=========================================================================== Main Program: Function: Factors a user specified number using the quadratic sieve ===========================================================================*/ int main(int argc, unsigned char *argv[]) { mpz_t N; mpz_init(N); F_mpz_factor_t factors; printf("Input number to factor [ >= 40 decimal digits ] : "); gmp_scanf("%Zd", N); getchar(); F_mpz_factor_mpQS(factors, N); mpz_clear(N); } /*int main(int argc, unsigned char *argv[]) { mpz_t N; mpz_init(N); F_mpz_factor_t factors; unsigned long factor; unsigned long failed = 0; unsigned long small_factors = 0; unsigned long succeed = 0; unsigned long bits1, bits2, bits3, i; for (i = 0; i < 1; i++) { mpz_set_ui(N, z_nextprime(z_randint(4000000000000000000UL))); mpz_mul_ui(N, N, z_nextprime(z_randint(4000000000000000000UL))); mpz_mul_ui(N, N, z_nextprime(z_randint(4000000000000000UL))); mpz_mul_ui(N, N, z_nextprime(z_randint(1000000000UL))); //bits1 = z_randint(41UL)+13UL; //bits2 = z_randint(22UL)+13UL; //bits3 = z_randint(22UL)+13UL; //mpz_set_ui(N, z_nextprime(z_randint((1UL< 1) small_factors++; if (factor == 1) succeed++; } printf("mpQS succeeded %ld times, found a small factor %ld times\n", succeed, small_factors); printf("and failed %ld times\n", failed); mpz_clear(N); }*/ flint-1.011/QS/mp_factor_base.h0000644017361200017500000000412711025357253016166 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mp_factor_base.h Header file for factor_base.c. (C) 2006 William Hart ******************************************************************************/ #ifndef MP_FACTORBASE_H #define MP_FACTORBASE_H #include #include "mpQS.h" #include "common.h" #define KSMAX 1000 unsigned long num_FB_primes(unsigned long bits); void sqrts_init(QS_t * qs_inf); void sqrts_clear(void); void compute_sizes(QS_t * qs_inf); void sizes_clear(void); //Knuth-Schroeppel multipliers and a macro to count them static const unsigned long multipliers[] = {1, 2, 3, 5, 6, 7, 10, 11, 13, 14, 15, 17, 19, 21, 22, 23, 26, 29, 30, 31, 33, 34, 35, 37, 38, 41, 42, 43, 47}; #define NUMMULTS (sizeof(multipliers)/sizeof(unsigned long)) #define max_mult_size 6 // number of bits of maximum multiplier void primes_clear(void); void primes_init(QS_t * qs_inf); unsigned long knuth_schroeppel(QS_t * qs_inf); unsigned long compute_factor_base(QS_t * qs_inf); #endif flint-1.011/QS/mp_poly.c0000644017361200017500000004250211025357253014673 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mp_poly.c Routines for managing polynomials (C) 2006 William Hart ******************************************************************************/ #include #include #include #include "../flint.h" #include "../mpn_extras.h" #include "../fmpz.h" #include "../memory-manager.h" #include "../long_extras.h" #include "../longlong_wrapper.h" #include "../longlong.h" #include "mp_poly.h" #include "common.h" /*========================================================================= poly_init: Function: computes parameters for the polynomials and initialises the various structures required ==========================================================================*/ void poly_init(QS_t * qs_inf, poly_t * poly_inf, mpz_t N) { unsigned long num_primes = qs_inf->num_primes; unsigned long s = (qs_inf->bits-1)/28+1; prime_t * factor_base = qs_inf->factor_base; unsigned long fact_approx, fact, span; unsigned long sieve_size = qs_inf->sieve_size; unsigned long small_primes = qs_inf->small_primes; long min; poly_inf->s = s; poly_inf->B = (unsigned long*) flint_stack_alloc(qs_inf->prec+1); poly_inf->B_terms = (unsigned long*) flint_stack_alloc(s*(qs_inf->prec+1)); poly_inf->A = (unsigned 
long*) flint_stack_alloc(qs_inf->prec+1); poly_inf->target_A = (unsigned long*) flint_stack_alloc(qs_inf->prec+1); poly_inf->A_ind = (unsigned long*) flint_stack_alloc(s); poly_inf->A_modp = (unsigned long*) flint_stack_alloc(s); poly_inf->A_inv2B = (uint32_t**) flint_stack_alloc(s); poly_inf->inv_p2 = (double*) flint_stack_alloc_bytes(s*sizeof(double)); poly_inf->A_inv = (uint32_t *) flint_stack_alloc_bytes(num_primes*sizeof(uint32_t)); poly_inf->soln1 = (uint32_t *) flint_stack_alloc_bytes(num_primes*sizeof(uint32_t)); poly_inf->soln2 = (uint32_t *) flint_stack_alloc_bytes(num_primes*sizeof(uint32_t)); poly_inf->posn1 = (uint32_t *) flint_stack_alloc_bytes(num_primes*sizeof(uint32_t)); poly_inf->posn2 = (uint32_t *) flint_stack_alloc_bytes(num_primes*sizeof(uint32_t)); uint32_t ** A_inv2B = poly_inf->A_inv2B; A_inv2B[0] = (uint32_t *) flint_stack_alloc_bytes(num_primes*s*sizeof(uint32_t)); mpz_init(poly_inf->A_mpz); mpz_init(poly_inf->B_mpz); mpz_init(poly_inf->C); for (unsigned long i = 1; i < s; i++) { A_inv2B[i] = A_inv2B[i-1] + num_primes; } mpz_t temp; mpz_init(temp); mpz_mul_ui(temp, N, 2*qs_inf->k); mpz_sqrt(temp, temp); mpz_div_ui(temp, temp, 47*sieve_size/100); mpz_to_fmpz(poly_inf->target_A, temp); mpz_root(temp, temp, s); fact_approx = mpz_get_ui(temp); for (fact = 0; fact_approx >= factor_base[fact].p; fact++); span = num_primes/s/s/2; if (span < 5*s) span = 5*s; min = fact - span/2; if (min < small_primes) min = small_primes; if (min + span >= qs_inf->num_primes) span = num_primes - min - 1; fact = min + span/2; #if POLY_PARAMS printf("min = FB[%ld], span = %ld, number of factors = %ld\n", min, span, s); #endif poly_inf->min = min; poly_inf->fact = fact; poly_inf->span = span; mpz_clear(temp); } void poly_clear(poly_t * poly_inf) { mpz_clear(poly_inf->A_mpz); mpz_clear(poly_inf->B_mpz); mpz_clear(poly_inf->C); flint_stack_release(); // release all A_inv2B[i] flint_stack_release(); // release posn1 flint_stack_release(); // release posn2 
flint_stack_release(); // release soln1 flint_stack_release(); // release soln2 flint_stack_release(); // release A_inv flint_stack_release(); // release inv_p2 flint_stack_release(); // release A_inv2B flint_stack_release(); // release A_modp flint_stack_release(); // release A_ind flint_stack_release(); // release target_A flint_stack_release(); // release A flint_stack_release(); // release B_terms flint_stack_release(); // release B } /*========================================================================= compute_A: Function: Compute a new polynomial A value The function attempts to pick A near to an optimal size ==========================================================================*/ void compute_A(QS_t * qs_inf, poly_t * poly_inf) { unsigned long min = poly_inf->min; unsigned long span = poly_inf->span; unsigned long s = poly_inf->s; unsigned long * A_ind = poly_inf->A_ind; unsigned long * A = poly_inf->A; unsigned long * target_A = poly_inf->target_A; unsigned long * current_A = (unsigned long *) flint_stack_alloc(qs_inf->prec+1); unsigned long * diff = (unsigned long *) flint_stack_alloc(qs_inf->prec+1); unsigned long * best_diff = (unsigned long *) flint_stack_alloc(qs_inf->prec+1); prime_t * factor_base = qs_inf->factor_base; unsigned long factor, p; unsigned long best1, best2, best3; unsigned long odds = s - 3; mp_limb_t msl; int taken; long i, j, k; A[0] = 1; A[1] = 1; for (i = 0; i < odds; i++) // Randomly choose the first s-3 prime factors of A with odd indices { do { taken = 0; A_ind[i] = ((z_randint(span) + min) | 1); if (A_ind[i] == min + span) A_ind[i] -= 2; for (j = 0; j < i; j++) { if (A_ind[i] == A_ind[j]) taken = 1; } } while (taken); msl = mpn_mul_1(A+1, A+1, A[0], factor_base[A_ind[i]].p); if (msl) // Compute the product of these s-3 primes { A[A[0]+1] = msl; A[0]++; } } for (k = 0; k < 30; k++) // Now try 8 different sets of even index primes as the remaining factors { F_mpn_copy(current_A, A, A[0] + 1); for (i = 0; i < 3; i++) // 
Randomly choose the last 3 prime factors of A with even indices { do { taken = 0; A_ind[s-3+i] = ((z_randint(span) + min) & -2L); if (A_ind[s-3+i] < min) A_ind[s-3+i] += 2; for (j = 0; j < i; j++) { if (A_ind[s-3+i] == A_ind[s-3+j]) taken = 1; } } while (taken); msl = mpn_mul_1(current_A+1, current_A+1, current_A[0], factor_base[A_ind[s-3+i]].p); if (msl) // Compute the product of these s-3 primes and the odd indexed primes { current_A[current_A[0]+1] = msl; current_A[0]++; } } if (k == 0) // Just store the first difference as the best one { if (target_A[0] >= current_A[0]) // Compute the difference with the target A { msl = mpn_sub(best_diff+1, target_A+1, target_A[0], current_A+1, current_A[0]); best_diff[0] = target_A[0]; } else { msl = mpn_sub(best_diff+1, current_A+1, current_A[0], target_A+1, target_A[0]); best_diff[0] = current_A[0]; } if (msl) F_mpn_negate(best_diff+1, best_diff+1, best_diff[0]); while ((!best_diff[best_diff[0]]) && (best_diff[0])) best_diff[0]--; // Normalise best_diff best1 = A_ind[s-3]; best2 = A_ind[s-2]; best3 = A_ind[s-1]; continue; } if (target_A[0] >= current_A[0]) // Compute the difference with the target A { msl = mpn_sub(diff+1, target_A+1, target_A[0], current_A+1, current_A[0]); diff[0] = target_A[0]; } else { msl = mpn_sub(diff+1, current_A+1, current_A[0], target_A+1, target_A[0]); diff[0] = current_A[0]; } if (msl) F_mpn_negate(diff+1, diff+1, diff[0]); while ((!diff[diff[0]]) && (diff[0])) diff[0]--; // Normalise diff if ((diff[0] < best_diff[0]) || ((diff[0] == best_diff[0]) && (mpn_cmp(diff+1, best_diff+1, diff[0]) < 0))) // The new diff is better { F_mpn_copy(best_diff, diff, diff[0]+1); best1 = A_ind[s-3]; best2 = A_ind[s-2]; best3 = A_ind[s-1]; } } A_ind[s-3] = best1; // Multiply A by the product of these 3 primes and store their indices A_ind[s-2] = best2; A_ind[s-1] = best3; for (i = 0; i < 3; i++) { msl = mpn_mul_1(A+1, A+1, A[0], factor_base[A_ind[s+i-3]].p); if (msl) { A[A[0]+1] = msl; A[0]++; } } #if POLY_A mpz_t 
A_disp, targ_A; mpz_init(A_disp); mpz_init(targ_A); fmpz_to_mpz(A_disp, A); fmpz_to_mpz(targ_A, target_A); gmp_printf("A = %Zd, target A = %Zd\n", A_disp, targ_A); mpz_clear(A_disp); mpz_clear(targ_A); #endif /*for (i = 0; i < s; i++) { p = factor_base[A_ind[i]].p; poly_inf->inv_p2[i] = z_precompute_inverse(p*p); } */ fmpz_to_mpz(poly_inf->A_mpz, A); flint_stack_release(); // release current_A flint_stack_release(); // release diff flint_stack_release(); // release best_diff } /*========================================================================= compute B terms: Function: Compute the terms from which the B values of the polynomials are constructed and compute the starting B coefficient ==========================================================================*/ void compute_B_terms(QS_t * qs_inf, poly_t * poly_inf) { unsigned long s = poly_inf->s; unsigned long * A_ind = poly_inf->A_ind; unsigned long * A_modp = poly_inf->A_modp; unsigned long * B_terms = poly_inf->B_terms; prime_t * factor_base = qs_inf->factor_base; unsigned long limbs = qs_inf->prec+1; unsigned long limbs2; unsigned long * A = poly_inf->A; unsigned long * B = poly_inf->B; unsigned long p, i; unsigned long * temp1 = (unsigned long *) flint_stack_alloc(limbs); unsigned long temp; mp_limb_t msl; double pinv; for (i = 0; i < s; i++) { p = factor_base[A_ind[i]].p; pinv = z_precompute_inverse(p); mpn_divmod_1(temp1 + 1, A + 1, A[0], p); temp1[0] = A[0] - (temp1[A[0]] == 0); A_modp[i] = (temp = mpn_mod_1(temp1 + 1, temp1[0], p)); temp = z_invert(temp, p); temp = z_mulmod_precomp(temp, qs_inf->sqrts[A_ind[i]], p, pinv); if (temp > p/2) temp = p - temp; msl = mpn_mul_1(B_terms + i*limbs + 1, temp1 + 1, temp1[0], temp); if (msl) { B_terms[i*limbs + temp1[0] + 1] = msl; B_terms[i*limbs] = temp1[0] + 1; } else B_terms[i*limbs] = temp1[0]; #if B_TERMS mpz_t temp; mpz_init(temp); fmpz_to_mpz(temp, B_terms + i*limbs); gmp_printf("B_%ld = %Zd\n", i, temp); mpz_clear(temp); #endif } F_mpn_copy(B, B_terms, 
B_terms[0]+1); // Set B to the sum of the B terms if (limbs > B_terms[0] + 1) F_mpn_clear(B + B_terms[0] + 1, limbs - B_terms[0] - 1); for (i = 1; i < s; i++) { limbs2 = B_terms[i*limbs]; msl = mpn_add_n(B+1, B+1, B_terms + i*limbs + 1, limbs2); if (msl) mpn_add_1(B + limbs2 + 1, B + limbs2 + 1, limbs - limbs2 - 1, msl); } B[0] = limbs - 1; while (!B[B[0]] && B[0]) B[0]--; #if B_TERMS mpz_t temp2; mpz_init(temp2); fmpz_to_mpz(temp2, B); gmp_printf("B = %Zd\n", temp2); mpz_clear(temp2); #endif flint_stack_release(); // release temp1 } /*========================================================================= Compute offsets and hypercube polynomial correction factors: Function: Compute the starting offsets in the sieve for each prime and the polynomial correction factors used by the hypercube method ==========================================================================*/ void compute_off_adj(QS_t * qs_inf, poly_t * poly_inf) { unsigned long num_primes = qs_inf->num_primes; unsigned long * A = poly_inf->A; unsigned long * B = poly_inf->B; uint32_t * A_inv = poly_inf->A_inv; uint32_t ** A_inv2B = poly_inf->A_inv2B; unsigned long * B_terms = poly_inf->B_terms; uint32_t * soln1 = poly_inf->soln1; uint32_t* soln2 = poly_inf->soln2; uint32_t * sqrts = qs_inf->sqrts; prime_t * factor_base = qs_inf->factor_base; unsigned long sieve_size = qs_inf->sieve_size; unsigned long s = poly_inf->s; unsigned long p, temp; unsigned limbs = qs_inf->prec+1; double pinv; for (unsigned long i = 2; i < num_primes; i++) // skip k and 2 { p = factor_base[i].p; pinv = factor_base[i].pinv; A_inv[i] = z_invert(mpn_mod_1(A+1, A[0], p), p); for (unsigned long j = 0; j < s; j++) { temp = mpn_mod_1(B_terms + j*limbs + 1, B_terms[j*limbs], p); temp = z_mulmod_precomp(temp, A_inv[i], p, pinv); temp *= 2; if (temp >= p) temp -= p; A_inv2B[j][i] = temp; } temp = mpn_mod_1(B+1, B[0], p); temp = sqrts[i] + p - temp; #if FLINT_BITS == 64 temp *= A_inv[i]; #else temp = z_mulmod2_precomp(temp, A_inv[i], 
p, pinv); #endif temp += sieve_size/2; soln1[i] = z_mod2_precomp(temp, p, pinv); // Consider using long_mod_precomp temp = p - sqrts[i]; if (temp == p) temp -= p; temp = z_mulmod_precomp(temp, A_inv[i], p, pinv); temp *= 2; if (temp >= p) temp -= p; soln2[i] = temp+soln1[i]; if (soln2[i] >= p) soln2[i] -= p; } } /*========================================================================= Compute offsets and hypercube polynomial correction factors: Function: Compute the starting offsets in the sieve for each prime and the polynomial correction factors used by the hypercube method ==========================================================================*/ void compute_A_factor_offsets(QS_t * qs_inf, poly_t * poly_inf) { unsigned long s = poly_inf->s; unsigned long * A_ind = poly_inf->A_ind; unsigned long * A_modp = poly_inf->A_modp; uint32_t * soln1 = poly_inf->soln1; uint32_t * soln2 = poly_inf->soln2; unsigned long p, D; unsigned long * n = qs_inf->n; unsigned long * B = poly_inf->B; unsigned long temp, temp2, B_modp2, index, p2; unsigned long sieve_size = qs_inf->sieve_size; prime_t * factor_base = qs_inf->factor_base; double * inv_p2 = poly_inf->inv_p2; double pinv; for (unsigned long j = 0; j < s; j++) { index = A_ind[j]; /* p = factor_base[index].p; p2 = p*p; pinv = factor_base[index].pinv; D = z_ll_mod_precomp(n[2], n[1], p*p, inv_p2[j]); if ((long) B < 0) { B_modp2 = z_mod2_precomp(-B, p2, inv_p2[j]); B_modp2 = p2 - B_modp2; if (B_modp2 == p2) B_modp2 = 0; } else B_modp2 = z_mod2_precomp(B, p2, inv_p2[j]); temp = B_modp2*A_modp[j]; temp = z_mod2_precomp(temp, p, pinv); temp2 = z_invert(temp, p); D -= (B_modp2*B_modp2); if ((long) D < 0) temp = -z_div2_precomp(-D, p, pinv); else temp = -z_div2_precomp(-D, p, pinv); temp *= temp2; temp += sieve_size/2; if ((long) temp < 0) { temp = p - z_mod2_precomp(-temp, p, pinv); if (temp == p) temp = 0; } else temp = z_mod2_precomp(temp, p, pinv); soln1[index] = temp;*/ soln2[index] = -1L; } } 
/*========================================================================= Compute C: Function: Compute the C coefficient of the polynomial with the current A and B values ==========================================================================*/ void compute_B_C(QS_t * qs_inf, poly_t * poly_inf) { mpz_t * A_mpz = &poly_inf->A_mpz; mpz_t * B_mpz = &poly_inf->B_mpz; mpz_t * C = &poly_inf->C; unsigned long * B = poly_inf->B; mpz_t * mpz_n = &qs_inf->mpz_n; fmpz_to_mpz(*B_mpz, B); mpz_mul(*C, *B_mpz, *B_mpz); mpz_sub(*C, *C, *mpz_n); #if TEST_C mpz_t temp; mpz_init(temp); mpz_mod(temp, *C, *A_mpz); if (mpz_cmp_ui(temp, 0) != 0) gmp_printf("B^2 - n = %Zd is not divisible by A = %Zd\n", *C, *A_mpz); mpz_clear(temp); #endif mpz_divexact(*C, *C, *A_mpz); qs_inf->sieve_fill = 128-mpz_sizeinbase(*C, 2)+qs_inf->error_bits+13;// 16, 20, 20 } flint-1.011/QS/linear_algebra.h0000644017361200017500000000544511025357253016155 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** linear_algebra.h Header file for linear_algebra.c. 
(C) 2006 William Hart ******************************************************************************/ #ifndef LINALG_H #define LINALG_H #include #include "common.h" #include "poly.h" #include "block_lanczos.h" #define DUPS 0 // Print info about number of duplicate relations typedef struct fac_s { unsigned long ind; unsigned long exp; } fac_t; typedef struct linalg_s { unsigned long * small; // Exponents of small primes in currently evaluated candidate fac_t * factor; // An array of factors with exponents corresponding to the current candidate unsigned long num_factors; //The length of the factor array for the current candidate la_col_t * matrix; // The final sorted F_2 matrix plus possibly some empty columns at the start unsigned long columns; // The number of columns in the matrix so far la_col_t * unmerged; // A new list of unmerged F_2 columns unsigned long num_unmerged; // The current number of unmerged F_2 relations mpz_t * Y_arr; // The Y values corresponding to all relations found unsigned long * relation; // The list of all relations found unsigned long * curr_rel; // Pointer to where we have got up to in the list of relations found unsigned long num_relations; // Total number of relations found so far la_col_t ** qsort_arr; // An array of pointers to the unmerged relations for quicksort } linalg_t; void linear_algebra_init(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf); void linear_algebra_clear(linalg_t * la_inf, QS_t * qs_inf); unsigned long merge_sort(linalg_t * la_inf); unsigned long merge_relations(linalg_t * la_inf); unsigned long insert_relation(linalg_t * la_inf, poly_t * poly_inf, mpz_t Y); #endif flint-1.011/QS/tinyQS.c0000644017361200017500000002555711025357253014456 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** tinyQS.c Implementation of a tiny hypercube MPQS (C) 2006 William Hart ******************************************************************************/ #include #include #include #include #include #include "../fmpz.h" #include "../long_extras.h" #include "../memory-manager.h" #include "tinyQS.h" #include "factor_base.h" #include "poly.h" #include "sieve.h" #include "linear_algebra.h" #include "block_lanczos.h" /*=========================================================================== Square Root: Function: Compute the square root of the product of all the partial relations and take it mod p ===========================================================================*/ static inline void square_root(mpz_t X, mpz_t Y, QS_t * qs_inf, linalg_t * la_inf, uint64_t * nullrows, unsigned long ncols, unsigned long l, mpz_t N) { unsigned long position; unsigned long * relation = la_inf->relation; prime_t * factor_base = qs_inf->factor_base; unsigned long * prime_count = qs_inf->prime_count; unsigned long num_primes = qs_inf->num_primes; mpz_t * Y_arr = la_inf->Y_arr; mpz_t pow; mpz_init(pow); memset(prime_count, 0, num_primes*sizeof(unsigned long)); mpz_set_ui(X, 1); 
mpz_set_ui(Y, 1); for (unsigned long i = 0; i < ncols; i++) { if (get_null_entry(nullrows, i, l)) { position = la_inf->matrix[i].orig*2*MAX_FACS; for (unsigned long j = 0; j < relation[position]; j++) { prime_count[relation[position+2*j+1]] += (relation[position+2*j+2]); } mpz_mul(Y, Y, Y_arr[la_inf->matrix[i].orig]); if (i % 10 == 0) mpz_mod(Y, Y, N); } } for (unsigned long i = 0; i < num_primes; i++) { if (prime_count[i]) { mpz_set_ui(pow, factor_base[i].p); mpz_powm_ui(pow, pow, prime_count[i]/2, N); mpz_mul(X, X, pow); } if (i%10 == 0) mpz_mod(X, X, N); } #if TEST for (unsigned long i = 0; i < num_primes; i++) { if ((prime_count[i] %2) != 0) printf("Error %ld, %ld, %ld\n", l, i, prime_count[i]); } #endif mpz_clear(pow); } /*=========================================================================== Collect relations: Function: Sets up batches of polynomials Do the sieving Evaluate candidates Returns: The number of relations found with this batch of polynomials ===========================================================================*/ unsigned long collect_relations(linalg_t * la_inf, QS_t * qs_inf, poly_t * poly_inf, unsigned char * sieve) { unsigned long s = poly_inf->s; unsigned long * poly_corr; unsigned long relations = 0; unsigned long ** A_inv2B = poly_inf->A_inv2B; unsigned long poly_index, j; unsigned long poly_add; compute_A(qs_inf, poly_inf); compute_B_terms(qs_inf, poly_inf); compute_off_adj(qs_inf, poly_inf); compute_A_factor_offsets(qs_inf, poly_inf); compute_C(qs_inf, poly_inf); for (poly_index = 1; poly_index < (1<<(s-1)); poly_index++) { for (j = 0; j < s; j++) { if (((poly_index >> j) & 1UL) != 0UL) break; } poly_add = (((poly_index >> j) & 2UL) != 0UL); poly_corr = A_inv2B[j]; do_sieving(qs_inf, poly_inf, sieve); relations += evaluate_sieve(la_inf, qs_inf, poly_inf, sieve); update_offsets(poly_add, poly_corr, qs_inf, poly_inf); if (poly_add) poly_inf->B += (2*poly_inf->B_terms[j]); else poly_inf->B -= (2*poly_inf->B_terms[j]); 
compute_C(qs_inf, poly_inf); compute_A_factor_offsets(qs_inf, poly_inf); } relations += merge_relations(la_inf); return relations; } /*=========================================================================== Main Quadratic Sieve Factoring Routine: Function: Finds the factors of a number using the quadratic sieve Assume n is odd, not a perfect power and not a prime Returns 0 if factorisation was unsuccessful Returns 1 if factorisation was successful If a small factor is found, it is returned and the QS is not run ===========================================================================*/ int F_mpz_factor_tinyQS(F_mpz_factor_t factors, mpz_t N) { unsigned long small_factor; unsigned long rels_found = 0; QS_t qs_inf; poly_t poly_inf; linalg_t la_inf; qs_inf.bits = mpz_sizeinbase(N,2); if (qs_inf.bits > MAXBITS) return 0; // Number too big for tinyQS mpz_init(qs_inf.mpz_n); qs_inf.n = (fmpz_t) flint_stack_alloc(3); qs_inf.n[2] = 0; mpz_to_fmpz(qs_inf.n, N); // set n to the number to be factored small_factor = knuth_schroeppel(&qs_inf); // Compute multiplier and some FB primes if (small_factor) goto cleanup_2; #if QS_INFO printf("Multiplier = %ld\n", qs_inf.k); #endif mpz_set(qs_inf.mpz_n, N); mpz_mul_ui(qs_inf.mpz_n, qs_inf.mpz_n, qs_inf.k); qs_inf.bits = mpz_sizeinbase(qs_inf.mpz_n, 2); if (qs_inf.bits > MAXBITS) { small_factor = 0; // Number too big for tinyQS goto cleanup_2; } mpz_to_fmpz(qs_inf.n, qs_inf.mpz_n); // set n to the number to be factored times k primes_init(&qs_inf); sqrts_init(&qs_inf); if (qs_inf.bits > MAXBITS) { small_factor = 0; // kn too big for tinyQS goto cleanup_1; } small_factor = compute_factor_base(&qs_inf); // Computes the factor base primes and modular square roots if (small_factor) goto cleanup_1; compute_sizes(&qs_inf); poly_init(&qs_inf, &poly_inf, N); linear_algebra_init(&la_inf, &qs_inf, &poly_inf); unsigned char * sieve = (unsigned char *) flint_stack_alloc_bytes(SIEVE_SIZE+1); while (rels_found < qs_inf.num_primes + EXTRA_RELS) 
{ rels_found += collect_relations(&la_inf, &qs_inf, &poly_inf, sieve); } flint_stack_release(); // release sieve la_col_t * matrix = la_inf.matrix; unsigned long ncols = qs_inf.num_primes + EXTRA_RELS; unsigned long nrows = qs_inf.num_primes; reduce_matrix(&nrows, &ncols, matrix); // Do some filtering on the matrix uint64_t* nullrows; do { nullrows = block_lanczos(nrows, 0, ncols, matrix); // Linear algebra (block Lanczos) } while (nullrows == NULL); unsigned long i, j, mask; for (i = 0, mask = 0; i < ncols; i++) mask |= nullrows[i]; for (i = j = 0; i < 64; i++) { if (mask & ((uint64_t)(1) << i)) j++; } #if QS_INFO printf("%ld nullspace vectors found\n", j); #endif qs_inf.prime_count = (unsigned long *) flint_stack_alloc(qs_inf.num_primes); mpz_t X, Y, F, Q, R; mpz_init(X); mpz_init(Y); mpz_init(F); mpz_init(Q); mpz_init(R); mpz_set(F, N); #if PRINT_FACTORS gmp_printf("Factors of %Zd:\n", N); #endif for (unsigned long l = 0; l < 64; l++) { if (mask & ((uint64_t)(1) << l)) { square_root(X, Y, &qs_inf, &la_inf, nullrows, ncols, l, N); mpz_sub(X, X, Y); mpz_gcd(X, X, N); if ((mpz_cmp(X, N) != 0) && (mpz_cmp_ui(X, 1) != 0)) { #if PRINT_FACTORS gmp_printf("%Zd\n", X); #endif if (mpz_probab_prime_p(X, 10)) { mpz_fdiv_qr(Q, R, F, X); if (mpz_cmp_ui(R, 0) == 0) mpz_set(F, Q); } if (mpz_cmp_ui(F, 1) == 0) break; } } } small_factor = 1; // sieve was successful mpz_clear(Q); mpz_clear(R); mpz_clear(F); mpz_clear(X); mpz_clear(Y); flint_stack_release(); // release prime_count linear_algebra_clear(&la_inf, &qs_inf); poly_clear(&poly_inf); sizes_clear(); cleanup_1: sqrts_clear(); // release modular square roots primes_clear(); // release factor_base cleanup_2: flint_stack_release(); // release n mpz_clear(qs_inf.mpz_n); return small_factor; } /*=========================================================================== Main Program: Function: Factors a user specified number using the quadratic sieve ===========================================================================*/ 
/*int main(int argc, unsigned char *argv[]) { mpz_t N; mpz_init(N); F_mpz_factor_t factors; printf("Input number to factor [ <= 35 decimal digits ] : "); gmp_scanf("%Zd", N); getchar(); F_mpz_factor_tinyQS(factors, N); mpz_clear(N); }*/ int main(int argc, unsigned char *argv[]) { mpz_t N; mpz_init(N); F_mpz_factor_t factors; unsigned long factor; unsigned long failed = 0; unsigned long small_factors = 0; unsigned long succeed = 0; unsigned long bits1, bits2, i; for (i = 0; i < 100; i++) { mpz_set_ui(N, z_nextprime(z_randint(4000000000UL)+1UL)); mpz_mul_ui(N, N, z_nextprime(z_randint(4000000000UL)+1UL)); //bits1 = z_randint(41UL)+13UL; //bits2 = z_randint(22UL)+13UL; //mpz_mul_ui(N, N, z_nextprime(z_randint((1UL< 1) small_factors++; if (factor == 1) succeed++; } printf("TinyQS succeeded %ld times, found a small factor %ld times\n", succeed, small_factors); printf("and failed %ld times\n", failed); mpz_clear(N); } flint-1.011/mpz_poly.h0000644017361200017500000002770511025357254014560 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mpz_poly.h: Polynomials over Z, implemented as an array of mpz_t's Copyright (C) 2007, William Hart and David Harvey ******************************************************************************/ #ifndef MPZ_POLY_H #define MPZ_POLY_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include "memory-manager.h" #include "fmpz_poly.h" typedef struct { mpz_t* coeffs; unsigned long alloc; unsigned long length; } mpz_poly_struct; // mpz_poly_t allows reference-like semantics for mpz_poly_struct: typedef mpz_poly_struct mpz_poly_t[1]; // for some functions it's convenient to trick the compiler into letting us // swap input argument pointers; we use mpz_poly_p for this typedef mpz_poly_struct* mpz_poly_p; #define SWAP_MPZ_POLY_PTRS(x, y) \ do { \ mpz_poly_p zzz_ptr = (x); \ (x) = (y); \ (y) = zzz_ptr; \ } while (0); // ------------------------------------------------------ // Initialisation and memory management void mpz_poly_init(mpz_poly_t poly); void mpz_poly_clear(mpz_poly_t poly); void mpz_poly_init2(mpz_poly_t poly, unsigned long alloc); void mpz_poly_init3(mpz_poly_t poly, unsigned long alloc, unsigned long bits); void mpz_poly_realloc(mpz_poly_t poly, unsigned long alloc); // _bits_ only applies to newly allocated coefficients, not existing ones... 
void mpz_poly_realloc2(mpz_poly_t poly, unsigned long alloc, unsigned long bits); // this non-inlined version REQUIRES that alloc > poly->alloc void __mpz_poly_ensure_alloc(mpz_poly_t poly, unsigned long alloc); // this is arranged so that the initial comparison (very frequent) is inlined, // but the actual allocation (infrequent) is not static inline void mpz_poly_ensure_alloc(mpz_poly_t poly, unsigned long alloc) { if (alloc > poly->alloc) __mpz_poly_ensure_alloc(poly, alloc); } // ------------------------------------------------------ // Setting/retrieving coefficients static inline mpz_t* mpz_poly_coeff_ptr(mpz_poly_t poly, unsigned long n) { if (n >= poly->length) return NULL; return &poly->coeffs[n]; } static inline void mpz_poly_get_coeff(mpz_t c, mpz_poly_t poly, unsigned long n) { if (n >= poly->length) mpz_set_ui(c, 0); else mpz_set(c, poly->coeffs[n]); } static inline unsigned long mpz_poly_get_coeff_ui(mpz_poly_t poly, unsigned long n) { if (n >= poly->length) return 0; return mpz_get_ui(poly->coeffs[n]); } static inline long mpz_poly_get_coeff_si(mpz_poly_t poly, unsigned long n) { if (n >= poly->length) return 0; return mpz_get_si(poly->coeffs[n]); } void mpz_poly_set_coeff(mpz_poly_t poly, unsigned long n, mpz_t c); void mpz_poly_set_coeff_ui(mpz_poly_t poly, unsigned long n, unsigned long c); void mpz_poly_set_coeff_si(mpz_poly_t poly, unsigned long n, long c); static inline mpz_t* _mpz_poly_get_coeff_ptr(mpz_poly_t poly, unsigned long n) { return poly->coeffs + n; } static inline void _mpz_poly_get_coeff(mpz_t c, mpz_poly_t poly, unsigned long n) { mpz_set(c, poly->coeffs[n]); } static inline unsigned long _mpz_poly_get_coeff_ui(mpz_poly_t poly, unsigned long n) { return mpz_get_ui(poly->coeffs[n]); } static inline long _mpz_poly_get_coeff_si(mpz_poly_t poly, unsigned long n) { return mpz_get_si(poly->coeffs[n]); } static inline void _mpz_poly_set_coeff(mpz_poly_t poly, unsigned long n, mpz_t c) { mpz_set(poly->coeffs[n], c); } static inline void 
_mpz_poly_set_coeff_ui(mpz_poly_t poly, unsigned long n, unsigned long c) { mpz_set_ui(poly->coeffs[n], c); } static inline void _mpz_poly_set_coeff_si(mpz_poly_t poly, unsigned long n, long c) { mpz_set_si(poly->coeffs[n], c); } // ------------------------------------------------------ // String conversions and I/O int mpz_poly_from_string(mpz_poly_t poly, const char* s); char* mpz_poly_to_string(mpz_poly_t poly); void mpz_poly_print(mpz_poly_t poly); void mpz_poly_fprint(mpz_poly_t poly, FILE* f); int mpz_poly_read(mpz_poly_t poly); int mpz_poly_fread(mpz_poly_t poly, FILE* f); char* mpz_poly_to_string_pretty(mpz_poly_t poly, const char * x); void mpz_poly_fprint_pretty(mpz_poly_t poly, FILE* f, const char * x); void mpz_poly_print_pretty(mpz_poly_t poly, const char * x); // ------------------------------------------------------ // Length and degree void mpz_poly_normalise(mpz_poly_t poly); int mpz_poly_normalised(mpz_poly_t poly); void mpz_poly_pad(mpz_poly_t poly, unsigned long length); void mpz_poly_truncate(mpz_poly_t res, mpz_poly_t poly, unsigned long length); static inline unsigned long mpz_poly_length(mpz_poly_t poly) { return poly->length; } static inline long mpz_poly_degree(mpz_poly_t poly) { return (long) poly->length - 1; } // ------------------------------------------------------ // Assignment void mpz_poly_set(mpz_poly_t res, mpz_poly_t poly); static inline void mpz_poly_zero(mpz_poly_t poly) { poly->length = 0; } static inline void mpz_poly_swap(mpz_poly_t poly1, mpz_poly_t poly2) { mpz_t* temp1; unsigned long temp2; temp1 = poly2->coeffs; poly2->coeffs = poly1->coeffs; poly1->coeffs = temp1; temp2 = poly1->alloc; poly1->alloc = poly2->alloc; poly2->alloc = temp2; temp2 = poly1->length; poly1->length = poly2->length; poly2->length = temp2; } // ------------------------------------------------------ // Conversions void mpz_poly_to_fmpz_poly(fmpz_poly_t res, mpz_poly_t poly); void fmpz_poly_to_mpz_poly(mpz_poly_t res, const fmpz_poly_t poly); // 
------------------------------------------------------ // Comparison int mpz_poly_equal(mpz_poly_t poly1, mpz_poly_t poly2); // ------------------------------------------------------ // Addition/subtraction void mpz_poly_add(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_sub(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_neg(mpz_poly_t res, mpz_poly_t poly); // ------------------------------------------------------ // Shifting void mpz_poly_lshift(mpz_poly_t res, mpz_poly_t poly, unsigned long k); void mpz_poly_rshift(mpz_poly_t res, mpz_poly_t poly, unsigned long k); static inline void mpz_poly_shift(mpz_poly_t res, mpz_poly_t poly, long k) { if (k >= 0) mpz_poly_lshift(res, poly, k); else mpz_poly_rshift(res, poly, -k); } //------------------------------------------------------- // Norms void mpz_poly_2norm(mpz_t norm, mpz_poly_t poly); // ------------------------------------------------------ // Scalar multiplication and division void mpz_poly_scalar_mul(mpz_poly_t res, mpz_poly_t poly, mpz_t c); void mpz_poly_scalar_mul_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c); void mpz_poly_scalar_mul_si(mpz_poly_t res, mpz_poly_t poly, long c); void mpz_poly_scalar_div(mpz_poly_t res, mpz_poly_t poly, mpz_t c); void mpz_poly_scalar_div_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c); void mpz_poly_scalar_div_si(mpz_poly_t res, mpz_poly_t poly, long c); void mpz_poly_scalar_div_exact(mpz_poly_t res, mpz_poly_t poly, mpz_t c); void mpz_poly_scalar_div_exact_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c); void mpz_poly_scalar_div_exact_si(mpz_poly_t res, mpz_poly_t poly, long c); void mpz_poly_scalar_mod(mpz_poly_t res, mpz_poly_t poly, mpz_t c); void mpz_poly_scalar_mod_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c); // ------------------------------------------------------ // Polynomial multiplication void mpz_poly_mul(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_mul_naive(mpz_poly_t 
res, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_mul_karatsuba(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_mul_SS(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_mul_naive_KS(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_sqr(mpz_poly_t res, mpz_poly_t poly); void mpz_poly_sqr_naive(mpz_poly_t res, mpz_poly_t poly); void mpz_poly_sqr_SS(mpz_poly_t res, mpz_poly_t poly); void mpz_poly_sqr_karatsuba(mpz_poly_t res, mpz_poly_t poly); void mpz_poly_sqr_naive_KS(mpz_poly_t res, mpz_poly_t poly); // exported for profiling... unsigned long _mpz_poly_mul_karatsuba_crossover(unsigned long limbs); // ------------------------------------------------------ // Polynomial division void mpz_poly_monic_inverse(mpz_poly_t res, mpz_poly_t poly, unsigned long k); void mpz_poly_pseudo_inverse(mpz_poly_t res, mpz_poly_t poly, unsigned long k); void mpz_poly_monic_div(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_pseudo_div(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_monic_rem(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_pseudo_rem(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_monic_div_rem(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_pseudo_div_rem(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_monic_inverse_naive(mpz_poly_t res, mpz_poly_t poly, unsigned long k); void mpz_poly_pseudo_inverse_naive(mpz_poly_t res, mpz_poly_t poly, unsigned long k); void mpz_poly_monic_div_naive(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_pseudo_div_naive(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_monic_rem_naive(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_pseudo_rem_naive(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_monic_div_rem_naive(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t 
poly1, mpz_poly_t poly2); void mpz_poly_pseudo_div_rem_naive(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2); // ------------------------------------------------------ // GCD and extended GCD void mpz_poly_content(mpz_t x, mpz_poly_t poly); unsigned long mpz_poly_content_ui(mpz_poly_t poly); void mpz_poly_gcd(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2); void mpz_poly_xgcd(mpz_poly_t res, mpz_poly_t a, mpz_poly_t b, mpz_poly_t poly1, mpz_poly_t poly2); // ------------------------------------------------------ // Miscellaneous unsigned long mpz_poly_max_limbs(mpz_poly_t poly); unsigned long mpz_poly_max_bits(mpz_poly_t poly); unsigned long mpz_poly_product_max_limbs(mpz_poly_t poly1, mpz_poly_t poly2); unsigned long mpz_poly_product_max_bits(mpz_poly_t poly1, mpz_poly_t poly2); // ------------------------------------------------------ // Exported for testing only void _mpz_poly_mul_kara_recursive(mpz_t* out, mpz_t* in1, unsigned long len1, mpz_t* in2, unsigned long len2, mpz_t* scratch, unsigned long skip, unsigned long crossover); // *************** end of file #ifdef __cplusplus } #endif #endif flint-1.011/profiler-main.h0000644017361200017500000000567611025357254015456 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** Command-line profiling utility (C) 2007 William Hart and David Harvey ******************************************************************************/ #ifdef __cplusplus extern "C" { #endif #include "profiler.h" // A function that takes one/two coordinates, an implementation-defined // argument, and an iteration count // (i.e. the type of function that is getting profiled). typedef void (*prof1d_Sampler_t)(unsigned long x, void* arg, unsigned long count); typedef void (*prof2d_Sampler_t)(unsigned long x, unsigned long y, void* arg, unsigned long count); // A function that runs a bunch of profiles typedef void (*prof_Driver_t)(char* params); // A function that returns a string (the description of the target) typedef char* (*prof_DriverString_t)(char* params); // A function that returns a string (the default parameters for this target) typedef char* (*prof_DriverDefaultParams_t)(); void prof2d_set_sampler(prof2d_Sampler_t sampler); void prof2d_sample(unsigned long x, unsigned long y, void* arg); void prof1d_set_sampler(prof1d_Sampler_t sampler); void prof1d_sample(unsigned long x, void* arg); void prof_start(); void prof_stop(); /* Generates count random unsigned limbs, stores them at output */ void profiler_random_limbs(unsigned long* output, unsigned long count); /* ============================================================================ Imported data from the auto-generated table file (See make-profile-tables.py.) =============================================================================*/ // name of module being profiled. 
extern char* prof_module_name; extern int prof_target_count; extern char* prof_target_name[]; extern prof_Driver_t prof_Driver_list[]; extern prof_DriverString_t prof_DriverString_list[]; extern prof_DriverDefaultParams_t prof_DriverDefaultParams_list[]; #ifdef __cplusplus } #endif // end of file **************************************************************** flint-1.011/mpn_extras.c0000644017361200017500000010665111025357254015060 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mpn_extras.c Extra functions for manipulating mpn's and limbs. 
Copyright (C) 2006, William Hart mp_limb_t mpn_divmod_1_preinv was adapted from GMP, (C) Free Software Foundation ******************************************************************************/ #include #include #include #include #include "flint.h" #include "longlong_wrapper.h" #include "longlong.h" #include "long_extras.h" #include "memory-manager.h" #include "mpn_extras.h" #include "ZmodF_poly.h" #include "ZmodF_mul.h" #include "F_mpn_mul-tuning.h" #define DEBUG2 1 /*======================================================================================= Performs division by a limb d and places the quotient in qp and returns the remainder. Requires a single limb approximation to 1/d as input. If the most significant bit of d is not 1 it expects d to be shifted left (by norm bits) until the most significant bit is 1 before the inverse is computed. However the original d should be supplied to the function, not the shifted d. This code has been adapted from code found in the GMP package version 4.2.1 (divrem_1.c) (C) Free Software Foundation */ mp_limb_t F_mpn_divrem_ui_precomp(mp_limb_t * qp, mp_limb_t * up, unsigned long un, mp_limb_t d, mp_limb_t dinv) { mp_size_t n; mp_size_t i; mp_limb_t n1, n0; mp_limb_t r = 0; unsigned long norm; n = un; if (n == 0) return 0; count_lead_zeros(norm, d); qp += (n - 1); /* Make qp point at most significant quotient limb */ if ((d & (1L<<(FLINT_BITS-1))) != 0) { if (un != 0) { /* High quotient limb is 0 or 1, skip a divide step. */ mp_limb_t q; r = up[un - 1]; q = (r >= d); *qp-- = q; r -= (d & -q); n--; un--; } /* Multiply-by-inverse, divisor already normalized. */ for (i = un - 1; i >= 0; i--) { n0 = up[i]; udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv); qp--; } return r; } else { /* Most significant bit of divisor == 0. */ /* Skip a division if high < divisor (high quotient 0). Testing here before normalizing will still skip as often as possible. 
*/ if (un != 0) { n1 = up[un - 1]; if (n1 < d) { r = n1; *qp-- = 0; n--; if (n == 0) return r; un--; } } d <<= norm; r <<= norm; if (un != 0) { n1 = up[un - 1]; r |= (n1 >> (FLINT_BITS - norm)); for (i = un - 2; i >= 0; i--) { n0 = up[i]; udiv_qrnnd_preinv (*qp, r, r, ((n1 << norm) | (n0 >> (FLINT_BITS - norm))), d, dinv); qp--; n1 = n0; } udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv); qp--; } return r >> norm; } } mp_limb_t F_mpn_addmul(mp_limb_t * rp, mp_limb_t * s1p, unsigned long s1n, mp_limb_t * s2p, unsigned long s2n) { if (s2n == 0) return 0; mp_limb_t carry; carry = mpn_addmul_1(rp, s1p, s1n, s2p[0]); for (unsigned long i = 1; i < s2n; i++) { carry = mpn_add_1(rp+i+s1n-1, rp+i+s1n-1, 1, carry); if (s2p[i]) carry += mpn_addmul_1(rp+i, s1p, s1n, s2p[i]); } carry = mpn_add_1(rp+s2n+s1n-1, rp+s2n+s1n-1, 1, carry); return carry; } /*===================================================================================== Fast Integer Multiplication Code =====================================================================================*/ unsigned long MUL_TWK_VALS[MUL_TWK_COUNT][3] = { {2000, 2140, 1024}, {2140, 2430, 64}, {2430, 2580, 1024}, {2580, 2700, 64}, {2700, 2880, 4096}, {2880, 3850, 16}, {3850, 4220, 4}, {4220, 4400, 1024}, {4400, 4850, 16}, {4850, 5700, 1024}, {5700, 7900, 4}, {7900, 8900, 1024}, {8900, 97000, 4}, {97000, 127000, 1}, {127000, 262000, 4}, {262000, 517000, 1}, {517000, 1050000, 4}, {1050000, 2060000, 1}, {2060000, 4230000, 4}, {4230000, 8350000, 1} }; unsigned long SQR_TWK_VALS[SQR_TWK_COUNT][3] = { {1564, 1994, 16}, {1994, 2952, 64}, {2952, 5921, 16}, {5921, 32575, 4}, {32575, 40006, 16}, {40006, 66526, 4}, {66526, 127370, 1}, {127370, 257473, 4}, {257473, 520507, 1}, {520507, 1050000, 4}, {1050000, 2060000, 1}, {2060000, 4230000, 4}, {4230000, 8350000, 1} }; unsigned long FFT_MUL_TWK[FFT_MUL_COUNT][2] = { {1695, 7}, {1730, 8}, {1790, 7}, {1820, 8}, {1880, 7}, {1920, 8}, {3070, 9}, {3200, 8}, {3450, 9}, {3710, 8}, {3840, 9}, {4080, 
8}, {4220, 9}, {4480, 8}, {4600, 9}, {4870, 8}, {4980, 9}, {5230, 8}, {5380, 9}, {6140, 10}, {7150, 9}, {7700, 10}, {8700, 9}, {9200, 10}, {12300, 11}, {34800, 10}, {36900, 12}, {41000, 11}, {49200, 12}, {99000, 13}, {130000, 12}, {198000, 13}, {396000, 14}, {526000, 13}, {1040000, 14}, {3150000, 15}, {6300000, 16}, {8400000, 15}, {12700000, 17}, {15900000, 16} }; unsigned long FFT_SQR_TWK[FFT_SQR_COUNT][2] = { {1300, 8}, {2700, 9}, {2950, 8}, {3080, 9}, {4100, 8}, {4240, 9}, {5100, 10}, {5630, 9}, {6150, 10}, {8700, 9}, {9200, 10}, {12300, 11}, {24700, 12}, {29000, 11}, {35000, 10}, {36900, 11}, {49000, 12}, {99000, 13}, {130000, 12}, {198000, 13}, {396000, 14}, {660000, 13}, {780000, 14}, {170000, 15}, {210000, 14}, {420000, 15}, {880000, 16}, {1060000, 15}, {1260000, 17}, {1680000, 16} }; /* Splits an mpn into segments of length coeff_limbs and stores in a ZmodF_poly in zero padded coefficients of length output_limbs, for use in FFT convolution code. Assumes that the input is total_limbs in length. Used by the large integer multiplication code (F_mpn_mul and F_mpn_mul_precomp and F_mpn_mul_trunc) */ void F_mpn_FFT_split(ZmodF_poly_t poly, mp_limb_t * limbs, unsigned long total_limbs, unsigned long coeff_limbs, unsigned long output_limbs) { unsigned long length = (total_limbs-1)/coeff_limbs + 1; unsigned long i, j, skip; for (skip = 0, i = 0; skip+coeff_limbs <= total_limbs; skip+=coeff_limbs, i++) { for (j = 0; j < output_limbs; j += 8) FLINT_PREFETCH(poly->coeffs[i+1], j); F_mpn_clear(poly->coeffs[i], output_limbs+1); // convert a coefficient F_mpn_copy(poly->coeffs[i], limbs+skip, coeff_limbs); } if (i < length) F_mpn_clear(poly->coeffs[i], output_limbs+1); if (total_limbs > skip) F_mpn_copy(poly->coeffs[i], limbs+skip, total_limbs-skip); poly->length = length; } /* Splits an mpn into segments of length _bits_ and stores in a ZmodF_poly in zero padded coefficients of length output_limbs, for use in FFT convolution code. 
Assumes that the input is total_limbs in length. Used by the large integer multiplication code (F_mpn_mul) It is assumed that bits is not divisible by FLINT_BITS */ void F_mpn_FFT_split_bits(ZmodF_poly_t poly, mp_limb_t * limbs, unsigned long total_limbs, unsigned long bits, unsigned long output_limbs) { unsigned long length = (FLINT_BITS*total_limbs-1)/bits + 1; unsigned long i, j; unsigned long top_bits = ((FLINT_BITS-1)&bits); if (top_bits == 0) { F_mpn_FFT_split(poly, limbs, total_limbs, bits >> FLINT_LG_BITS_PER_LIMB, output_limbs); return; } unsigned long coeff_limbs = (bits>>FLINT_LG_BITS_PER_LIMB) + 1; unsigned long mask = (1L<coeffs[i+1], j); F_mpn_clear(poly->coeffs[i], output_limbs+1); // convert a coefficient if (!shift_bits) { F_mpn_copy(poly->coeffs[i], limb_ptr, coeff_limbs); poly->coeffs[i][coeff_limbs-1] &= mask; limb_ptr += (coeff_limbs-1); shift_bits += top_bits; } else { mpn_rshift(poly->coeffs[i], limb_ptr, coeff_limbs, shift_bits); limb_ptr += (coeff_limbs-1); shift_bits += top_bits; if (shift_bits >= FLINT_BITS) { limb_ptr++; poly->coeffs[i][coeff_limbs-1] += (limb_ptr[0] << (FLINT_BITS - (shift_bits - top_bits))); shift_bits -= FLINT_BITS; } poly->coeffs[i][coeff_limbs-1] &= mask; } } F_mpn_clear(poly->coeffs[i], output_limbs+1); unsigned long limbs_left = total_limbs - (limb_ptr - limbs); if (!shift_bits) { F_mpn_copy(poly->coeffs[i], limb_ptr, limbs_left); } else { mpn_rshift(poly->coeffs[i], limb_ptr, limbs_left, shift_bits); } poly->length = length; } /* Recombines coefficients of a ZmodF_poly after doing a convolution. Assumes each of the coefficients of the ZmodF_poly is output_limbs long, that each of the coefficients is being shifted by a multiple of coeff_limbs and added to an mpn which is total_limbs long. It is assumed that the mpn has been zeroed in advance. 
Used by the large integer multiplication code (F_mpn_mul and F_mpn_mul_precomp and F_mpn_mul_trunc) */ void F_mpn_FFT_combine(mp_limb_t * res, ZmodF_poly_t poly, unsigned long coeff_limbs, unsigned long output_limbs, unsigned long total_limbs) { unsigned long skip, i, j; unsigned long length = poly->length; for (skip = 0, i = 0; (i < length) && (skip+output_limbs <= total_limbs); i++, skip+=coeff_limbs) { for (j = 0; j < output_limbs; j += 8) FLINT_PREFETCH(poly->coeffs[i+1], j); mpn_add(res+skip, res+skip, output_limbs+1, poly->coeffs[i], output_limbs); } while ((skip < total_limbs) && (i < length)) { mpn_add(res+skip, res+skip, total_limbs - skip, poly->coeffs[i], FLINT_MIN(total_limbs - skip, output_limbs)); i++; skip+=coeff_limbs; } } /* Recombines coefficients of a ZmodF_poly after doing a convolution. Assumes each of the coefficients of the ZmodF_poly is output_limbs long, that each of the coefficients is being shifted by a multiple of _bits_ and added to an mpn which is total_limbs long. It is assumed that the mpn has been zeroed in advance. 
Used by the large integer multiplication code (F_mpn_mul) It is assumed that bits is not divisible by FLINT_BITS */ void F_mpn_FFT_combine_bits(mp_limb_t * res, ZmodF_poly_t poly, unsigned long bits, unsigned long output_limbs, unsigned long total_limbs) { unsigned long top_bits = ((FLINT_BITS-1)&bits); if (top_bits == 0) { F_mpn_FFT_combine(res, poly, bits >> FLINT_LG_BITS_PER_LIMB, output_limbs, total_limbs); return; } unsigned long coeff_limbs = (bits>>FLINT_LG_BITS_PER_LIMB) + 1; unsigned long i, j; unsigned long length = poly->length; unsigned long * temp = (unsigned long *) flint_stack_alloc(output_limbs+1); unsigned long shift_bits = 0; unsigned long * limb_ptr = res; unsigned long * end = res + total_limbs; for (i = 0; (i < length) && (limb_ptr + output_limbs < end); i++) { for (j = 0; j < output_limbs; j += 8) FLINT_PREFETCH(poly->coeffs[i+1], j); if (shift_bits) { mpn_lshift(temp, poly->coeffs[i], output_limbs+1, shift_bits); mpn_add_n(limb_ptr, limb_ptr, temp, output_limbs+1); } else { mpn_add(limb_ptr, limb_ptr, output_limbs+1, poly->coeffs[i], output_limbs); } shift_bits += top_bits; limb_ptr += (coeff_limbs - 1); if (shift_bits >= FLINT_BITS) { limb_ptr++; shift_bits -= FLINT_BITS; } } while ((limb_ptr < end) && (i < length)) { if (shift_bits) { mpn_lshift(temp, poly->coeffs[i], output_limbs+1, shift_bits); mpn_add_n(limb_ptr, limb_ptr, temp, end - limb_ptr); } else { mpn_add_n(limb_ptr, limb_ptr, poly->coeffs[i], end - limb_ptr); } shift_bits += top_bits; limb_ptr += (coeff_limbs - 1); if (shift_bits >= FLINT_BITS) { limb_ptr++; shift_bits -= FLINT_BITS; } i++; } flint_stack_release(); } /*void F_mpn_mul_tuning(unsigned long * length1, unsigned long * length2, unsigned long * output_bits, unsigned long * coeff_limbs, unsigned long * log_length, unsigned long limbs1, unsigned long limbs2, unsigned long twk) { unsigned long length = 1; unsigned long log_length = 0; unsigned long coeff_limbs = limbs1 + limbs2; unsigned long s1 = 
(FLINT_BIT_COUNT(data1[limbs1-1]) + FLINT_BIT_COUNT(data2[limbs2-1]) <= FLINT_BITS); unsigned long total_limbs = coeff_limbs - s1; unsigned long output_bits = coeff_limbs*FLINT_BITS; unsigned long n = coeff_limbs; unsigned long length1 = 1; unsigned long length2 = 1; unsigned log_length2 = 0; //============================================================================== printf("%ld, %ld, %ld, %ld, %ld\n", length1, length2, output_bits, coeff_limbs, log_length); if (twk > 64) { length = 2; log_length = 1; while ((1<<(log_length-1)) < output_bits) { length<<=1; log_length++; coeff_limbs = (limbs1+limbs2-1)/length+1; while ((limbs1-1)/coeff_limbs+(limbs2-1)/coeff_limbs+2 > length) coeff_limbs++; output_bits = (2*coeff_limbs+1)*FLINT_BITS; output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); coeff_limbs = ((output_bits - FLINT_BITS)/FLINT_BITS)/2; if ((long) coeff_limbs < 1) coeff_limbs = 1; length1 = (limbs1-1)/coeff_limbs+1; length2 = (limbs2-1)/coeff_limbs+1; } while (twk > 64) { log_length--; length>>=1; twk>>=2; } if (length == 0) { length = 2; log_length = 1; } coeff_limbs = (limbs1+limbs2-1)/length+1; while ((limbs1-1)/coeff_limbs+(limbs2-1)/coeff_limbs+2 > length) coeff_limbs++; output_bits = (2*coeff_limbs+1)*FLINT_BITS; output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); while ((output_bits%3) != 0) output_bits+=(1<<(log_length-1)); coeff_limbs = ((output_bits - FLINT_BITS)/FLINT_BITS)/2; if ((long) coeff_limbs < 1) coeff_limbs = 1; length1 = (limbs1-1)/coeff_limbs+1; length2 = (limbs2-1)/coeff_limbs+1; log_length = 1; while ((1< length) coeff_limbs++; output_bits = (2*coeff_limbs+1)*FLINT_BITS; output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); coeff_limbs = ((output_bits - FLINT_BITS)/FLINT_BITS)/2; if ((long) coeff_limbs < 1) coeff_limbs = 1; length1 = (limbs1-1)/coeff_limbs+1; length2 = (limbs2-1)/coeff_limbs+1; } } }*/ /* Compute optimal lengths for the polynomials that coeff1 
and coeff2 are broken into in the convolution based long integer code. We want the sum of the two lengths to satisfy the SS condition (with sqrt2): if 2^l1 < length1 + length2 <= 2^l2 then 2^(l2-1) divides output_bits Requires limbs1 and limbs2 are at least 1, ensures length1 and length2 are at least 1 */ #define F_mpn_mul_ADJUST \ do { \ /* Compute the coefficient size for breaking the two long integers up */ \ coeff_limbs = (limbs1+limbs2-1)/(length)+1; \ if (coeff_limbs == 1L) /* This is as far as we can go */ \ \ { \ length1 = limbs1; \ length2 = limbs2; \ done = 1; \ } \ while ((limbs1-1)/(coeff_limbs)+(limbs2-1)/(coeff_limbs)+2 > length) coeff_limbs++; \ /* Compute the number of bits for the output coefficients */ \ output_bits = (2*coeff_limbs+1)*FLINT_BITS; \ output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); \ /* Try and compute a more optimal coefficient size to break up inputs */ \ coeff_limbs = ((output_bits - FLINT_BITS)/FLINT_BITS)/2; \ if ((long) coeff_limbs <= 1L) coeff_limbs = 1; \ /* Compute the lengths of the polys the coefficients will be broken into with this coeff size */ \ length1 = (limbs1-1)/coeff_limbs+1; \ length2 = (limbs2-1)/coeff_limbs+1; \ } while (0) #define F_mpn_mul_TUNING \ do { \ if (twk > 64) \ { \ length = 2; \ log_length = 1; \ \ int done = 0; \ \ while ((length < 2*output_bits) && !done) \ { \ /* We are outside the optimal SS region, so double the length */ \ length<<=1; \ log_length++; \ F_mpn_mul_ADJUST; \ } \ \ while ((twk > 64) && (length >= 4)) \ { \ log_length--; \ length>>=1; \ twk>>=2; \ } \ \ F_mpn_mul_ADJUST; \ \ } else \ { \ int done = 0; \ \ while ((twk*length < 2*output_bits) && !done) \ { \ /* We are outside the optimal SS region, so double the length */ \ length<<=1; \ log_length++; \ F_mpn_mul_ADJUST; \ } \ } \ } while (0) mp_limb_t __F_mpn_mul(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1, mp_limb_t * data2, unsigned long limbs2, unsigned long log_length) { unsigned long 
length = 1; unsigned long coeff_limbs = limbs1 + limbs2; unsigned long s1 = (FLINT_BIT_COUNT(data1[limbs1-1]) + FLINT_BIT_COUNT(data2[limbs2-1]) <= FLINT_BITS); unsigned long total_limbs = coeff_limbs - s1; unsigned long output_bits = coeff_limbs*FLINT_BITS; unsigned long n = coeff_limbs; unsigned long length1 = 1; unsigned long length2 = 1; unsigned log_length2 = 1; unsigned long bits; do { bits = (((limbs1 << FLINT_LG_BITS_PER_LIMB)-1) >> (log_length-1)) + 1; output_bits = 2*bits + log_length2; output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); bits = (output_bits - log_length2)/2; length1 = ((limbs1 << FLINT_LG_BITS_PER_LIMB)-1)/bits + 1; length2 = ((limbs2 << FLINT_LG_BITS_PER_LIMB)-1)/bits + 1; log_length2++; } while ((length2 > (1L<<(log_length2-1))) || (length1 > (1L<<(log_length-1)))); n = (output_bits-1)/FLINT_BITS+1; #if DEBUG printf("%ld, %ld, %ld, %ld, %ld, %ld, %ld\n", bits, length1, length2, output_bits, coeff_limbs, n, log_length); #endif ZmodF_poly_t poly1; ZmodF_poly_stack_init(poly1, log_length, n, 1); F_mpn_FFT_split_bits(poly1, data1, limbs1, bits, n); if ((data1 == data2) && (limbs1 == limbs2)) { // identical operands case ZmodF_poly_convolution(poly1, poly1, poly1); } else { // distinct operands case ZmodF_poly_t poly2; ZmodF_poly_stack_init(poly2, log_length, n, 1); F_mpn_FFT_split_bits(poly2, data2, limbs2, bits, n); ZmodF_poly_convolution(poly1, poly1, poly2); ZmodF_poly_stack_clear(poly2); } ZmodF_poly_normalise(poly1); F_mpn_clear(res, limbs1+limbs2); F_mpn_FFT_combine_bits(res, poly1, bits, n, total_limbs); ZmodF_poly_stack_clear(poly1); return res[limbs1+limbs2-1]; } mp_limb_t __F_mpn_mul_trunc(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1, mp_limb_t * data2, unsigned long limbs2, unsigned long log_length, unsigned long trunc) { unsigned long length = 1; unsigned long coeff_limbs = limbs1 + limbs2; unsigned long s1 = (FLINT_BIT_COUNT(data1[limbs1-1]) + FLINT_BIT_COUNT(data2[limbs2-1]) <= FLINT_BITS); 
unsigned long total_limbs = coeff_limbs - s1; unsigned long output_bits = coeff_limbs*FLINT_BITS; unsigned long n = coeff_limbs; unsigned long length1 = 1; unsigned long length2 = 1; unsigned log_length2 = 1; unsigned long bits; do { bits = (((limbs1 << FLINT_LG_BITS_PER_LIMB)-1) >> (log_length-1)) + 1; output_bits = 2*bits + log_length2; output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); bits = (output_bits - log_length2)/2; length1 = ((limbs1 << FLINT_LG_BITS_PER_LIMB)-1)/bits + 1; length2 = ((limbs2 << FLINT_LG_BITS_PER_LIMB)-1)/bits + 1; log_length2++; } while ((length2 > (1L<<(log_length2-1))) || (length1 > (1L<<(log_length-1)))); n = (output_bits-1)/FLINT_BITS+1; #if DEBUG printf("%ld, %ld, %ld, %ld, %ld, %ld\n", bits, length1, length2, output_bits, coeff_limbs, n); #endif ZmodF_poly_t poly1; ZmodF_poly_stack_init(poly1, log_length, n, 1); F_mpn_FFT_split_bits(poly1, data1, limbs1, bits, n); if ((data1 == data2) && (limbs1 == limbs2)) { // identical operands case ZmodF_poly_convolution_range(poly1, poly1, poly1, 0, (trunc*FLINT_BITS-1)/bits+1); } else { // distinct operands case ZmodF_poly_t poly2; ZmodF_poly_stack_init(poly2, log_length, n, 1); F_mpn_FFT_split_bits(poly2, data2, limbs2, bits, n); ZmodF_poly_convolution_range(poly1, poly1, poly2, 0, (trunc*FLINT_BITS-1)/bits+1); ZmodF_poly_stack_clear(poly2); } poly1->length = FLINT_MIN(poly1->length, (trunc*FLINT_BITS-1)/bits+1); ZmodF_poly_normalise(poly1); F_mpn_clear(res, trunc); F_mpn_FFT_combine_bits(res, poly1, bits, n, trunc); ZmodF_poly_stack_clear(poly1); return res[trunc-1]; } /* Multiply two integers in mpn format WARNING: This function requires limbs1+limbs2 output limbs when limbs1+limbs2 < FLINT_FFT_LIMBS_CROSSOVER but may require one less limb otherwise. The function will return 0 if it did not require (and indeed did not zero) the extra limb, otherwise it returns the (non zero) value of this high limb after multiplication. Assumes neither of limbs1, limbs2 is zero. 
*/ mp_limb_t F_mpn_mul(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1, mp_limb_t * data2, unsigned long limbs2) { unsigned long coeff_limbs = limbs1 + limbs2; unsigned long twk; if (coeff_limbs/2 > FFT_TUNE_CUTOFF) { twk = 0; while ((1L<<(2*twk)) < FLINT_BITS*coeff_limbs) { twk++; } } else if ((data1 != data2) || (limbs1 != limbs2)) { if (coeff_limbs/2 < FFT_MUL_TWK[0][0]) return mpn_mul(res, data1, limbs1, data2, limbs2); else { unsigned long i = 0; while ((i < FFT_MUL_COUNT-1) && (coeff_limbs/2 > FFT_MUL_TWK[i+1][0])) i++; twk = FFT_MUL_TWK[i][1]; } } else { if (coeff_limbs/2 < FFT_SQR_TWK[0][0]) return mpn_mul(res, data1, limbs1, data1, limbs1); else { unsigned long i = 0; while ((i < FFT_SQR_COUNT-1) && (coeff_limbs/2 > FFT_SQR_TWK[i+1][0])) i++; twk = FFT_SQR_TWK[i][1]; } } return __F_mpn_mul(res, data1, limbs1, data2, limbs2, twk); } /* Multiply two integers in mpn format truncating to _trunc_ output limbs Assumes none of limbs1, limbs2 and trunc is zero. */ mp_limb_t F_mpn_mul_trunc(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1, mp_limb_t * data2, unsigned long limbs2, unsigned long trunc) { unsigned long coeff_limbs = limbs1 + limbs2; if (trunc > coeff_limbs) trunc = coeff_limbs; unsigned long twk; if (coeff_limbs/2 > FFT_TUNE_CUTOFF) { twk = 0; while ((1L<<(2*twk)) < FLINT_BITS*coeff_limbs) { twk++; } } else if ((data1 != data2) || (limbs1 != limbs2)) { if (coeff_limbs/2 < FFT_MUL_TWK[0][0]) { mpn_mul(res, data1, limbs1, data2, limbs2); return res[trunc-1]; } else { unsigned long i = 0; while ((i < FFT_MUL_COUNT-1) && (coeff_limbs/2 > FFT_MUL_TWK[i+1][0])) i++; twk = FFT_MUL_TWK[i][1]; } } else { if (coeff_limbs/2 < FFT_SQR_TWK[0][0]) { mpn_mul(res, data1, limbs1, data1, limbs1); return res[trunc-1]; } else { unsigned long i = 0; while ((i < FFT_SQR_COUNT-1) && (coeff_limbs/2 > FFT_SQR_TWK[i+1][0])) i++; twk = FFT_SQR_TWK[i][1]; } } return __F_mpn_mul_trunc(res, data1, limbs1, data2, limbs2, twk, trunc); } /* Precompute an FFT for 
integer multiplication. Assumes neither of limbs1, limbs2 is zero. */ void F_mpn_mul_precomp_init(F_mpn_precomp_t precomp, mp_limb_t * data1, unsigned long limbs1, unsigned long limbs2) { if (limbs1 == 0) { precomp->poly = NULL; return; } int swapped = 0; if (limbs2 > limbs1) { unsigned long temp = limbs1; limbs1 = limbs2; limbs2 = temp; swapped = 1; } unsigned long coeff_limbs = limbs1 + limbs2; unsigned long log_length; if (coeff_limbs/2 > FFT_TUNE_CUTOFF) { log_length = 0; while ((1L<<(2*log_length)) < FLINT_BITS*coeff_limbs) { log_length++; } } else { unsigned long i = 0; while ((i < FFT_SQR_COUNT-1) && (coeff_limbs/2 > FFT_SQR_TWK[i+1][0])) i++; log_length = FFT_SQR_TWK[i][1]; } unsigned long length = 1; unsigned long total_limbs = coeff_limbs; unsigned long output_bits = coeff_limbs*FLINT_BITS; unsigned long n = coeff_limbs; unsigned long length1 = 1; unsigned long length2 = 1; unsigned log_length2 = 1; unsigned long bits; do { bits = (((limbs1 << FLINT_LG_BITS_PER_LIMB)-1) >> (log_length-1)) + 1; output_bits = 2*bits + log_length2; output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); bits = (output_bits - log_length2)/2; length1 = ((limbs1 << FLINT_LG_BITS_PER_LIMB)-1)/bits + 1; length2 = ((limbs2 << FLINT_LG_BITS_PER_LIMB)-1)/bits + 1; log_length2++; } while ((length2 > (1L<<(log_length2-1))) || (length1 > (1L<<(log_length-1)))); n = (output_bits-1)/FLINT_BITS+1; if (swapped) { unsigned long temp = limbs1; limbs1 = limbs2; limbs2 = temp; temp = length1; length1 = length2; length2 = temp; } #if DEBUG printf("%ld, %ld, %ld, %ld, %ld\n", length1, length2, output_bits, coeff_limbs, log_length); #endif ZmodF_poly_p poly1; poly1 = (ZmodF_poly_p) malloc(sizeof(ZmodF_poly_struct)); ZmodF_poly_init(poly1, log_length, n, 1); F_mpn_FFT_split_bits(poly1, data1, limbs1, bits, n); unsigned long size = (1L<depth); ZmodF_poly_FFT(poly1, size); precomp->type = FFT_PRE; precomp->bits = bits; precomp->length = length1; precomp->length2 = length2; 
precomp->coeff_limbs = coeff_limbs; precomp->limbs1 = limbs1; precomp->limbs2 = limbs2; precomp->poly = poly1; precomp->msl_bits = FLINT_BIT_COUNT(data1[limbs1-1]); } void F_mpn_mul_precomp_clear(F_mpn_precomp_t precomp) { if (precomp->type == FFT_PRE) { if (precomp->poly) { ZmodF_poly_clear(precomp->poly); free(precomp->poly); } } } /* Compute an integer multiplication given a precomputed FFT for one of the integers. Assumes neither of limbs1, limbs2 is zero. */ mp_limb_t F_mpn_mul_precomp(mp_limb_t * res, mp_limb_t * data2, unsigned long limbs2, F_mpn_precomp_t precomp) { ZmodF_poly_t poly2; ZmodF_poly_stack_init(poly2, precomp->poly->depth, precomp->poly->n, 1); int s1 = (FLINT_BIT_COUNT(data2[limbs2-1]) + precomp->msl_bits <= FLINT_BITS); F_mpn_FFT_split_bits(poly2, data2, limbs2, precomp->bits, precomp->poly->n); ZmodF_poly_FFT(poly2, precomp->length+poly2->length-1); ZmodF_poly_pointwise_mul(poly2, poly2, precomp->poly); ZmodF_poly_IFFT(poly2); ZmodF_poly_rescale(poly2); ZmodF_poly_normalise(poly2); F_mpn_clear(res, precomp->limbs1 + limbs2 - s1); F_mpn_FFT_combine_bits(res, poly2, precomp->bits, precomp->poly->n, precomp->limbs1 + limbs2 - s1); ZmodF_poly_stack_clear(poly2); if (s1) return 0; else return res[precomp->limbs1+limbs2-1]; } mp_limb_t F_mpn_mul_precomp_trunc(mp_limb_t * res, mp_limb_t * data2, unsigned long limbs2, F_mpn_precomp_t precomp, unsigned long trunc) { if (trunc == 0) return 0; ZmodF_poly_t poly2; ZmodF_poly_stack_init(poly2, precomp->poly->depth, precomp->poly->n, 1); int s1 = (FLINT_BIT_COUNT(data2[limbs2-1]) + precomp->msl_bits <= FLINT_BITS); if (trunc > precomp->limbs1+limbs2 - s1) trunc = precomp->limbs1+limbs2 - s1; F_mpn_FFT_split_bits(poly2, data2, limbs2, precomp->bits, precomp->poly->n); ZmodF_poly_FFT(poly2, precomp->length+poly2->length-1); ZmodF_poly_pointwise_mul(poly2, poly2, precomp->poly); ZmodF_poly_IFFT(poly2); ZmodF_poly_rescale_range(poly2, 0, (trunc*FLINT_BITS-1)/precomp->bits+1); poly2->length = 
FLINT_MIN(poly2->length,(trunc*FLINT_BITS-1)/precomp->bits+1); ZmodF_poly_normalise(poly2); F_mpn_clear(res, precomp->limbs1 + limbs2); F_mpn_FFT_combine_bits(res, poly2, precomp->bits, precomp->poly->n, trunc); ZmodF_poly_stack_clear(poly2); return res[trunc-1]; } mp_limb_t __F_mpn_mul_middle(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1, mp_limb_t * data2, unsigned long limbs2, unsigned long start, unsigned long trunc) { unsigned long coeff_limbs = trunc; unsigned long twk; if (coeff_limbs/2 > FFT_TUNE_CUTOFF) { twk = 0; while ((1L<<(2*twk)) < FLINT_BITS*coeff_limbs) { twk++; } } else if ((data1 != data2) || (limbs1 != limbs2)) { if (coeff_limbs/2 < FFT_MUL_TWK[0][0]) { mpn_mul(res, data1, limbs1, data2, limbs2); return res[trunc-1]; } else { unsigned long i = 0; while ((i < FFT_MUL_COUNT-1) && (coeff_limbs/2 > FFT_MUL_TWK[i+1][0])) i++; twk = FFT_MUL_TWK[i][1]; } } else { if (coeff_limbs/2 < FFT_SQR_TWK[0][0]) { mpn_mul(res, data1, limbs1, data1, limbs1); return res[trunc-1]; } else { unsigned long i = 0; while ((i < FFT_SQR_COUNT-1) && (coeff_limbs/2 > FFT_SQR_TWK[i+1][0])) i++; twk = FFT_SQR_TWK[i][1]; } } unsigned long log_length = twk; unsigned long length = 1; unsigned long total_limbs = coeff_limbs; unsigned long output_bits = coeff_limbs*FLINT_BITS; unsigned long n = coeff_limbs; unsigned long length1 = 1; unsigned long length2 = 1; unsigned log_length2 = 1; unsigned long bits; do { bits = (((limbs1 << FLINT_LG_BITS_PER_LIMB)-1) >> (log_length)) + 1; output_bits = 2*bits + log_length2; output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); bits = (output_bits - log_length2)/2; length1 = ((limbs1 << FLINT_LG_BITS_PER_LIMB)-1)/bits + 1; length2 = ((limbs2 << FLINT_LG_BITS_PER_LIMB)-1)/bits + 1; log_length2++; } while ((length2 > (1L<<(log_length2))) || (length1 > (1L<<(log_length)))); n = (output_bits-1)/FLINT_BITS+1; #if DEBUG printf("%ld, %ld, %ld, %ld, %ld, %ld, %ld\n", bits, length1, length2, output_bits, coeff_limbs, n, 
log_length); #endif ZmodF_poly_t poly1; ZmodF_poly_stack_init(poly1, log_length, n, 1); F_mpn_FFT_split_bits(poly1, data1, limbs1, bits, n); if ((data1 == data2) && (limbs1 == limbs2)) { // identical operands case ZmodF_poly_convolution_range(poly1, poly1, poly1, (start*FLINT_BITS)/bits-1, (trunc*FLINT_BITS-1)/bits+1); } else { // distinct operands case ZmodF_poly_t poly2; ZmodF_poly_stack_init(poly2, log_length, n, 1); F_mpn_FFT_split_bits(poly2, data2, limbs2, bits, n); ZmodF_poly_convolution_range(poly1, poly1, poly2, (start*FLINT_BITS)/bits-1, (trunc*FLINT_BITS-1)/bits+1); ZmodF_poly_stack_clear(poly2); } poly1->length = (trunc*FLINT_BITS-1)/bits+1; ZmodF_poly_normalise(poly1); F_mpn_clear(res, trunc); F_mpn_FFT_combine_bits(res, poly1, bits, n, trunc); ZmodF_poly_stack_clear(poly1); return res[trunc-1]; } mp_limb_t __F_mpn_mul_middle_precomp(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1, F_mpn_precomp_t precomp, unsigned long start, unsigned long trunc) { ZmodF_poly_t poly1; ZmodF_poly_stack_init(poly1, precomp->poly->depth, precomp->poly->n, 1); F_mpn_FFT_split_bits(poly1, data1, limbs1, precomp->bits, precomp->poly->n); unsigned long length = precomp->poly->length + poly1->length - 1; unsigned long size = (1L<poly->depth); if (length > size) length = size; ZmodF_poly_FFT(poly1, length); ZmodF_poly_pointwise_mul(poly1, poly1, precomp->poly); ZmodF_poly_IFFT(poly1); ZmodF_poly_rescale_range(poly1, (start*FLINT_BITS)/precomp->bits-1, (trunc*FLINT_BITS-1)/precomp->bits+1); poly1->length = FLINT_MIN(poly1->length, (trunc*FLINT_BITS-1)/precomp->bits+1); ZmodF_poly_normalise(poly1); F_mpn_clear(res, trunc); F_mpn_FFT_combine_bits(res, poly1, precomp->bits, precomp->poly->n, trunc); ZmodF_poly_stack_clear(poly1); return res[trunc-1]; } flint-1.011/CHANGES.txt0000644017361200017500000000433511035134540014331 0ustar tabbotttabbottv 1.0 -- 2-Dec-07 : * First version of FLINT, includes fmpz_poly, fmpz and mpQS v 1.0.1 -- 7-Dec-07 : * Fixed a bug in 
_fmpz_poly_maxbits1 on 32 bit machines, reported by Michael Abshoff and Carl Witty * Removed some instances of u_int64_t and replaced them with uint64_t, reported by Michael Abshoff * Replaced sys/types.h with stdint.h * Added FLINT macros to documentation * Corrected numerous typos in documentation v 1.0.2 -- 10-Dec-07 * Rewrote tuning code for integer multiplication functions, making it more robust and fixing a bug which showed up on 32 bit machines (reported by Michael Abshoff and Jaap Spies). Factored the tuning code out into a number of macros. v 1.0.3 -- 16-Dec-07 * Fixed a bug in the polynomial memory management code which caused a segfault * Fixed a bug in the pseudo division code which caused a block overrun v 1.0.4 -- 04-Jan-08 * Fixed a bug in the bernoulli_zmod example program and associated polynomial zmod code which caused memory corruption. * Fixed a bug in the fmpz-test code which manifested on 32 bit machines, reported by David Harvey. * Fixed some bugs in the pari profiling code. v 1.0.5 -- 05-Jan-08 * Fixed some inline issues which cause problems because of the C99 inline rules (reported by David Harvey). * Fixed a makefile issue reported (and solved) by David Harvey when *not* linking against NTL. v 1.0.6 -- 17-Jan-08 * Fixed an issue with FLINT_BIT_COUNT on certain machines (probably due to arithmetic shift issues) v 1.0.7 -- 22-Jan-08 * Made F_mpn_mul binary compatible with the way mpn_mul *operates* in practice. 
v 1.0.8 -- 15-Feb-08 * Fixed a bug in fmpz_poly_right_shift (reported by Kiran Kedlaya) v 1.0.9 -- 11-Mar-08 * Fixed a memory allocation bug in fmpz_poly_power v 1.0.10 -- : * integer gcd (this just wraps the GMP gcd code) * polynomial content * convert to and from FLINT and NTL integers and polynomials * get a coefficient of a polynomial efficiently as a read only mpz_t * print polynomials in a prettified format with a specified variable v 1.0.11 -- 16-Jun-08 * Fixed a bug in z_ll_mod_precomp on ia64 (reported by Michael Abshoff and William Stein) flint-1.011/ZmodF_mul-tune.c0000644017361200017500000002067411025357254015545 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* ZmodF_mul-tune Program for tuning the ZmodF_mul module. This program writes to standard output an automatically tuned version of ZmodF_mul-tuning.c. (If DEBUG is set, it also writes logging info to standard error.) 
(C) 2007 David Harvey and William Hart */ #include #include #include "flint.h" #include "test-support.h" #include "profiler.h" #include "ZmodF_mul.h" #include "ZmodF_mul-tuning.h" #define DEBUG 1 typedef struct { int algo; int squaring; unsigned long n; unsigned long depth, m, k; } sample_info_t; // arg should point to a sample_info_t void sample_mul(void* arg, unsigned long count) { ZmodF_mul_info_t info; sample_info_t* z = (sample_info_t*) arg; switch (z->algo) { case ZMODF_MUL_ALGO_PLAIN: ZmodF_mul_info_init_plain(info, z->n, z->squaring); break; case ZMODF_MUL_ALGO_THREEWAY: ZmodF_mul_info_init_threeway(info, z->n, z->squaring); break; case ZMODF_MUL_ALGO_FFT: ZmodF_mul_info_init_fft(info, z->n, z->depth, z->m, z->k, z->squaring); break; } mp_limb_t* in1 = (mp_limb_t*) flint_stack_alloc(z->n + 1); mp_limb_t* in2 = (mp_limb_t*) flint_stack_alloc(z->n + 1); mp_limb_t* out = (mp_limb_t*) flint_stack_alloc(z->n + 1); urandom_limbs(in1, z->n + 1); urandom_limbs(in2, z->n + 1); if (z->squaring) in2 = in1; // warm up for (unsigned long i = 0; i < count/4; i++) ZmodF_mul_info_mul(info, out, in1, in2); // time it start_clock(0); for (unsigned long i = 0; i < count; i++) ZmodF_mul_info_mul(info, out, in1, in2); stop_clock(0); flint_stack_release(); flint_stack_release(); flint_stack_release(); ZmodF_mul_info_clear(info); } /* Compares two ZmodF_mul algorithms for a specific n. algo1/algo2 are: 0 for plain algorithm 1 for threeway algorithm > 2 indicates FFT of given depth Returns nonzero if algo2 is more efficient than algo1 for given n. (n will be rounded up automatically to satisfy whatever divisibility conditions are required by the requested algorithms.) 
*/ int algo_compare(unsigned long n, unsigned long squaring, unsigned long algo1, unsigned long algo2, FILE* f) { sample_info_t info1, info2; info1.squaring = info2.squaring = squaring; info1.n = info2.n = n; if (algo1 == 0) info1.algo = ZMODF_MUL_ALGO_PLAIN; else if (algo1 == 1) info1.algo = ZMODF_MUL_ALGO_THREEWAY; else { info1.algo = ZMODF_MUL_ALGO_FFT; info1.depth = algo1; info1.m = info1.k = 0; } if (algo2 == 0) info2.algo = ZMODF_MUL_ALGO_PLAIN; else if (algo2 == 1) info2.algo = ZMODF_MUL_ALGO_THREEWAY; else { info2.algo = ZMODF_MUL_ALGO_FFT; info2.depth = algo2; info2.m = info1.k = 0; } // round up n appropriately unsigned long round = 1; if (algo1 == 1 || algo2 == 1) round = 3; if (algo1 > FLINT_LG_BITS_PER_LIMB || algo2 > FLINT_LG_BITS_PER_LIMB) round <<= (FLINT_MAX(algo1, algo2) - FLINT_LG_BITS_PER_LIMB); n = (((n-1) / round) + 1) * round; double time1, time2; prof_repeat(&time1, NULL, sample_mul, &info1); prof_repeat(&time2, NULL, sample_mul, &info2); #if DEBUG fprintf(f, "n = %ld, ", n); if (algo1 == 0) fprintf(f, "plain"); else if (algo1 == 1) fprintf(f, "threeway"); else fprintf(f, "FFT %ld", algo1); fprintf(f, " vs "); if (algo2 == 0) fprintf(f, "plain"); else if (algo2 == 1) fprintf(f, "threeway"); else fprintf(f, "FFT %ld", algo2); if (time2 < time1) fprintf(f, ", 2nd wins"); else fprintf(f, ", 1st wins"); fprintf(f, " (%lf vs %lf)\n", time1, time2); #endif return time2 < time1; } /* Finds crossover value of n to get from algo1 to algo2. If start != 0, then it's a starting estimate. */ unsigned long algo_threshold(unsigned long algo1, unsigned long algo2, unsigned long squaring, unsigned long start, FILE* f) { // find upper bound unsigned long hi = start ? 
start : 100; while (!algo_compare(hi, squaring, algo1, algo2, f)) hi *= 2; hi *= 2; #if DEBUG fprintf(f, "upper bound is %ld\n\n", hi); #endif // find lower bound unsigned long lo = hi / 2; while (algo_compare(lo, squaring, algo1, algo2, f)) lo /= 2; lo /= 2; #if DEBUG fprintf(f, "lower bound is %ld\n\n", lo); #endif // shrink interval until we reach tolerance of 10% while (hi > 1.1 * lo) { unsigned long mid = (unsigned long) sqrt(1.0 * hi * lo); double range = 1.0 * hi / lo; if (algo_compare(mid, squaring, algo1, algo2, f)) { lo = (unsigned long) (pow(range, -0.15) * lo); hi = (unsigned long) (pow(range, -0.3) * hi); } else { lo = (unsigned long) (pow(range, 0.3) * lo); hi = (unsigned long) (pow(range, 0.15) * hi); } #if DEBUG fprintf(f, "interval is [%ld, %ld], ratio = %lf\n", lo, hi, 1.0 * hi / lo); #endif } return (unsigned long) sqrt(1.0 * hi * lo); } int main(int argc, char* argv[]) { FILE* fout = stdout; FILE* flog = stderr; test_support_init(); fprintf(fout, "/*\n"); fprintf(fout, " Tuning values for ZmodF_mul module\n"); fprintf(fout, "\n"); fprintf(fout, " Automatically generated by ZmodF_mul-tune program\n"); fprintf(fout, "*/\n\n"); fprintf(fout, "#include \"ZmodF_mul-tuning.h\"\n"); fprintf(fout, "#include \"ZmodF_mul.h\"\n"); fprintf(fout, "\n"); fflush(fout); for (int squaring = 0; squaring <= 1; squaring++) { char* type = squaring ? 
"sqr" : "mul"; // plain/threeway threshold unsigned long n; for (n = 3; algo_compare(n, squaring, 1, 0, flog); n += 3); fprintf(fout, "unsigned long ZmodF_%s_plain_threeway_threshold = %ld;\n", type, n); fflush(fout); if (!squaring) ZmodF_mul_plain_threeway_threshold = n; else ZmodF_sqr_plain_threeway_threshold = n; // plain/fft threshold n = algo_threshold(0, 3, squaring, 0, flog); fprintf(fout, "unsigned long ZmodF_%s_plain_fft_threshold = %ld;\n", type, n); fflush(fout); if (!squaring) ZmodF_mul_plain_fft_threshold = n; else ZmodF_sqr_plain_fft_threshold = n; // threeway/fft threshold n = algo_threshold(1, 4, squaring, 0, flog); fprintf(fout, "unsigned long ZmodF_%s_threeway_fft_threshold = %ld;\n", type, n); fflush(fout); if (!squaring) ZmodF_mul_threeway_fft_threshold = n; else ZmodF_sqr_threeway_fft_threshold = n; // fft thresholds between different depths fprintf(fout, "unsigned long ZmodF_%s_fft_table[20] =\n {", type); unsigned long depth; for (depth = 3; depth < 10; depth++) { n = algo_threshold(depth, depth+1, squaring, 0, flog); if (!squaring) ZmodF_mul_fft_table[depth - 3] = n; else ZmodF_sqr_fft_table[depth - 3] = n; fprintf(fout, "%ld, ", n); fflush(fout); } fprintf(fout, "0};\n\n"); if (!squaring) ZmodF_mul_fft_table[depth - 3] = 0; else ZmodF_sqr_fft_table[depth - 3] = 0; } fprintf(fout, "\n"); fprintf(fout, "// end of file *********************************\n"); test_support_cleanup(); return 0; } // end of file **************************************************************** flint-1.011/graphing/0000755017361200017500000000000011025357255014323 5ustar tabbotttabbottflint-1.011/graphing/compare1d_config.py0000644017361200017500000000422111025357252020071 0ustar tabbotttabbott####################################################################### # This file is part of FLINT. 
# # FLINT is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # FLINT is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with FLINT; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #****************************************************************************** # # default configuration file for compare1d.py # # Please don't edit this file; make a copy and then tell compare1d.py about it # by running e.g. # # python compare1d.py -c my_config.py # #****************************************************************************** #------------------------------------------------------------------------------ # dots per inch for output file # larger values make a bigger graphics file CONFIG_dpi = 96 #------------------------------------------------------------------------------ # title at the top of the image CONFIG_title = None #------------------------------------------------------------------------------ # ignore all data points whose minimum and maximum times differ by a factor # of more than CONFIG_tolerance CONFIG_tolerance = 1.05 #------------------------------------------------------------------------------ # xscale and yscale determine the scaling used on each axis. 
# Possible values are None, "log10", "log2" CONFIG_xscale = None CONFIG_yscale = None #------------------------------------------------------------------------------ # text labels for each axis (None to leave blank) CONFIG_xlabel = None CONFIG_ylabel = None ################# end of config file flint-1.011/graphing/compare1d.py0000644017361200017500000001005011025357252016541 0ustar tabbotttabbott####################################################################### # This file is part of FLINT. # # FLINT is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # FLINT is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with FLINT; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # script for comparing FLINT 1d profile output # requires matplotlib to be installed # # (C) 2007 David Harvey, GPL license yadda yadda import sys from optparse import OptionParser import pylab from math import log parser = OptionParser(usage="python compare1d.py [options] input1 [input2 ...]") parser.add_option("-o", "--output", default="graph.png", help="output filename (default graph.png)") parser.add_option("-c", "--config", default="compare1d_config", help="configuration file (default compare1d_config)") (options, args) = parser.parse_args() if len(args) == 0: parser.print_help() sys.exit() # default configuration settings; # see compare1d_config.py for explanation of each setting CONFIG_dpi = 96 CONFIG_dotsize = 3 CONFIG_title = None CONFIG_tolerance = 1.05 CONFIG_xscale = None CONFIG_yscale = None 
CONFIG_xlabel = None CONFIG_ylabel = None # override settings from configuration file if requested try: config_module = __import__(options.config) # import all variables starting with CONFIG_ into global namespace for var in dir(config_module): if var.startswith("CONFIG_"): globals()[var] = config_module.__dict__[var] except ImportError: # configuration file not found print "Warning: could not find configuration file \"%s.py\"; using default settings" % (options.config) if CONFIG_title is not None: pylab.title(CONFIG_title) log_strings = {None: "", "log10" : " (log10)", "log2" : " (log2)"} if CONFIG_xlabel is None: CONFIG_xlabel = "" if CONFIG_ylabel is None: CONFIG_ylabel = "" CONFIG_xlabel += log_strings[CONFIG_xscale] CONFIG_ylabel += log_strings[CONFIG_yscale] pylab.xlabel(CONFIG_xlabel) pylab.ylabel(CONFIG_ylabel) files = [open(arg) for arg in args] data = [] for f in files: # skip up to "==========" where the data starts iter = f.__iter__() while not iter.next().startswith("====="): pass # decode each quadruple (x, min, max) points = {} for line in iter: fields = line.split() x = float(fields[0]) y_min = float(fields[1]) y_max = float(fields[2]) # ignore points where the max and min are too far apart if y_max/y_min <= CONFIG_tolerance: points[x] = y_min data.append(points) for i in range(len(data)): points = data[i] L = list(points.iteritems()) L.sort() xvals = [x for (x, y) in L] yvals = [y for (x, y) in L] pylab.plot(xvals, yvals, label=args[i]) pylab.legend() pylab.savefig(options.output, dpi=CONFIG_dpi) # here's some code I was playing around with to generate a legend, just # leaving it here for now because I don't want to lose it # # import matplotlib # a = matplotlib.patches.Ellipse((0.9, 0.9), 0.2, 0.2, 360) # a.set_facecolor((0.5, 0.5, 0.5)) # a.set_edgecolor((0.5, 0.5, 0.5)) # a.set_transform(pylab.gcf().transFigure) # # b = matplotlib.patches.Ellipse((4.0, 0.5), 1.0, 1.0, 360) # b.set_facecolor((0.5, 0.5, 0.5)) # b.set_edgecolor((0.5, 0.5, 0.5)) # # 
pylab.gca().add_patch( a ) # pylab.gca().add_patch( b ) # #for i in range(10): # print cmap(i/10.0) flint-1.011/graphing/compare2d_config.py0000644017361200017500000000602411025357252020075 0ustar tabbotttabbott####################################################################### # This file is part of FLINT. # # FLINT is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # FLINT is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with FLINT; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #****************************************************************************** # # default configuration file for compare.py # # Please don't edit this file; make a copy and then tell compare.py about it # by running e.g. 
# # python compare.py -c my_config.py # #****************************************************************************** #------------------------------------------------------------------------------ # dots per inch for output file # larger values make a bigger graphics file CONFIG_dpi = 96 #------------------------------------------------------------------------------ # radius of each dot (don't ask me what the units are, I have no idea) CONFIG_dotsize = 3 #------------------------------------------------------------------------------ # title at the top of the image CONFIG_title = None #------------------------------------------------------------------------------ # ignore all data points whose minimum and maximum times differ by a factor # of more than CONFIG_tolerance CONFIG_tolerance = 1.05 #------------------------------------------------------------------------------ # xscale and yscale determine the scaling used on each axis. # Possible values are None, "log10", "log2" CONFIG_xscale = None CONFIG_yscale = None #------------------------------------------------------------------------------ # text labels for each axis (None to leave blank) CONFIG_xlabel = None CONFIG_ylabel = None #------------------------------------------------------------------------------ # if the values from the two data sets differ by a factor of more than # CONFIG_truncate, the maximum colour intensity is assigned CONFIG_truncate = 2.0 #------------------------------------------------------------------------------ # The minimum colour intensity, in the range (0, 1). # If this is zero, then negative through positive values are plotted on # a continuous spectrum from blue through red. If this is 0.5, then a tiny # positive value is already somewhat brown, and a tiny negative value is # already somewhat blue. If this is 1.0, you get all flat blue and all flat # red. 
CONFIG_min_intensity = 0.5 ################# end of config file flint-1.011/graphing/compare2d.py0000644017361200017500000001323511025357252016552 0ustar tabbotttabbott####################################################################### # This file is part of FLINT. # # FLINT is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # FLINT is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with FLINT; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # script for comparing FLINT 2d profile output # requires matplotlib to be installed # # (C) 2007 David Harvey, GPL license yadda yadda import sys import matplotlib matplotlib.use('Agg') from optparse import OptionParser from matplotlib.colors import LinearSegmentedColormap import pylab from math import log parser = OptionParser(usage="python compare2d.py [options] input1 input2") parser.add_option("-o", "--output", default="graph.png", help="output filename (default graph.png)") parser.add_option("-c", "--config", default="compare2d_config", help="configuration file (default compare2d_config)") (options, args) = parser.parse_args() if len(args) != 2: parser.print_help() sys.exit() # default configuration settings; # see compare2d_config.py for explanation of each setting CONFIG_dpi = 96 CONFIG_dotsize = 3 CONFIG_title = None CONFIG_tolerance = 1.05 CONFIG_xscale = None CONFIG_yscale = None CONFIG_xlabel = None CONFIG_ylabel = None CONFIG_truncate = 2.0 CONFIG_min_intensity = 0.5 # override settings from configuration file if 
requested try: config_module = __import__(options.config) # import all variables starting with CONFIG_ into global namespace for var in dir(config_module): if var.startswith("CONFIG_"): globals()[var] = config_module.__dict__[var] except ImportError: # configuration file not found print "Warning: could not find configuration file \"%s.py\"; using default settings" % (options.config) # define colour map which makes: # values in [0, 0.5] are blue # values in [0.5, 1] are red cmap_data = { 'red' : ((0.0, 0.0, 0.0), (0.5, 0.0, CONFIG_min_intensity), (1.0, 1.0, 1.0)), 'green': ((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)), 'blue' : ((0.0, 1.0, 1.0), (0.5, CONFIG_min_intensity, 0.0), (1.0, 0.0, 0.0)) } cmap = LinearSegmentedColormap('????', cmap_data) if CONFIG_title is not None: pylab.title(CONFIG_title) log_strings = {None: "", "log10" : " (log10)", "log2" : " (log2)"} if CONFIG_xlabel is None: CONFIG_xlabel = "" if CONFIG_ylabel is None: CONFIG_ylabel = "" CONFIG_xlabel += log_strings[CONFIG_xscale] CONFIG_ylabel += log_strings[CONFIG_yscale] pylab.xlabel(CONFIG_xlabel) pylab.ylabel(CONFIG_ylabel) files = [open(args[0]), open(args[1])] data = [] for f in files: # skip up to "==========" where the data starts iter = f.__iter__() while not iter.next().startswith("====="): pass # decode each quadruple (x, y, min, max) points = {} for line in iter: fields = line.split() x = float(fields[0]) y = float(fields[1]) z_min = float(fields[2]) z_max = float(fields[3]) # ignore points where the max and min are too far apart if z_max/z_min <= CONFIG_tolerance: points[(x, y)] = z_min data.append(points) # merge data into single list of ratios # values in [-1.0, 0.0] means the 1st sample was faster # values in [0.0, 1.0] means the 2nd sample was faster ratios = [] for (x, y) in data[0]: z0 = data[0][x, y] if (x, y) in data[1]: z1 = data[1][x, y] if CONFIG_xscale == "log10": x = log(x)/log(10.0) elif CONFIG_xscale == "log2": x = log(x)/log(2.0) if CONFIG_yscale == "log10": y = log(y)/log(10.0) 
elif CONFIG_yscale == "log2": y = log(y)/log(2.0) ratio = log(z1 / z0) / log(CONFIG_truncate) if ratio > 1.0: ratio = 1.0 elif ratio < -1.0: ratio = -1.0 ratios.append((x, y, ratio)) ratios.sort() pylab.scatter([x for (x, y, z) in ratios], [y for (x, y, z) in ratios], c = [z for (x, y, z) in ratios], vmin=-1.0, vmax=1.0, s = CONFIG_dotsize, faceted=False, cmap = cmap) # write down the two filenames (this should really be the profile name) pylab.figtext(0.05, 0.03, args[0], fontsize=10, color=(1.0, 0, 0), verticalalignment="bottom") pylab.figtext(0.05, 0.03, args[1], fontsize=10, color=(0, 0, 1.0), verticalalignment="top") pylab.savefig(options.output, dpi=CONFIG_dpi) # here's some code I was playing around with to generate a legend, just # leaving it here for now because I don't want to lose it # # import matplotlib # a = matplotlib.patches.Ellipse((0.9, 0.9), 0.2, 0.2, 360) # a.set_facecolor((0.5, 0.5, 0.5)) # a.set_edgecolor((0.5, 0.5, 0.5)) # a.set_transform(pylab.gcf().transFigure) # # b = matplotlib.patches.Ellipse((4.0, 0.5), 1.0, 1.0, 360) # b.set_facecolor((0.5, 0.5, 0.5)) # b.set_edgecolor((0.5, 0.5, 0.5)) # # pylab.gca().add_patch( a ) # pylab.gca().add_patch( b ) # #for i in range(10): # print cmap(i/10.0) flint-1.011/gpl-2.0.txt0000644017361200017500000004310311025357254014344 0ustar tabbotttabbott GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. 
This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. 
We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
<one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. <signature of Ty Coon>, 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs.
If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. flint-1.011/F_mpn_mul-tuning.h0000644017361200017500000000305711025357254016117 0ustar tabbotttabbott/*============================================================================ Copyright (C) 2007, William Hart This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ #ifndef FLINT_F_MPN_MUL_TUNING_H #define FLINT_F_MPN_MUL_TUNING_H #ifdef __cplusplus extern "C" { #endif #define FLINT_FFT_LIMBS_CROSSOVER 2300 #define MUL_TWK_SMALL_CUTOFF 2000 #define MUL_TWK_SMALL_DEFAULT 64 #define MUL_TWK_LARGE_CUTOFF 8350000 #define MUL_TWK_LARGE_DEFAULT 1 #define MUL_TWK_COUNT 20 #define SQR_TWK_SMALL_CUTOFF 1564 #define SQR_TWK_SMALL_DEFAULT 16 #define SQR_TWK_LARGE_CUTOFF 8350000 #define SQR_TWK_LARGE_DEFAULT 1 #define SQR_TWK_COUNT 13 #define FFT_MUL_COUNT 40 #define FFT_SQR_COUNT 30 #define FFT_TUNE_CUTOFF 20000000L #ifdef __cplusplus } #endif #endif flint-1.011/mpz_poly.c0000644017361200017500000012564311025357254014553 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** mpz_poly.c: Polynomials over Z, implemented as an array of mpz_t's Copyright (C) 2007, William Hart and David Harvey ******************************************************************************/ #include #include #include "flint.h" #include "mpz_poly.h" #include "mpz_poly-tuning.h" #include "fmpz.h" #include "fmpz_poly.h" /**************************************************************************** Initialisation and memory management ****************************************************************************/ void mpz_poly_init(mpz_poly_t poly) { poly->coeffs = (mpz_t*) flint_heap_alloc(sizeof(mpz_t)); mpz_init(poly->coeffs[0]); poly->alloc = 1; poly->length = 0; } void mpz_poly_init2(mpz_poly_t poly, unsigned long alloc) { if ((long) alloc <= 0) mpz_poly_init(poly); poly->coeffs = (mpz_t*) flint_heap_alloc(alloc * sizeof(mpz_t)); for (unsigned long i = 0; i < alloc; i++) mpz_init(poly->coeffs[i]); poly->alloc = alloc; poly->length = 0; } void mpz_poly_init3(mpz_poly_t poly, unsigned long alloc, unsigned long bits) { if ((long) alloc <= 0) mpz_poly_init(poly); poly->coeffs = (mpz_t*) flint_heap_alloc(alloc * sizeof(mpz_t)); for (unsigned long i = 0; i < alloc; 
i++) mpz_init2(poly->coeffs[i], bits); poly->alloc = alloc; poly->length = 0; } void mpz_poly_clear(mpz_poly_t poly) { for (long i = 0; i < poly->alloc; i++) mpz_clear(poly->coeffs[i]); flint_heap_free(poly->coeffs); } void mpz_poly_realloc(mpz_poly_t poly, unsigned long alloc) { if ((long) alloc <= 0) alloc = 1; // clear any mpz_t's beyond the new array length for (unsigned long i = alloc; i < poly->alloc; i++) mpz_clear(poly->coeffs[i]); poly->coeffs = (mpz_t*) flint_heap_realloc(poly->coeffs, alloc * sizeof(mpz_t)); // init any new mpz_t's required for (unsigned long i = poly->alloc; i < alloc; i++) mpz_init(poly->coeffs[i]); poly->alloc = alloc; // truncate poly if necessary if (poly->length > alloc) { poly->length = alloc; mpz_poly_normalise(poly); } } void mpz_poly_realloc2(mpz_poly_t poly, unsigned long alloc, unsigned long bits) { if ((long) alloc <= 0) alloc = 1; // clear any mpz_t's beyond the new array length for (unsigned long i = alloc; i < poly->alloc; i++) mpz_clear(poly->coeffs[i]); poly->coeffs = (mpz_t*) flint_heap_realloc(poly->coeffs, alloc * sizeof(mpz_t)); // init any new mpz_t's required for (unsigned long i = poly->alloc; i < alloc; i++) mpz_init2(poly->coeffs[i], bits); poly->alloc = alloc; // truncate poly if necessary if (poly->length > alloc) { poly->length = alloc; mpz_poly_normalise(poly); } } void __mpz_poly_ensure_alloc(mpz_poly_t poly, unsigned long alloc) { FLINT_ASSERT(alloc > poly->alloc); if (alloc < 2*poly->alloc) alloc = 2*poly->alloc; mpz_poly_realloc(poly, alloc); } /**************************************************************************** Setting/retrieving coefficients ****************************************************************************/ void mpz_poly_set_coeff(mpz_poly_t poly, unsigned long n, mpz_t c) { mpz_poly_ensure_alloc(poly, n+1); if (n+1 < poly->length) // set interior coefficient mpz_set(poly->coeffs[n], c); else if (n+1 == poly->length) { // set leading coefficient if (mpz_sgn(c)) 
mpz_set(poly->coeffs[n], c); else { // set leading coefficient to zero poly->length--; mpz_poly_normalise(poly); } } else { // extend polynomial if (!mpz_sgn(c)) return; for (unsigned long i = poly->length; i < n; i++) mpz_set_ui(poly->coeffs[i], 0); mpz_set(poly->coeffs[n], c); poly->length = n+1; } } void mpz_poly_set_coeff_ui(mpz_poly_t poly, unsigned long n, unsigned long c) { mpz_poly_ensure_alloc(poly, n+1); if (n+1 < poly->length) // set interior coefficient mpz_set_ui(poly->coeffs[n], c); else if (n+1 == poly->length) { // set leading coefficient if (c) mpz_set_ui(poly->coeffs[n], c); else { // set leading coefficient to zero poly->length--; mpz_poly_normalise(poly); } } else { // extend polynomial if (!c) return; for (unsigned long i = poly->length; i < n; i++) mpz_set_ui(poly->coeffs[i], 0); mpz_set_ui(poly->coeffs[n], c); poly->length = n+1; } } void mpz_poly_set_coeff_si(mpz_poly_t poly, unsigned long n, long c) { mpz_poly_ensure_alloc(poly, n+1); if (n+1 < poly->length) // set interior coefficient mpz_set_si(poly->coeffs[n], c); else if (n+1 == poly->length) { // set leading coefficient if (c) mpz_set_si(poly->coeffs[n], c); else { // set leading coefficient to zero poly->length--; mpz_poly_normalise(poly); } } else { // extend polynomial if (!c) return; for (unsigned long i = poly->length; i < n; i++) mpz_set_ui(poly->coeffs[i], 0); mpz_set_si(poly->coeffs[n], c); poly->length = n+1; } } /**************************************************************************** String conversions and I/O ****************************************************************************/ int mpz_poly_from_string(mpz_poly_t poly, const char* s) { const char* whitespace = " \t\n\r"; // read poly length unsigned long length; if (!sscanf(s, "%ld", &length)) return 0; // jump to next whitespace s += strcspn(s, whitespace); poly->length = 0; mpz_poly_ensure_alloc(poly, length); for (unsigned long i = 0; i < length; i++) { // skip whitespace s += strspn(s, whitespace); if 
(!gmp_sscanf(s, "%Zd", poly->coeffs[i])) return 0; poly->length++; // jump to next whitespace s += strcspn(s, whitespace); } mpz_poly_normalise(poly); return 1; } char* mpz_poly_to_string(mpz_poly_t poly) { // estimate the size of the string // 20 = enough room for null terminator and length info unsigned long size = 20; for (unsigned long i = 0; i < poly->length; i++) // +2 is for the sign and a space size += mpz_sizeinbase(poly->coeffs[i], 10) + 2; // write the string char* buf = (char*) malloc(size); char* ptr = buf + sprintf(buf, "%ld ", poly->length); for (unsigned long i = 0; i < poly->length; i++) { mpz_get_str(ptr, 10, poly->coeffs[i]); ptr += strlen(ptr); *ptr = ' '; ptr++; } ptr--; *ptr = 0; return buf; } /*************************************************************************************************** ** LTOA.C ** ** Converts a long integer to a string. ** ** Copyright 1988-90 by Robert B. Stout dba MicroFirm ** ** Released to public domain, 1991 ** ** Parameters: 1 - number to be converted ** 2 - buffer in which to build the converted string ** 3 - number base to use for conversion ** ** Returns: A character pointer to the converted string if ** successful, a NULL pointer if the number base specified ** is out of range. ***************************************************************************************************/ #define BUFSIZE (sizeof(long) * 8 + 1) char *flint_ltoa(long N, char *str, int base) { register int i = 2; long uarg; char *tail, *head = str, buf[BUFSIZE]; if (36 < base || 2 > base) base = 10; /* can only use 0-9, A-Z */ tail = &buf[BUFSIZE - 1]; /* last character position */ *tail-- = '\0'; if (10 == base && N < 0L) { *head++ = '-'; uarg = -N; } else uarg = N; if (uarg) { for (i = 1; uarg; ++i) { register ldiv_t r; r = ldiv(uarg, base); *tail-- = (char)(r.rem + ((9L < r.rem) ? 
('A' - 10L) : '0')); uarg = r.quot; } } else *tail-- = '0'; memcpy(head, ++tail, i); return str; } /*******************************************************************************************/ char* mpz_poly_to_string_pretty(mpz_poly_t poly, const char * x) { if (poly->length == 0) { char* buf = (char*) malloc(2); *buf = '0'; buf[1] = 0; return buf; } unsigned long x_len = strlen(x); // String length of the monomial unsigned long exp_len = FLINT_BIT_COUNT(poly->length)/3 + 1; // String length of largest degree // estimate the size of the string // 1 = enough room for null terminator unsigned long size = 1; long i; for (i = 0; i < poly->length; i++) // +3 is for the sign, a carot and a times size += mpz_sizeinbase(poly->coeffs[i], 10) + 3 + x_len + exp_len; // write the string char* buf = (char*) malloc(size); char* exp = (char*) malloc(exp_len+1); char* ptr = buf; for (i = poly->length - 1; i >=2; i--) { if ((mpz_sgn(poly->coeffs[i]) > 0L) && (i != poly->length - 1)) { *ptr = '+'; ptr++; } if (mpz_cmp_si(poly->coeffs[i], -1L) == 0) { *ptr = '-'; ptr++; } if (mpz_sgn(poly->coeffs[i]) != 0L) { if ((mpz_cmp_si(poly->coeffs[i], -1L) != 0) && (mpz_cmp_ui(poly->coeffs[i], 1L) != 0)) { mpz_get_str(ptr, 10, poly->coeffs[i]); ptr += strlen(ptr); *ptr = '*'; ptr++; } strcpy(ptr, x); ptr += strlen(x); *ptr = '^'; ptr++; flint_ltoa(i, exp, 10); strcpy(ptr, exp); ptr += strlen(exp); } } if (i == 1) { if ((mpz_sgn(poly->coeffs[i]) > 0L) && (i != poly->length - 1)) { *ptr = '+'; ptr++; } if (mpz_cmp_si(poly->coeffs[i], -1L) == 0) { *ptr = '-'; ptr++; } if (mpz_sgn(poly->coeffs[i]) != 0L) { if ((mpz_cmp_si(poly->coeffs[i], -1L) != 0) && (mpz_cmp_ui(poly->coeffs[i], 1L) != 0)) { mpz_get_str(ptr, 10, poly->coeffs[i]); ptr += strlen(ptr); *ptr = '*'; ptr++; } strcpy(ptr, x); ptr += strlen(x); } i--; } if ((mpz_sgn(poly->coeffs[i]) > 0L) && (i != poly->length - 1)) { *ptr = '+'; ptr++; } if (mpz_sgn(poly->coeffs[i]) != 0L) { mpz_get_str(ptr, 10, poly->coeffs[i]); ptr += strlen(ptr); } 
*ptr = 0; return buf; } void mpz_poly_fprint(mpz_poly_t poly, FILE* f) { char* s = mpz_poly_to_string(poly); fputs(s, f); free(s); } void mpz_poly_fprint_pretty(mpz_poly_t poly, FILE* f, const char * x) { char* s = mpz_poly_to_string_pretty(poly, x); fputs(s, f); free(s); } void mpz_poly_print(mpz_poly_t poly) { mpz_poly_fprint(poly, stdout); } void mpz_poly_print_pretty(mpz_poly_t poly, const char * x) { mpz_poly_fprint_pretty(poly, stdout, x); } int mpz_poly_fread(mpz_poly_t poly, FILE* f) { // read poly length unsigned long length; if (!fscanf(f, "%ld", &length)) return 0; poly->length = 0; mpz_poly_ensure_alloc(poly, length); // read coefficients for (unsigned long i = 0; i < length; i++) { if (!mpz_inp_str(poly->coeffs[i], f, 10)) return 0; poly->length++; } mpz_poly_normalise(poly); return 1; } int mpz_poly_read(mpz_poly_t poly) { return mpz_poly_fread(poly, stdin); } /**************************************************************************** Length and degree ****************************************************************************/ void mpz_poly_normalise(mpz_poly_t poly) { while (poly->length && !mpz_sgn(poly->coeffs[poly->length-1])) poly->length--; } int mpz_poly_normalised(mpz_poly_t poly) { return (poly->length == 0) || mpz_sgn(poly->coeffs[poly->length-1]); } void mpz_poly_pad(mpz_poly_t poly, unsigned long length) { mpz_poly_ensure_alloc(poly, length); if (poly->length < length) { for (unsigned long i = poly->length; i < length; i++) mpz_set_ui(poly->coeffs[i], 0); poly->length = length; } } void mpz_poly_truncate(mpz_poly_t res, mpz_poly_t poly, unsigned long length) { if (poly == res) { // inplace truncation if (length < poly->length) poly->length = length; } else { // copy and truncate if (length > poly->length) { mpz_poly_set(res, poly); return; } // todo: use mpz_init_set where appropriate mpz_poly_ensure_alloc(res, length); for (unsigned long i = 0; i < length; i++) mpz_set(res->coeffs[i], poly->coeffs[i]); res->length = length; } 
mpz_poly_normalise(res); } /**************************************************************************** Assignment ****************************************************************************/ void mpz_poly_set(mpz_poly_t res, mpz_poly_t poly) { if (res == poly) return; // todo: use mpz_init_set where appropriate mpz_poly_ensure_alloc(res, poly->length); for (unsigned long i = 0; i < poly->length; i++) mpz_set(res->coeffs[i], poly->coeffs[i]); res->length = poly->length; } /**************************************************************************** Conversions ****************************************************************************/ // assumes coefficients are big enough, and alloc is big enough void _mpz_poly_to_fmpz_poly(fmpz_poly_t res, mpz_poly_t poly) { FLINT_ASSERT(res->alloc >= poly->length); res->length = poly->length; if (poly->length == 0) return; for (unsigned long i = 0; i < poly->length; i++) { FLINT_ASSERT(res->limbs >= mpz_size(poly->coeffs[i])); mpz_to_fmpz(res->coeffs + i*(res->limbs+1), poly->coeffs[i]); } } void mpz_poly_to_fmpz_poly(fmpz_poly_t res, mpz_poly_t poly) { unsigned long limbs = mpz_poly_max_limbs(poly); // todo: there should be a single function that achieves both of the // following.... actually we don't even care in this case if the value // is preserved. 
fmpz_poly_fit_length(res, poly->length); fmpz_poly_fit_limbs(res, limbs); _mpz_poly_to_fmpz_poly(res, poly); } void fmpz_poly_to_mpz_poly(mpz_poly_t res, const fmpz_poly_t poly) { mpz_poly_ensure_alloc(res, poly->length); res->length = poly->length; if (poly->length == 0) return; long i; mp_limb_t* ptr = poly->coeffs; for (i = 0; i < poly->length; i++, ptr += poly->limbs+1) fmpz_to_mpz(res->coeffs[i], ptr); mpz_poly_normalise(res); } /**************************************************************************** Comparison ****************************************************************************/ int mpz_poly_equal(mpz_poly_t poly1, mpz_poly_t poly2) { if (poly1->length != poly2->length) return 0; for (long i = 0; i < poly1->length; i++) if (mpz_cmp(poly1->coeffs[i], poly2->coeffs[i])) return 0; return 1; } /**************************************************************************** Addition/subtraction ****************************************************************************/ void mpz_poly_add(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { // rearrange parameters to make poly1 no longer than poly2 if (poly1->length > poly2->length) SWAP_MPZ_POLY_PTRS(poly1, poly2); mpz_poly_ensure_alloc(res, poly2->length); unsigned long i; for (i = 0; i < poly1->length; i++) mpz_add(res->coeffs[i], poly1->coeffs[i], poly2->coeffs[i]); for (; i < poly2->length; i++) mpz_set(res->coeffs[i], poly2->coeffs[i]); res->length = poly2->length; mpz_poly_normalise(res); } void mpz_poly_sub(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { if (poly1 == poly2) { // equal operands res->length = 0; return; } // rearrange parameters to make poly1 no longer than poly2 int swapped = 0; if (poly1->length > poly2->length) { swapped = 1; SWAP_MPZ_POLY_PTRS(poly1, poly2); } mpz_poly_ensure_alloc(res, poly2->length); unsigned long i; if (swapped) { for (i = 0; i < poly1->length; i++) mpz_sub(res->coeffs[i], poly2->coeffs[i], poly1->coeffs[i]); for (; i < poly2->length; i++) 
mpz_set(res->coeffs[i], poly2->coeffs[i]); } else { for (i = 0; i < poly1->length; i++) mpz_sub(res->coeffs[i], poly1->coeffs[i], poly2->coeffs[i]); for (; i < poly2->length; i++) mpz_neg(res->coeffs[i], poly2->coeffs[i]); } res->length = poly2->length; mpz_poly_normalise(res); } void mpz_poly_neg(mpz_poly_t res, mpz_poly_t poly) { mpz_poly_ensure_alloc(res, poly->length); for (unsigned long i = 0; i < poly->length; i++) mpz_neg(res->coeffs[i], poly->coeffs[i]); res->length = poly->length; } /**************************************************************************** Shifting ****************************************************************************/ void mpz_poly_lshift(mpz_poly_t res, mpz_poly_t poly, unsigned long k) { mpz_poly_ensure_alloc(res, poly->length + k); if (poly == res) { // inplace; just shift the mpz_t's over for (long i = poly->length - 1; i >= 0; i--) mpz_swap(poly->coeffs[i], poly->coeffs[i+k]); for (unsigned long i = 0; i < k; i++) mpz_set_ui(poly->coeffs[i], 0); } else { // not inplace; need to copy data for (unsigned long i = 0; i < k; i++) mpz_set_ui(res->coeffs[i], 0); for (unsigned long i = 0; i < poly->length; i++) mpz_set(res->coeffs[i + k], poly->coeffs[i]); } res->length = poly->length + k; } void mpz_poly_rshift(mpz_poly_t res, mpz_poly_t poly, unsigned long k) { if (k >= poly->length) { // shift all coefficients off the end res->length = 0; return; } if (poly == res) { // inplace; just shift the mpz_t's over for (unsigned long i = k; i < poly->length; i++) mpz_swap(poly->coeffs[i - k], poly->coeffs[i]); } else { // not inplace; need to copy data mpz_poly_ensure_alloc(res, poly->length - k); for (unsigned long i = k; i < poly->length; i++) mpz_set(res->coeffs[i - k], poly->coeffs[i]); } res->length = poly->length - k; } /**************************************************************************** Norms ****************************************************************************/ void mpz_poly_2norm(mpz_t norm, mpz_poly_t poly) { 
mpz_set_ui(norm, 0L); if (poly->length == 0) { return; } mpz_t sqr; mpz_init(sqr); for (unsigned long i = 0; i < poly->length; i++) { mpz_mul(sqr, poly->coeffs[i], poly->coeffs[i]); mpz_add(norm, norm, sqr); } mpz_sqrtrem(norm, sqr, norm); if (mpz_sgn(sqr) != 0L) mpz_add_ui(norm, norm, 1L); mpz_clear(sqr); } /**************************************************************************** Scalar multiplication and division ****************************************************************************/ void mpz_poly_scalar_mul(mpz_poly_t res, mpz_poly_t poly, mpz_t c) { abort(); } void mpz_poly_scalar_mul_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c) { abort(); } void mpz_poly_scalar_mul_si(mpz_poly_t res, mpz_poly_t poly, long c) { abort(); } void mpz_poly_scalar_div(mpz_poly_t res, mpz_poly_t poly, mpz_t c) { abort(); } void mpz_poly_scalar_div_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c) { abort(); } void mpz_poly_scalar_div_si(mpz_poly_t res, mpz_poly_t poly, long c) { abort(); } void mpz_poly_scalar_div_exact(mpz_poly_t res, mpz_poly_t poly, mpz_t c) { abort(); } void mpz_poly_scalar_div_exact_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c) { abort(); } void mpz_poly_scalar_div_exact_si(mpz_poly_t res, mpz_poly_t poly, long c) { abort(); } void mpz_poly_scalar_mod(mpz_poly_t res, mpz_poly_t poly, mpz_t c) { abort(); } void mpz_poly_scalar_mod_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c) { abort(); } /**************************************************************************** Polynomial multiplication ****************************************************************************/ void mpz_poly_mul(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { // use naive KS for now mpz_poly_mul_naive_KS(res, poly1, poly2); } void mpz_poly_sqr(mpz_poly_t res, mpz_poly_t poly) { // use naive KS for now mpz_poly_sqr_naive_KS(res, poly); } /* This is just like mpz_poly_mul_naive(), with the following restrictions: * assumes res does not alias poly1 and 
poly2 * neither polynomial is zero * res->alloc >= poly1->length + poly2->length - 1 (i.e. output has enough room for product) */ void _mpz_poly_mul_naive(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { FLINT_ASSERT(res != poly1); FLINT_ASSERT(res != poly2); FLINT_ASSERT(poly1->length && poly2->length); res->length = poly1->length + poly2->length - 1; FLINT_ASSERT(res->alloc >= res->length); for (unsigned long i = 0; i < res->length; i++) mpz_set_ui(res->coeffs[i], 0); for (unsigned long i = 0; i < poly1->length; i++) for (unsigned long j = 0; j < poly2->length; j++) mpz_addmul(res->coeffs[i+j], poly1->coeffs[i], poly2->coeffs[j]); } void mpz_poly_mul_naive(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { if (!poly1->length || !poly2->length) { // one of the polys is zero res->length = 0; return; } if (poly1 == poly2) { // polys are identical, so call specialised squaring routine mpz_poly_sqr_naive(res, poly1); return; } unsigned long limbs = mpz_poly_product_max_limbs(poly1, poly2); unsigned long length = poly1->length + poly2->length - 1; if (res == poly1 || res == poly2) { // output is inplace, so need a temporary mpz_poly_t temp; mpz_poly_init3(temp, length, FLINT_BITS * limbs); _mpz_poly_mul_naive(temp, poly1, poly2); mpz_poly_swap(temp, res); mpz_poly_clear(temp); } else { // output not inplace mpz_poly_ensure_alloc(res, length); _mpz_poly_mul_naive(res, poly1, poly2); } } /* This is just like mpz_poly_sqr_naive(), with the following restrictions: * assumes res does not alias poly * poly is nonzero * res->alloc >= 2*poly->length - 1 (i.e. 
output has enough room for product) */ void _mpz_poly_sqr_naive(mpz_poly_t res, mpz_poly_t poly) { FLINT_ASSERT(res != poly); FLINT_ASSERT(poly->length); res->length = 2*poly->length - 1; FLINT_ASSERT(res->alloc >= res->length); for (unsigned long i = 0; i < res->length; i++) mpz_set_ui(res->coeffs[i], 0); // off-diagonal products for (unsigned long i = 1; i < poly->length; i++) for (unsigned long j = 0; j < i; j++) mpz_addmul(res->coeffs[i+j], poly->coeffs[i], poly->coeffs[j]); // double the off-diagonal products for (unsigned long i = 1; i < res->length - 1; i++) mpz_add(res->coeffs[i], res->coeffs[i], res->coeffs[i]); // add in diagonal products for (unsigned long i = 0; i < poly->length; i++) mpz_addmul(res->coeffs[2*i], poly->coeffs[i], poly->coeffs[i]); } void mpz_poly_sqr_naive(mpz_poly_t res, mpz_poly_t poly) { if (!poly->length) { // input is zero res->length = 0; return; } unsigned long limbs = mpz_poly_product_max_limbs(poly, poly); unsigned long length = 2*poly->length - 1; if (res == poly) { // output is inplace, so need a temporary mpz_poly_t temp; mpz_poly_init3(temp, length, FLINT_BITS * limbs); _mpz_poly_sqr_naive(temp, poly); mpz_poly_swap(temp, res); mpz_poly_clear(temp); } else { // output not inplace // allocate more coefficients if necessary mpz_poly_ensure_alloc(res, length); _mpz_poly_sqr_naive(res, poly); } } /* Recursive portion of karatsuba multiplication. Input arrays are "in1" of length "len1", staggered by "skip", ditto for in2. Output array is "out" of length len1 + len2 - 1, also staggered by "skip". scratch buffer should be length len1 + len2, also staggered by "skip". All input/output/scratch space should be mpz_init'd, and shouldn't overlap. Must have 1 <= len1 <= len2. If len1*len2 <= crossover, it uses a naive multiplication algorithm. The crossover parameter is passed down recursively to subproducts. 
*/ void _mpz_poly_mul_kara_recursive(mpz_t* out, mpz_t* in1, unsigned long len1, mpz_t* in2, unsigned long len2, mpz_t* scratch, unsigned long skip, unsigned long crossover) { FLINT_ASSERT(len1 >= 1); FLINT_ASSERT(len2 >= len1); // ==================== base cases if (len1 == 1) { // special case, just scalar multiplication for (unsigned long i = 0; i < len2; i++) mpz_mul(out[i*skip], in1[0], in2[i*skip]); return; } if (len1 * len2 < crossover) { // switch to naive multiplication for (unsigned long i = 0; i < len1 + len2 - 1; i++) mpz_set_ui(out[i*skip], 0); for (unsigned long i = 0; i < len1; i++) for (unsigned long j = 0; j < len2; j++) mpz_addmul(out[(i+j)*skip], in1[i*skip], in2[j*skip]); return; } // ==================== recursive case // Let in1 = A1(x^2) + x*B1(x^2) + x^(2*floor(len1/2))*C1, // where A1, B1 have length floor(len1/2), // and C1 is the leading term of in1 if len1 is odd // Similarly for in2 = A2(x^2) + x*B2(x^2) + x^(2*floor(len2/2))*C2. // Put A1 + B1 into even slots of scratch space // (uses len1/2 scratch slots) mpz_t* ptr = scratch; for (unsigned long i = 0; i < len1/2; i++, ptr += 2*skip) mpz_add(*ptr, in1[2*i*skip], in1[2*i*skip + skip]); // Put A2 + B2 into remaining even slots of scratch space // (uses len2/2 slots of scratch) mpz_t* scratch2 = ptr; for (unsigned long i = 0; i < len2/2; i++, ptr += 2*skip) mpz_add(*ptr, in2[2*i*skip], in2[2*i*skip + skip]); // The following three recursive calls all use the odd slots of the current // scratch array as the next layer's scratch space // Put product (A1+B1)*(A2+B2) into odd slots of output array _mpz_poly_mul_kara_recursive(out + skip, scratch, len1/2, scratch2, len2/2, scratch + skip, 2*skip, crossover); // Put product x^2*(B1*B2) into even slots of output array // (except first slot, which is an implied zero) _mpz_poly_mul_kara_recursive(out + 2*skip, in1 + skip, len1/2, in2 + skip, len2/2, scratch + skip, 2*skip, crossover); // Put product A1*A2 into even slots of scratch space 
_mpz_poly_mul_kara_recursive(scratch, in1, len1/2, in2, len2/2, scratch + skip, 2*skip, crossover); // Subtract A1*A2 and B1*B2 from (A1+B1)*(A2+B2) to get (A1*B2 + A2*B1) // in odd slots of output for (unsigned long i = 0; i < len1/2 + len2/2 - 1; i++) { mpz_sub(out[2*i*skip + skip], out[2*i*skip + skip], out[2*(i+1)*skip]); mpz_sub(out[2*i*skip + skip], out[2*i*skip + skip], scratch[2*i*skip]); } // Add A1*A2 to x^2*(B1*B2) into even slots of output mpz_set(out[0], scratch[0]); for (unsigned long i = 1; i < len1/2 + len2/2 - 1; i++) mpz_add(out[2*i*skip], out[2*i*skip], scratch[2*i*skip]); // Now we have the product (A1(x^2) + x*B1(x^2)) * (A2(x^2) + x*B2(x^2)) // in the output array. Still need to handle C1 and C2 terms. if (len1 & 1) { if (len2 & 1) { // terms from x^(len1-1)*C1 * (A2(x^2) + x*B2(x^2)) mpz_t* term1 = in1 + skip*(len1-1); for (unsigned long i = 0; i < len2-2; i++) mpz_addmul(out[(i+len1-1)*skip], *term1, in2[i*skip]); mpz_mul(out[(len1+len2-3)*skip], *term1, in2[(len2-2)*skip]); // terms from x^(len2-1)*C2 * (A1(x^2) + x*B1(x^2)) mpz_t* term2 = in2 + skip*(len2-1); for (unsigned long i = 0; i < len1-1; i++) mpz_addmul(out[(i+len2-1)*skip], *term2, in1[i*skip]); // final C1*C2 term mpz_mul(out[(len1+len2-2)*skip], *term1, *term2); } else { // terms from x^(len1-1)*C1 * (A2(x^2) + x*B2(x^2)) mpz_t* term = in1 + skip*(len1-1); for (unsigned long i = 0; i < len2-1; i++) mpz_addmul(out[(i+len1-1)*skip], *term, in2[i*skip]); mpz_mul(out[(len1+len2-2)*skip], *term, in2[(len2-1)*skip]); } } else if (len2 & 1) { // terms from x^(len2-1)*C2 * (A1(x^2) + x*B1(x^2)) mpz_t* term = in2 + skip*(len2-1); for (unsigned long i = 0; i < len1-1; i++) mpz_addmul(out[(i+len2-1)*skip], *term, in1[i*skip]); mpz_mul(out[(len1+len2-2)*skip], *term, in1[(len1-1)*skip]); } } unsigned long _mpz_poly_mul_karatsuba_crossover(unsigned long limbs) { unsigned long crossover; if (limbs >= mpz_poly_kara_crossover_table_size) crossover = 0; else { if (limbs == 0) limbs = 1; 
crossover = mpz_poly_kara_crossover_table[limbs - 1]; } return crossover * crossover; } void mpz_poly_mul_karatsuba(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { if (!poly1->length || !poly2->length) { // one of the polys is zero res->length = 0; return; } if (poly1 == poly2) { // polys are identical, so call specialised squaring routine mpz_poly_sqr_karatsuba(res, poly1); return; } // rearrange parameters to make poly1 no longer than poly2 if (poly1->length > poly2->length) SWAP_MPZ_POLY_PTRS(poly1, poly2); // number of output coefficients, and a rough upper bound on the number // of limbs needed for each one unsigned long length = poly1->length + poly2->length - 1; unsigned long limbs = mpz_poly_product_max_limbs(poly1, poly2); // allocate scratch space for lower-level karatsuba routine mpz_t* scratch = (mpz_t*) flint_stack_alloc_bytes((length+1) * sizeof(mpz_t)); for (unsigned long i = 0; i <= length; i++) mpz_init2(scratch[i], limbs * FLINT_BITS); // look up crossover parameter (i.e. 
when to switch from classical to // karatsuba multiplication) based on coefficient size unsigned long crossover = _mpz_poly_mul_karatsuba_crossover(limbs/2); if (res == poly1 || res == poly2) { // output is inplace, so need a temporary mpz_poly_t temp; mpz_poly_init3(temp, length, FLINT_BITS*limbs); _mpz_poly_mul_kara_recursive( temp->coeffs, poly1->coeffs, poly1->length, poly2->coeffs, poly2->length, scratch, 1, crossover); mpz_poly_swap(temp, res); mpz_poly_clear(temp); } else { // output not inplace // allocate more coefficients if necessary mpz_poly_ensure_alloc(res, length); _mpz_poly_mul_kara_recursive( res->coeffs, poly1->coeffs, poly1->length, poly2->coeffs, poly2->length, scratch, 1, crossover); } res->length = length; for (unsigned long i = 0; i <= length; i++) mpz_clear(scratch[i]); flint_stack_release(); } void mpz_poly_mul_SS(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_sqr_SS(mpz_poly_t res, mpz_poly_t poly) { abort(); } void mpz_poly_sqr_karatsuba(mpz_poly_t res, mpz_poly_t poly) { abort(); } // ============================================================================ // // Naive KS multiplication and support routines /* Sets y = \sum_{i=0}^{len-1} x[i] * 2^(ki) */ void mpz_poly_mul_naive_KS_pack(mpz_t y, mpz_t* x, unsigned long len, unsigned long k) { if (len == 1) mpz_set(y, x[0]); else { mpz_t temp; mpz_init(temp); unsigned long half = len/2; mpz_poly_mul_naive_KS_pack(temp, x, half, k); mpz_poly_mul_naive_KS_pack(y, x + half, len - half, k); mpz_mul_2exp(y, y, half*k); mpz_add(y, y, temp); mpz_clear(temp); } } /* Inverse operation of mpz_poly_mul_naive_KS_pack (note: y is destroyed) */ void mpz_poly_mul_naive_KS_unpack(mpz_t* x, unsigned long len, mpz_t y, unsigned long k) { if (len == 1) mpz_set(x[0], y); else { mpz_t temp; mpz_init(temp); unsigned long half = len/2; if (mpz_tstbit(y, k*half - 1)) { mpz_cdiv_q_2exp(temp, y, half*k); mpz_cdiv_r_2exp(y, y, half*k); } else { mpz_fdiv_q_2exp(temp, y, half*k); 
mpz_fdiv_r_2exp(y, y, half*k); } mpz_poly_mul_naive_KS_unpack(x, half, y, k); mpz_poly_mul_naive_KS_unpack(x + half, len - half, temp, k); mpz_clear(temp); } } /* Counts maximum number of bits in abs(x->coeffs[i]) todo: isn't this subsumed into mpz_poly_max_bits()? */ unsigned long mpz_poly_mul_naive_KS_get_max_bits(mpz_poly_t x) { unsigned long bits = 0, temp, i; for (i = 0; i < x->length; i++) { temp = mpz_sizeinbase(x->coeffs[i], 2); if (temp > bits) bits = temp; } return bits; } void mpz_poly_mul_naive_KS(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { if (poly1 == poly2) { mpz_poly_sqr_naive_KS(res, poly1); return; } if (!poly1->length || !poly2->length) { // one of the polys is zero res->length = 0; return; } mpz_t z1; mpz_t z2; mpz_init(z1); mpz_init(z2); unsigned long out_len = poly1->length + poly2->length - 1; unsigned long bits1 = mpz_poly_mul_naive_KS_get_max_bits(poly1); unsigned long bits2 = mpz_poly_mul_naive_KS_get_max_bits(poly2); unsigned long bits = bits1 + bits2 + 1 + ceil_log2(FLINT_MIN(poly1->length, poly2->length)); mpz_poly_mul_naive_KS_pack(z1, poly1->coeffs, poly1->length, bits); mpz_poly_mul_naive_KS_pack(z2, poly2->coeffs, poly2->length, bits); mpz_mul(z1, z1, z2); mpz_poly_ensure_alloc(res, out_len); mpz_poly_mul_naive_KS_unpack(res->coeffs, out_len, z1, bits); res->length = out_len; mpz_clear(z1); mpz_clear(z2); } void mpz_poly_sqr_naive_KS(mpz_poly_t res, mpz_poly_t poly) { if (!poly->length) { // poly is zero res->length = 0; return; } mpz_t z; mpz_init(z); unsigned long out_len = 2*poly->length - 1; unsigned long bits = 2 * mpz_poly_mul_naive_KS_get_max_bits(poly) + 1 + ceil_log2(poly->length); mpz_poly_mul_naive_KS_pack(z, poly->coeffs, poly->length, bits); mpz_mul(z, z, z); mpz_poly_ensure_alloc(res, out_len); mpz_poly_mul_naive_KS_unpack(res->coeffs, out_len, z, bits); res->length = out_len; mpz_clear(z); } /**************************************************************************** Polynomial division 
****************************************************************************/ /* Input is a monic polynomial "poly" of degree n, and a nonzero polynomial Q1 of degree k1 such that x^(k1+n) = poly*Q1 + R where deg(R) < n. Output is a nonzero polynomial Q2 of degree k2 such that x^(k2+n) = poly*Q2 + S where deg(S) < n. PRECONDITIONS: k2 >= k1 poly and Q1 must be normalised Q1, Q2, poly must not alias each other */ void mpz_poly_monic_inverse_newton_extend( mpz_poly_t Q2, mpz_poly_t Q1, mpz_poly_t poly, unsigned long k2) { FLINT_ASSERT(poly != Q1); FLINT_ASSERT(poly != Q2); FLINT_ASSERT(Q1 != Q2); FLINT_ASSERT(mpz_poly_normalised(poly)); FLINT_ASSERT(mpz_poly_normalised(Q1)); FLINT_ASSERT(Q1->length >= 1); unsigned long k1 = Q1->length - 1; FLINT_ASSERT(k2 >= k1); unsigned long n = poly->length - 1; if (k2 <= 2*k1) { // only one newton iteration is needed // temp := top k2+1 coefficients of Q1^2 mpz_poly_t temp; mpz_poly_init(temp); mpz_poly_sqr(temp, Q1); mpz_poly_rshift(temp, temp, temp->length - (k2+1)); // temp := top k2+1 coefficients of Q1^2*poly if (poly->length > k2+1) { // first get top k2+1 coefficients of poly mpz_poly_t top; mpz_poly_init(top); mpz_poly_rshift(top, poly, poly->length - (k2+1)); // now get top k2+1 coefficients of Q1^2*poly mpz_poly_mul(temp, temp, top); mpz_poly_rshift(temp, temp, temp->length - (k2+1)); mpz_poly_clear(top); } else { mpz_poly_mul(temp, temp, poly); mpz_poly_rshift(temp, temp, temp->length - (k2+1)); } // Q2 = top k2+1 coefficients of 2*Q1*x^(k1+n) - Q1^2*poly mpz_poly_ensure_alloc(Q2, k2+1); mpz_t x; mpz_init(x); unsigned long i; for (i = 0; i <= k1; i++) { mpz_add(x, Q1->coeffs[k1-i], Q1->coeffs[k1-i]); mpz_sub(Q2->coeffs[k2-i], x, temp->coeffs[k2-i]); } for (; i <= k2; i++) { mpz_neg(Q2->coeffs[k2-i], temp->coeffs[k2-i]); } Q2->length = k2+1; mpz_clear(x); mpz_poly_clear(temp); } else { // more than one newton iteration is needed, so recurse mpz_poly_t temp; mpz_poly_init(temp); mpz_poly_monic_inverse_newton_extend(temp, 
Q1, poly, (k2+1)/2); mpz_poly_monic_inverse_newton_extend(Q2, temp, poly, k2); mpz_poly_clear(temp); } } void mpz_poly_monic_inverse(mpz_poly_t res, mpz_poly_t poly, unsigned long k) { // todo: remove the following restrictions FLINT_ASSERT(k >= 2); FLINT_ASSERT(poly->length >= 2); FLINT_ASSERT(poly != res); // if poly is x^n + a*x^(n-1) + ..., then first approximation // to res is given by x - a mpz_poly_t temp; mpz_poly_init(temp); mpz_poly_pad(temp, 2); mpz_set_ui(temp->coeffs[1], 1); mpz_neg(temp->coeffs[0], poly->coeffs[poly->length-2]); temp->length = 2; // extend the approximation using newton's method mpz_poly_monic_inverse_newton_extend(res, temp, poly, k); mpz_poly_clear(temp); } void mpz_poly_pseudo_inverse(mpz_poly_t res, mpz_poly_t poly, unsigned long k) { abort(); } void mpz_poly_monic_div(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_pseudo_div(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_monic_rem(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_pseudo_rem(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_monic_div_rem(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_pseudo_div_rem(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_monic_inverse_naive(mpz_poly_t res, mpz_poly_t poly, unsigned long k) { abort(); } void mpz_poly_pseudo_inverse_naive(mpz_poly_t res, mpz_poly_t poly, unsigned long k) { abort(); } void mpz_poly_monic_div_naive(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_pseudo_div_naive(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_monic_rem_naive(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_pseudo_rem_naive(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_monic_div_rem_naive(mpz_poly_t 
quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_pseudo_div_rem_naive(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } /**************************************************************************** GCD and extended GCD ****************************************************************************/ void mpz_poly_content(mpz_t x, mpz_poly_t poly) { abort(); } unsigned long mpz_poly_content_ui(mpz_poly_t poly) { abort(); } void mpz_poly_gcd(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } void mpz_poly_xgcd(mpz_poly_t res, mpz_poly_t a, mpz_poly_t b, mpz_poly_t poly1, mpz_poly_t poly2) { abort(); } /**************************************************************************** Miscellaneous ****************************************************************************/ unsigned long mpz_poly_max_limbs(mpz_poly_t poly) { if (!poly->length) return 0; unsigned long temp, limbs = mpz_size(poly->coeffs[0]); for (unsigned long i = 1; i < poly->length; i++) { temp = mpz_size(poly->coeffs[i]); if (temp > limbs) limbs = temp; } return limbs; } unsigned long mpz_poly_max_bits(mpz_poly_t poly) { abort(); } unsigned long mpz_poly_product_max_limbs(mpz_poly_t poly1, mpz_poly_t poly2) { unsigned long limbs1 = mpz_poly_max_limbs(poly1); unsigned long limbs2 = mpz_poly_max_limbs(poly2); // we're assuming poly lengths are at most 2^FLINT_BITS return limbs1 + limbs2 + 1; } unsigned long mpz_poly_product_max_bits(mpz_poly_t poly1, mpz_poly_t poly2) { unsigned long bits1 = mpz_poly_max_bits(poly1); unsigned long bits2 = mpz_poly_max_bits(poly2); return bits1 + bits2 + ceil_log2(FLINT_MAX(poly1->length, poly2->length)); } // *************** end of file flint-1.011/expmod.c0000644017361200017500000000416011025357254014164 0ustar tabbotttabbott/*============================================================================ Copyright (C) 2007, William Hart This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ #include #include #include "mpz_extras.h" /*int main(void) { mpz_t a, exp, m, res; mpz_init(a); mpz_init(exp); mpz_init(m); mpz_init(res); printf("Enter a: "); gmp_scanf("%Zd", a); printf("Enter exponent: "); gmp_scanf("%Zd", exp); printf("Enter modulus: "); gmp_scanf("%Zd", m); F_mpz_expmod_mont(res, a, exp, m); gmp_printf("a^exp mod m is %Zd\n", res); mpz_clear(a); mpz_clear(exp); mpz_clear(m); mpz_clear(res); }*/ int main(void) { mpz_t a, exp, temp, p, res; mpz_init(a); mpz_init(exp); mpz_init(p); mpz_init(temp); mpz_init(res); mpz_set_ui(p, 1); mpz_mul_2exp(p, p, 29440); mpz_set_ui(temp, 1); mpz_mul_2exp(temp, temp, 27392); mpz_sub(p, p, temp); mpz_add_ui(p, p, 1); mpz_set_ui(a, 1); mpz_mul_2exp(a, a, 32); mpz_sub_ui(exp, p, 1); mpz_fdiv_q_2exp(exp, exp, 10); F_mpz_expmod_BZ(res, a, exp, p); //F_mpz_expmod_mont(res, a, exp, p); //mpz_powm(res, a, exp, p); mpz_clear(a); mpz_clear(exp); mpz_clear(p); mpz_init(temp); mpz_clear(res); } flint-1.011/bernoulli_fmpz.c0000644017361200017500000002354511025357254015727 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** bernoulli_fmpz.c: Finds Bernoulli numbers B_{2k} Based on the implementation in SAGE written by David Harvey Uses fmpz_poly polynomials for calculation. Copyright (C) 2007, David Howden *****************************************************************************/ #include #include #include #include #include "flint.h" #include "long_extras.h" #include "fmpz_poly.h" #define TRUE 1; #define FALSE 0; /* Debugging function */ // void print_var(char *name, unsigned long value) // { // printf("%s = %d\n", name, value); // } /* Computes the bernoulli numbers B_0, B_2, ..., B_{p-3} for prime p Requires that res be allocated for (p-1)/2 unsigned longs which will hold the result. If returns 0, then the factoring of p has failed, otherwise will always return 1. 
*/ int bernoulli_mod_p(unsigned long *res, unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long g, g_inv, g_sqr, g_sqr_inv; double p_inv = z_precompute_inverse(p); g = z_primitive_root(p); if(!g) { return FALSE; } g_inv = z_invert(g, p); g_sqr = z_mulmod_precomp(g, g, p, p_inv); g_sqr_inv = z_mulmod_precomp(g_inv, g_inv, p, p_inv); unsigned long poly_size = (p-1)/2; int is_odd = poly_size % 2; unsigned long g_power, g_power_inv; g_power = g_inv; g_power_inv = 1; // constant is (g-1)/2 mod p unsigned long constant; if(g % 2) { constant = (g-1)/2; } else { constant = (g+p-1)/2; } // fudge holds g^{i^2}, fudge_inv holds g^{-i^2} unsigned long fudge, fudge_inv; fudge = fudge_inv = 1; // compute the polynomials F(X) and G(X) fmpz_poly_t F, G; fmpz_poly_init2(F, poly_size, 1); fmpz_poly_init2(G, poly_size, 1); unsigned long i, temp, h; for(i = 0; i < poly_size; i++) { // compute h(g^i)/g^i (h(x) is as in latex notes) temp = g * g_power; h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv); g_power = z_mod_precomp(temp, p, p_inv); g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv); // store coefficient g^{i^2} h(g^i)/g^i fmpz_poly_set_coeff_ui(G, i, z_mulmod_precomp(h, fudge, p, p_inv)); fmpz_poly_set_coeff_ui(F, i, fudge_inv); // update fudge and fudge_inv fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv); fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv); } fmpz_poly_set_coeff_ui(F, 0, 0); // step 2: multiply the polynomials... fmpz_poly_t product; fmpz_poly_init(product); fmpz_poly_mul(product, G, F); // step 3: assemble the result... 
unsigned long g_sqr_power, value; g_sqr_power = g_sqr; fudge = g; res[0] = 1; mpz_t value_coeff; mpz_init(value_coeff); unsigned long value_coeff_ui; unsigned long value2; // know that there are either 1 limbs per coeff or 2 limbs per coeff (since we have a limit on p) //if(_fmpz_poly_limbs(product) == 1) //{ for(i = 1; i < poly_size; i++) { fmpz_poly_get_coeff_mpz(value_coeff, product, i + poly_size); //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(product, i+poly_size), p, p_inv); //value_coeff_ui = z_mod_precomp(_fmpz_poly_get_coeff_ui(product, i), p, p_inv); value = mpz_fdiv_ui(value_coeff, p); //if(value != value2) printf("ERROR!!!! %d != %d\n", value, value2); fmpz_poly_get_coeff_mpz(value_coeff, product, i); if(is_odd) { value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv); //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + value_coeff_ui + p - value, p, p_inv); } else { value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv); //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + value_coeff_ui + value, p, p_inv); } value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); res[i] = value; g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); } // } // else // { // for(i = 1; i < poly_size; i++) // { // //fmpz_poly_get_coeff_mpz(value_coeff, product, i + poly_size); // // value2 = z_ll_mod_precomp(product->coeffs[i+poly_size], product->coeffs[i+poly_size + 1], p, p_inv); // value_coeff_ui = z_ll_mod_precomp(product->coeffs[i], product->coeffs[i+1], p, p_inv); // // //value = mpz_fdiv_ui(value_coeff, p); // //if(value != value2) printf("ERROR!!!! 
%d != %d\n", value, value2); // // // //fmpz_poly_get_coeff_mpz(value_coeff, product, i); // if(is_odd) // { // //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv); // value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + value_coeff_ui + p - value, p, p_inv); // } // else // { // //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv); // value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + value_coeff_ui + value, p, p_inv); // } // // value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); // // value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); // // res[i] = value; // // g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); // fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); // g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); // } // // } mpz_clear(value_coeff); fmpz_poly_clear(F); fmpz_poly_clear(G); fmpz_poly_clear(product); return TRUE; } /* Verifies that the ouput of bernoulli_mod_p above is correct. Takes the result from bernoulli_mod_p (res - an array of (p-1)/2 unsigned longs), and the prime p. Returns 0 if res is incorrect, 1 if res is correct. 
*/ int verify_bernoulli_mod_p(unsigned long *res, unsigned long p) { unsigned long N, i, product, sum, value, element; double p_inv; N = (p-1)/2; product = 1; sum = 0; p_inv = z_precompute_inverse(p); for(i = 0; i < N; i++) { element = res[i]; // if((signed long)element < 0) // { // printf("NEGATIVE NUMBER!!!!!\n"); // } // if(element > p) // { // printf("OVERFLOW!!!!!\n"); // } value = z_mulmod_precomp(z_mulmod_precomp(product, 2*i+1, p, p_inv), element, p, p_inv); sum = z_mod_precomp(sum + value, p, p_inv); product = z_mulmod_precomp(product, 4, p, p_inv); } if(z_mod_precomp(sum + 2, p, p_inv)) { return FALSE; } // i = 0; // printf("Error occurred, output:\n"); // while (i < N) // { // printf("%d\n", res[i]); // i++; // } return TRUE; } /* Test function for bernoulli_mod_p Calculates bernoulli_mod_p for the prime p and verifies the result. Returs 0 if incorrect, and 1 if correct. */ int test_bernoulli_mod_p(unsigned long p) { unsigned long *res = (unsigned long*) flint_stack_alloc((p-1)/2); if(!bernoulli_mod_p(res, p)) { printf("Could not factor p = %d\n", p); flint_stack_release(); return FALSE; } int result = verify_bernoulli_mod_p(res, p); flint_stack_release(); return result; } int main (int argc, char const *argv[]) { if (argc == 2) { unsigned long n = atoi(argv[1]); n = z_nextprime(n); printf("Computing bernoulli_mod_p(%ld)... 
", n); if (!test_bernoulli_mod_p(n)) { printf("Failed\n"); } else { printf("Done\n"); } return 0; } unsigned long p = 2; unsigned long tests = 1000; unsigned long fail = 0; for(unsigned long i = 0; i < tests; i++) { p = z_nextprime(p); //if(p == 2053) return; if(!test_bernoulli_mod_p(p)) { printf("Fails on p = %d\n", p); fail++; } else { printf("Works on p = %d\n", p); } } printf("\nResults: %d OK, %d FAILED.\n", tests - fail, fail); return 0; } flint-1.011/ZmodF_mul-profile.c0000644017361200017500000001734011025357254016226 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** ZmodF_mul-profile.c Profiling for ZmodF_mul Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include "profiler-main.h" #include "ZmodF_mul.h" #include "flint.h" #include #include /* This is a helper function used by the other sampler functions below. 
*/

/*
   Allocates a buffer of the given number of limbs for a profiling run.
   Aborts with a message if the allocation fails, so that the samplers never
   hand a NULL pointer to the code being timed.
*/
static mp_limb_t* ZmodF_mul_prof_alloc_limbs(unsigned long limbs)
{
   mp_limb_t* buf = (mp_limb_t*) malloc(limbs * sizeof(mp_limb_t));
   if (buf == NULL)
   {
      fprintf(stderr, "ZmodF_mul profile: out of memory (%lu limbs)\n", limbs);
      abort();
   }
   return buf;
}

/*
   Parses the driver parameter string "n_min n_max n_skip".

   Returns 1 on success.  Returns 0 (after printing a message to stderr) if
   the string does not contain three unsigned integers or if n_skip is 0,
   which would make the sampling loop run forever.
*/
static int ZmodF_mul_prof_parse_range(const char* params, unsigned long* n_min,
                                      unsigned long* n_max, unsigned long* n_skip)
{
   if (params == NULL || sscanf(params, "%lu %lu %lu", n_min, n_max, n_skip) != 3)
   {
      fprintf(stderr, "ZmodF_mul profile: expected parameters \"n_min n_max n_skip\"\n");
      return 0;
   }
   if (*n_skip == 0)
   {
      fprintf(stderr, "ZmodF_mul profile: n_skip must be positive\n");
      return 0;
   }
   return 1;
}

/*
   Shared body of all the profDriver_* functions below: installs the given
   sampler and samples every n in n_min, n_min + n_skip, ... up to n_max
   that is divisible by "multiple".  n_min is first rounded up to a multiple
   of "multiple" so that the walk starts on a permissible value.
   Pass multiple = 1 to sample every value visited.
*/
static void ZmodF_mul_prof_run_range(void (*sampler)(unsigned long, void*, unsigned long),
                                     char* params, unsigned long multiple)
{
   unsigned long n_min, n_max, n_skip;

   if (!ZmodF_mul_prof_parse_range(params, &n_min, &n_max, &n_skip))
      return;

   prof1d_set_sampler(sampler);

   // round up n_min so we start on a permissible value
   while (n_min % multiple)
      n_min++;

   for (unsigned long n = n_min; n <= n_max; n += n_skip)
   {
      if (n % multiple == 0)
         prof1d_sample(n, NULL);

      // stop here if the next step would pass n_max (also avoids wrap-around)
      if (n_max - n < n_skip)
         break;
   }
}

/*
   Times "count" calls of ZmodF_mul_info_mul(info, x3, x1, x2), where x1 and
   x2 are (n+1)-limb buffers whose low n limbs are random and whose top limb
   is zero.
*/
void sample_ZmodF_mul_helper(ZmodF_mul_info_t info, unsigned long n,
                             unsigned long count)
{
   mp_limb_t* x1 = ZmodF_mul_prof_alloc_limbs(n+1);
   mp_limb_t* x2 = ZmodF_mul_prof_alloc_limbs(n+1);
   mp_limb_t* x3 = ZmodF_mul_prof_alloc_limbs(n+1);

   profiler_random_limbs(x1, n);
   x1[n] = 0;
   profiler_random_limbs(x2, n);
   x2[n] = 0;

   prof_start();

   for (unsigned long i = 0; i < count; i++)
      ZmodF_mul_info_mul(info, x3, x1, x2);

   prof_stop();

   free(x3);
   free(x2);
   free(x1);
}


/*
   Same as sample_ZmodF_mul_helper, but both operands are the same buffer,
   i.e. it times ZmodF_mul_info_mul(info, x3, x1, x1).
*/
void sample_ZmodF_sqr_helper(ZmodF_mul_info_t info, unsigned long n,
                             unsigned long count)
{
   mp_limb_t* x1 = ZmodF_mul_prof_alloc_limbs(n+1);
   mp_limb_t* x3 = ZmodF_mul_prof_alloc_limbs(n+1);

   profiler_random_limbs(x1, n);
   x1[n] = 0;

   prof_start();

   for (unsigned long i = 0; i < count; i++)
      ZmodF_mul_info_mul(info, x3, x1, x1);

   prof_stop();

   free(x3);
   free(x1);
}


// ============================================================================


/* Sampler: multiplication, info set up by ZmodF_mul_info_init_plain(info, n, 0). */
void sample_ZmodF_mul_plain(unsigned long n, void* arg, unsigned long count)
{
   ZmodF_mul_info_t info;
   ZmodF_mul_info_init_plain(info, n, 0);
   sample_ZmodF_mul_helper(info, n, count);
   ZmodF_mul_info_clear(info);
}


char* profDriverString_ZmodF_mul_plain(char* params)
{
   return "ZmodF_mul using plain algorithm.\n"
   "Parameters: n_min, n_max, n_skip.\n";
}


char* profDriverDefaultParams_ZmodF_mul_plain()
{
   return "1 1000 1";
}


void profDriver_ZmodF_mul_plain(char* params)
{
   ZmodF_mul_prof_run_range(sample_ZmodF_mul_plain, params, 1);
}


// ============================================================================


/* Sampler: multiplication, info set up by ZmodF_mul_info_init_threeway(info, n, 0). */
void sample_ZmodF_mul_threeway(unsigned long n, void* arg, unsigned long count)
{
   ZmodF_mul_info_t info;
   ZmodF_mul_info_init_threeway(info, n, 0);
   sample_ZmodF_mul_helper(info, n, count);
   ZmodF_mul_info_clear(info);
}


char* profDriverString_ZmodF_mul_threeway(char* params)
{
   return "ZmodF_mul using threeway algorithm.\n"
   "Parameters: n_min, n_max, n_skip.\n"
   "Note: n not divisible by 3 are skipped.\n";
}


char* profDriverDefaultParams_ZmodF_mul_threeway()
{
   return "1 1000 1";
}


void profDriver_ZmodF_mul_threeway(char* params)
{
   // only n divisible by 3 are sampled
   ZmodF_mul_prof_run_range(sample_ZmodF_mul_threeway, params, 3);
}


// ============================================================================


/* Sampler: multiplication, info set up by ZmodF_mul_info_init(info, n, 0). */
void sample_ZmodF_mul_auto(unsigned long n, void* arg, unsigned long count)
{
   ZmodF_mul_info_t info;
   ZmodF_mul_info_init(info, n, 0);
   sample_ZmodF_mul_helper(info, n, count);
   ZmodF_mul_info_clear(info);
}


char* profDriverString_ZmodF_mul_auto(char* params)
{
   return "ZmodF_mul using automatically selected algorithm.\n"
   "Parameters: n_min, n_max, n_skip.\n";
}


char* profDriverDefaultParams_ZmodF_mul_auto()
{
   return "1 1000 1";
}


void profDriver_ZmodF_mul_auto(char* params)
{
   ZmodF_mul_prof_run_range(sample_ZmodF_mul_auto, params, 1);
}


// ============================================================================


/* Sampler: squaring, info set up by ZmodF_mul_info_init_plain(info, n, 1). */
void sample_ZmodF_sqr_plain(unsigned long n, void* arg, unsigned long count)
{
   ZmodF_mul_info_t info;
   ZmodF_mul_info_init_plain(info, n, 1);
   sample_ZmodF_sqr_helper(info, n, count);
   ZmodF_mul_info_clear(info);
}


char* profDriverString_ZmodF_sqr_plain(char* params)
{
   return "ZmodF_sqr using plain algorithm.\n"
   "Parameters: n_min, n_max, n_skip.\n";
}


char* profDriverDefaultParams_ZmodF_sqr_plain()
{
   return "1 1000 1";
}


void profDriver_ZmodF_sqr_plain(char* params)
{
   ZmodF_mul_prof_run_range(sample_ZmodF_sqr_plain, params, 1);
}


// ============================================================================


/* Sampler: squaring, info set up by ZmodF_mul_info_init_threeway(info, n, 1). */
void sample_ZmodF_sqr_threeway(unsigned long n, void* arg, unsigned long count)
{
   ZmodF_mul_info_t info;
   ZmodF_mul_info_init_threeway(info, n, 1);
   sample_ZmodF_sqr_helper(info, n, count);
   ZmodF_mul_info_clear(info);
}


char* profDriverString_ZmodF_sqr_threeway(char* params)
{
   return "ZmodF_sqr using threeway algorithm.\n"
   "Parameters: n_min, n_max, n_skip.\n"
   "Note: n not divisible by 3 are skipped.\n";
}


char* profDriverDefaultParams_ZmodF_sqr_threeway()
{
   return "1 1000 1";
}


void profDriver_ZmodF_sqr_threeway(char* params)
{
   // only n divisible by 3 are sampled
   ZmodF_mul_prof_run_range(sample_ZmodF_sqr_threeway, params, 3);
}


// ============================================================================


/* Sampler: squaring, info set up by ZmodF_mul_info_init(info, n, 1). */
void sample_ZmodF_sqr_auto(unsigned long n, void* arg, unsigned long count)
{
   ZmodF_mul_info_t info;
   ZmodF_mul_info_init(info, n, 1);
   sample_ZmodF_sqr_helper(info, n, count);
   ZmodF_mul_info_clear(info);
}


char* profDriverString_ZmodF_sqr_auto(char* params)
{
   return "ZmodF_sqr using automatically selected algorithm.\n"
   "Parameters: n_min, n_max, n_skip.\n";
}


char* profDriverDefaultParams_ZmodF_sqr_auto()
{
   return "1 1000 1";
}


void profDriver_ZmodF_sqr_auto(char* params)
{
   ZmodF_mul_prof_run_range(sample_ZmodF_sqr_auto, params, 1);
}


// end of file 
**************************************************************** flint-1.011/fmpz_poly.c0000644017361200017500000103051411025357254014712 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** fmpz_poly.c: Polynomials over Z, implemented as contiguous block of fmpz_t's Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include #include #include #include "mpz_poly.h" #include "flint.h" #include "fmpz.h" #include "fmpz_poly.h" #include "mpn_extras.h" #include "longlong_wrapper.h" #include "longlong.h" #include "memory-manager.h" #include "ZmodF_poly.h" #include "long_extras.h" #include "zmod_poly.h" /**************************************************************************** Conversion Routines ****************************************************************************/ /* Convert length coefficients of an fmpz_poly_t to an already initialised ZmodF_poly_t. Each coefficient will be represented mod p = 2^Bn+1 where n is given by the field n of the ZmodF_poly_t. Coefficients will be assumed to be in the range [-p/2, p/2]. 
Assumes 0 < length <= poly_fmpz->length */ long fmpz_poly_to_ZmodF_poly(ZmodF_poly_t poly_f, const fmpz_poly_t poly_fmpz, const unsigned long length) { unsigned long size_f = poly_f->n + 1; unsigned long size_m = poly_fmpz->limbs+1; mp_limb_t * coeffs_m = poly_fmpz->coeffs; ZmodF_t * coeffs_f = poly_f->coeffs; unsigned long mask = -1L; long bits = 0; long limbs = 0; long sign = 1; long size_j; for (unsigned long i = 0, j = 0; i < length; i++, j += size_m) { size_j = coeffs_m[j]; if ((long) size_j < 0) sign = -1L; if (ABS(size_j) > limbs + 1) { limbs = ABS(size_j) - 1; bits = FLINT_BIT_COUNT(coeffs_m[j+ABS(size_j)]); if (bits == FLINT_BITS) mask = 0L; else mask = -1L - ((1L<length = length; return sign*(FLINT_BITS*limbs+bits); } /* Convert a ZmodF_poly_t to an fmpz_poly_t. Coefficients will be taken to be in the range [-p/2, p/2] where p = 2^nB+1. Assumes 0 < poly_f->length */ void ZmodF_poly_to_fmpz_poly(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const long sign) { unsigned long n = poly_f->n; unsigned long size_m = poly_fmpz->limbs+1; unsigned long limbs = FLINT_MIN(n, size_m-1); mp_limb_t * coeffs_m = poly_fmpz->coeffs; ZmodF_t * coeffs_f = poly_f->coeffs; if (sign) { for (unsigned long i = 0, j = 0; i < poly_f->length; i++, j += size_m) { ZmodF_normalise(coeffs_f[i], n); if (coeffs_f[i][n-1]>>(FLINT_BITS-1) || coeffs_f[i][n]) { F_mpn_negate(coeffs_m + j + 1, coeffs_f[i], limbs); mpn_add_1(coeffs_m + j + 1, coeffs_m + j + 1, limbs, 1L); coeffs_m[j] = -limbs; NORM(coeffs_m + j); } else { F_mpn_copy(coeffs_m + j + 1, coeffs_f[i], limbs); coeffs_m[j] = limbs; NORM(coeffs_m + j); } } } else { for (unsigned long i = 0, j = 0; i < poly_f->length; i++, j += size_m) { ZmodF_normalise(coeffs_f[i], n); F_mpn_copy(coeffs_m + j + 1, coeffs_f[i], limbs); coeffs_m[j] = limbs; NORM(coeffs_m + j); } } poly_fmpz->length = poly_f->length; _fmpz_poly_normalise(poly_fmpz); } static inline long __get_next_coeff(const mp_limb_t * coeff_m, long * borrow, long * coeff, const long 
mask, const long negate) { if ((long) coeff_m[0] == 0) *coeff = -*borrow; else if ((((long) coeff_m[0]) ^ negate) >= 0L) *coeff = coeff_m[1] - *borrow; else *coeff = (-coeff_m[1] - *borrow); *borrow = 0UL; if (*coeff < 0) { *borrow = 1UL; } *coeff&=mask; return *coeff; } static inline long __get_next_coeff_unsigned(const mp_limb_t * coeff_m, long * coeff) { if ((long) coeff_m[0] == 0) *coeff = 0; else *coeff = coeff_m[1]; return *coeff; } void fmpz_poly_bit_pack(ZmodF_poly_t poly_f, const fmpz_poly_t poly_fmpz, const unsigned long bundle, const long bitwidth, const unsigned long length, const long negate) { unsigned long i, k, skip; unsigned long n = poly_f->n; mp_limb_t * coeff_m = poly_fmpz->coeffs; mp_limb_t * array, * next_point; unsigned long temp; half_ulong lower; long coeff; long borrow; mp_limb_t extend; long bits = bitwidth; int sign = (bits < 0); if (sign) bits = ABS(bits); unsigned long coeffs_per_limb = FLINT_BITS/bits; const unsigned long mask = (1UL<length = 0; i=0; while (coeff_m < poly_fmpz->coeffs + 2*length) { k=0; skip=0; coeff = 0; borrow = 0L; temp = 0; array = poly_f->coeffs[i]; i++; next_point = coeff_m + 2*bundle; if (next_point >= poly_fmpz->coeffs + 2*length) next_point = poly_fmpz->coeffs + 2*length; else for (unsigned long j = 0; j < n; j += 8) FLINT_PREFETCH(poly_f->coeffs[i+1], j); while (coeff_m < next_point) { if ((unsigned long)coeff_m&7 == 0) FLINT_PREFETCH(coeff_m,64); // k is guaranteed to be less than FLINT_BITS at this point while ((kFLINT_BITS) { // if k > FLINT_BITS write out a whole limb and read in remaining bits of coeff array[skip] = temp; skip++; temp=(coeff>>(bits+FLINT_BITS-k)); k=(k-FLINT_BITS); // k < HALF_FLINT_BITS } else { // k <= FLINT_BITS if (k >= HALF_FLINT_BITS) { // if k >= HALF_FLINT_BITS store bottom HALF_FLINT_BITS bits lower = (half_ulong)temp; k-=HALF_FLINT_BITS; temp>>=HALF_FLINT_BITS; // k is now <= HALF_FLINT_BITS while ((kFLINT_BITS) { // if k > FLINT_BITS, write out bottom HALF_FLINT_BITS bits 
(along with HALF_FLINT_BITS bits from lower) // read remaining bits from coeff and reduce k by HALF_FLINT_BITS array[skip] = (temp<>=HALF_FLINT_BITS; temp+=((coeff>>(bits+FLINT_BITS-k))<= HALF_FLINT_BITS) { // k <= FLINT_BITS // if k >= HALF_FLINT_BITS write out bottom HALF_FLINT_BITS bits (along with lower) // and reduce k by HALF_FLINT_BITS k-=HALF_FLINT_BITS; array[skip] = (temp<>=HALF_FLINT_BITS; skip++; // k is now less than or equal to HALF_FLINT_BITS and we are now ready to read // the next coefficient if there is one } else { // k < HALF_FLINT_BITS // there isn't enough to write out a whole FLINT_BITS bits, so put it all // together in temp temp = (temp<length++; } // while // sign extend the last FLINT_BITS bits we write out if (skip < n) { if (borrow) temp+= (-1UL<coeffs[0][i]); } printf("\n"); #endif } void fmpz_poly_bit_unpack(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const unsigned long bundle, const unsigned long bits) { unsigned long k, skip; unsigned long temp2; unsigned long temp; unsigned long full_limb; unsigned long carry; mp_limb_t* array; const unsigned long mask = (1UL<coeffs; mp_limb_t * next_point; unsigned long size_m = poly_fmpz->limbs+1; unsigned long n = poly_f->n; #if DEBUG for (unsigned long i = 0; i < n+1; i++) { printf("%lx ",poly_f->coeffs[0][i]); } printf("\n"); #endif for (unsigned long i = 0; coeff_m < poly_fmpz->coeffs + poly_fmpz->length*size_m; i++) { array = poly_f->coeffs[i]; ZmodF_normalise(array, n); k=0; skip=0; carry = 0UL; temp2 = 0; next_point = coeff_m + size_m*bundle; if (next_point >= poly_fmpz->coeffs + poly_fmpz->length*size_m) next_point = poly_fmpz->coeffs + poly_fmpz->length*size_m; else for (unsigned long j = 0; j < n; j += 8) FLINT_PREFETCH(poly_f->coeffs[i+1], j); while (coeff_m < next_point) { // read in a full limb full_limb = array[skip]; temp2 += l_shift(full_limb,k); s=FLINT_BITS-k; k+=s; while ((k >= bits)&&(coeff_m < next_point)) { if (!(temp2&sign_mask)) { fmpz_add_ui_inplace(coeff_m, 
(temp2&mask)+carry); carry = 0UL; } else { temp = ((-temp2)&mask)-carry; fmpz_sub_ui_inplace(coeff_m, temp); carry = 1UL; } coeff_m += size_m; temp2>>=bits; k-=bits; } // k is now less than bits // read in remainder of full_limb temp2 += l_shift(r_shift(full_limb,s),k); k+=(FLINT_BITS-s); while ((k >= bits)&&(coeff_m < next_point)) { if (!(temp2&sign_mask)) { fmpz_add_ui_inplace(coeff_m, (temp2&mask)+carry); carry = 0UL; } else { temp = ((-temp2)&mask)-carry; fmpz_sub_ui_inplace(coeff_m, temp); carry = 1UL; } coeff_m += size_m; temp2>>=bits; k-=bits; } // k is now less than bits skip++; } } _fmpz_poly_normalise(poly_fmpz); } void fmpz_poly_bit_unpack_unsigned(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const unsigned long bundle, const unsigned long bits) { unsigned long k, l, skip; unsigned long temp2; unsigned long temp; unsigned long full_limb; mp_limb_t* array; const unsigned long mask = (1UL<coeffs; fmpz_t next_point; unsigned long size_m = poly_fmpz->limbs+1; unsigned long n = poly_f->n; for (unsigned long i = 0; coeff_m < poly_fmpz->coeffs + poly_fmpz->length*size_m; i++) { array = poly_f->coeffs[i]; ZmodF_normalise(array, n); k=0; skip=0; temp2 = 0; next_point = coeff_m + size_m*bundle; if (next_point >= poly_fmpz->coeffs + poly_fmpz->length*size_m) next_point = poly_fmpz->coeffs + poly_fmpz->length*size_m; else for (unsigned long j = 0; j < n; j += 8) FLINT_PREFETCH(poly_f->coeffs[i+1], j); while (coeff_m < next_point) { if (skip&7 == 0) FLINT_PREFETCH(array+skip,64); // read in a full limb full_limb = array[skip]; temp2 += l_shift(full_limb,k); s=FLINT_BITS-k; k+=s; while ((k >= bits)&&(coeff_m < next_point)) { __fmpz_add_ui_inplace(coeff_m, (temp2&mask)); coeff_m += size_m; temp2>>=bits; k-=bits; } // k is now less than bits // read in remainder of full_limb temp2 += l_shift(r_shift(full_limb,s),k); k+=(FLINT_BITS-s); while ((k >= bits)&&(coeff_m < next_point)) { __fmpz_add_ui_inplace(coeff_m, temp2&mask); coeff_m += size_m; temp2>>=bits; l++; 
k-=bits; } // k is now less than bits skip++; } } _fmpz_poly_normalise(poly_fmpz); } void fmpz_poly_limb_pack(ZmodF_poly_t poly_f, const fmpz_poly_t poly_fmpz, const unsigned long bundle, const long limbs) { unsigned long size_m = poly_fmpz->limbs + 1; long size_j; fmpz_t coeffs_m = poly_fmpz->coeffs; fmpz_t coeffs_f = poly_f->coeffs[0]; long carry = 0; for (unsigned long i = 0, j = 0, k = 0; i < bundle; i++, j += size_m, k += limbs) { size_j = (long) coeffs_m[j]; if (size_j < 0) { F_mpn_negate(coeffs_f + k, coeffs_m + j + 1, ABS(size_j)); F_mpn_set(coeffs_f + k + ABS(size_j), limbs - ABS(size_j)); if (carry) mpn_sub_1(coeffs_f + k, coeffs_f + k, limbs, 1L); carry = 1L; } else if (size_j > 0) { F_mpn_copy(coeffs_f + k, coeffs_m + j + 1, ABS(size_j)); F_mpn_clear(coeffs_f + k + ABS(size_j), limbs - ABS(size_j)); if (carry) mpn_sub_1(coeffs_f + k, coeffs_f + k, limbs, 1L); carry = 0L; } else { if (carry) { F_mpn_set(coeffs_f + k, limbs); carry = 1L; } else { F_mpn_clear(coeffs_f + k, limbs); carry = 0L; } } } } void fmpz_poly_limb_unpack(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const unsigned long bundle, const unsigned long limbs) { unsigned long size_m = poly_fmpz->limbs + 1; unsigned long n = poly_f->n; fmpz_t coeffs_m = poly_fmpz->coeffs; fmpz_t coeffs_f = poly_f->coeffs[0]; unsigned long carry = 0L; for (unsigned long i = 0, j = 0, k = 0; i < bundle; i++, j += size_m, k += limbs) { if (carry) mpn_add_1(coeffs_f + k, coeffs_f + k, n - k, 1L); if (coeffs_f[k+limbs-1]>>(FLINT_BITS-1)) { F_mpn_negate(coeffs_m + j + 1, coeffs_f + k, limbs); coeffs_m[j] = -limbs; NORM(coeffs_m + j); carry = 1L; } else { F_mpn_copy(coeffs_m + j + 1, coeffs_f + k, limbs); coeffs_m[j] = limbs; NORM(coeffs_m + j); carry = 0L; } } _fmpz_poly_normalise(poly_fmpz); } void fmpz_poly_limb_unpack_unsigned(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const unsigned long bundle, const unsigned long limbs) { unsigned long size_m = poly_fmpz->limbs + 1; unsigned long n = poly_f->n; 
fmpz_t coeffs_m = poly_fmpz->coeffs; fmpz_t coeffs_f = poly_f->coeffs[0]; for (unsigned long i = 0, j = 0, k = 0; i < bundle; i++, j += size_m, k += limbs) { F_mpn_copy(coeffs_m + j + 1, coeffs_f+k, limbs); coeffs_m[j] = limbs; NORM(coeffs_m + j); } _fmpz_poly_normalise(poly_fmpz); } void __fmpz_poly_write_next_limb(fmpz_t array, unsigned long * temp, unsigned long * offset_limb, const unsigned long next_limb, const unsigned long shift_1, const unsigned long shift_2) { *temp += l_shift(next_limb, shift_1); array[*offset_limb] = *temp + ((l_shift(1UL,shift_1)-1)&array[*offset_limb]); (*offset_limb)++; *temp = r_shift(next_limb, shift_2); } void __fmpz_poly_write_whole_limb(fmpz_t array, unsigned long * temp, unsigned long * offset_limb, const unsigned long next_limb, const unsigned long shift_1, const unsigned long shift_2) { *temp += l_shift(next_limb,shift_1); array[*offset_limb] = *temp; (*offset_limb)++; *temp = r_shift(next_limb,shift_2); } void fmpz_poly_byte_pack(ZmodF_poly_t poly_f, const fmpz_poly_t poly_fmpz, const unsigned long bundle, const unsigned long coeff_bytes, const unsigned long length, const long negate) { unsigned long size_m = poly_fmpz->limbs+1; unsigned long total_limbs = poly_f->n + 1; fmpz_t coeff_m = poly_fmpz->coeffs; fmpz_t array; const unsigned long limbs_per_coeff = (coeff_bytes>>FLINT_LG_BYTES_PER_LIMB); const unsigned long extra_bytes_per_coeff = coeff_bytes - (limbs_per_coeff<coeffs + size_m*length; i = 0; poly_f->length = 0; while (coeff_m < end) { coeff_limb = 0; coeff_byte = 0; offset_limb = 0; temp = 0; borrow = 0; array = poly_f->coeffs[i]; i++; fmpz_t next_point = coeff_m + size_m*bundle; if (next_point > end) next_point = end; while (coeff_m < next_point) { // compute shifts to be used shift_1 = coeff_byte<<3; shift_2 = FLINT_BITS-shift_1; /* Coefficient is negative after borrow */ if (((negate > 0L) && ((long) coeff_m[0] - (long) borrow < 0L)) || ((negate < 0L) && ((long) -coeff_m[0] - (long) borrow < 0L))) { // mpz_t's 
store the absolute value only, so add 1 then complement if (borrow) { if (coeff_m[0] == 0) next_limb = ~0L; else next_limb = ~coeff_m[1]; co = coeff_m; } else { if (negate > 0L) fmpz_add_ui(scratch, coeff_m, 1L); else fmpz_sub_ui(scratch, coeff_m, 1L); if (scratch[0] == 0) next_limb = ~0L; else next_limb = ~scratch[1]; co = scratch; } // deal with first limb of coefficient if (limbs_per_coeff == 0) { if (coeff_m == next_point-size_m) { __fmpz_poly_write_next_limb(array, &temp, &offset_limb, next_limb, shift_1, shift_2); temp += l_shift(-1UL,shift_1); array[offset_limb] = temp; offset_limb++; extend = -1L; } else { next_limb &= ((1UL<<(extra_bytes_per_coeff<<3))-1); __fmpz_poly_write_next_limb(array, &temp, &offset_limb, next_limb, shift_1, shift_2); array[offset_limb] = temp; } } else { __fmpz_poly_write_next_limb(array, &temp, &offset_limb, next_limb, shift_1, shift_2); // deal with remaining limbs for (j = 1; j < ABS(co[0]); j++) { next_limb = ~co[j+1]; __fmpz_poly_write_whole_limb(array, &temp, &offset_limb, next_limb, shift_1, shift_2); } // write remaining part of coefficient and fill // remainder of coeff_bytes with binary 1's if ((offset_limb< 0L) fmpz_sub_ui(scratch, coeff_m, 1L); else fmpz_add_ui(scratch, coeff_m, 1L); co = scratch; } else { co = coeff_m; } /* Coefficient is positive after borrow */ if (co[0] != 0L) { // deal with first limb of coefficient next_limb = co[1]; __fmpz_poly_write_next_limb(array, &temp, &offset_limb, next_limb, shift_1, shift_2); if (shift_2 == FLINT_BITS) temp = 0; // deal with remaining limbs for (j = 1; j < ABS(co[0]); j++) { next_limb = co[j+1]; __fmpz_poly_write_whole_limb(array, &temp, &offset_limb, next_limb, shift_1, shift_2); } // write remaining part of coefficient array[offset_limb] = temp; offset_limb++; for (; offset_limb < coeff_limb + limbs_per_coeff; offset_limb++) { array[offset_limb] = 0UL; } while ((offset_limb< FLINT_BYTES_PER_LIMB) { coeff_byte -= FLINT_BYTES_PER_LIMB; coeff_limb++; } offset_limb = 
coeff_limb; coeff_m += size_m; } poly_f->length++; } flint_stack_release(); } static inline void __fmpz_poly_unpack_bytes(mp_limb_t* output, const mp_limb_t* array, const unsigned long limb_start, const unsigned long byte_start, const unsigned long num_bytes) { const unsigned long limbs_to_extract = (num_bytes>>FLINT_LG_BYTES_PER_LIMB); const unsigned long extra_bytes_to_extract = num_bytes - (limbs_to_extract<>FLINT_LG_BYTES_PER_LIMB); const unsigned long extra_bytes_to_extract = num_bytes - (limbs_to_extract< FLINT_BYTES_PER_LIMB) { sign = array[limb_start+limbs_to_extract+1]&(1UL<<(((byte_start + extra_bytes_to_extract - FLINT_BYTES_PER_LIMB)<<3)-1)); } else if (byte_start + extra_bytes_to_extract == FLINT_BYTES_PER_LIMB) { sign = array[limb_start+limbs_to_extract]&(1UL<<(FLINT_BITS-1)); } else if (byte_start + extra_bytes_to_extract == 0) { sign = array[limb_start+limbs_to_extract-1]&(1UL<<(FLINT_BITS-1)); } else { sign = array[limb_start+limbs_to_extract]&(1UL<<(((byte_start + extra_bytes_to_extract)<<3)-1)); } if (sign) { temp = ~array[coeff_limb]; coeff_limb++; while (output_limb < limbs_to_extract) { next_limb = r_shift(temp,shift_1); temp = ~array[coeff_limb]; coeff_limb++; next_limb += l_shift(temp,shift_2); output[output_limb] = next_limb; output_limb++; } if (extra_bytes_to_extract <= FLINT_BYTES_PER_LIMB - byte_start) { next_limb = r_shift(temp,shift_1); output[output_limb] = next_limb&((1UL<<(extra_bytes_to_extract<<3))-1); } else { next_limb = r_shift(temp,shift_1); temp = ~array[coeff_limb]; next_limb += l_shift(temp,shift_2); output[output_limb] = next_limb&((1UL<<(extra_bytes_to_extract<<3))-1); } } else { temp = array[coeff_limb]; coeff_limb++; while (output_limb < limbs_to_extract) { next_limb = r_shift(temp,shift_1); temp = array[coeff_limb]; coeff_limb++; next_limb += l_shift(temp,shift_2); output[output_limb] = next_limb; output_limb++; } if (extra_bytes_to_extract <= FLINT_BYTES_PER_LIMB - byte_start) { next_limb = r_shift(temp,shift_1); 
output[output_limb] = next_limb&((1UL<<(extra_bytes_to_extract<<3))-1); } else { next_limb = r_shift(temp,shift_1); temp = array[coeff_limb]; next_limb += l_shift(temp,shift_2); output[output_limb] = next_limb&((1UL<<(extra_bytes_to_extract<<3))-1); } } return sign; } void fmpz_poly_byte_unpack_unsigned(fmpz_poly_t poly_m, const mp_limb_t* array, const unsigned long bundle, const unsigned long coeff_bytes) { const unsigned long limbs_per_coeff = (coeff_bytes>>FLINT_LG_BYTES_PER_LIMB); const unsigned long extra_bytes_per_coeff = coeff_bytes - (limbs_per_coeff<>FLINT_LG_BYTES_PER_LIMB) + 1; mp_limb_t* temp = (mp_limb_t*) flint_stack_alloc(limbs+2); unsigned long limb_upto = 0; unsigned long byte_offset = 0; fmpz_t coeff_m = poly_m->coeffs; unsigned long size_m = poly_m->limbs+1; poly_m->length = bundle; for (unsigned long i = 0; i < bundle; i++) { F_mpn_clear(temp, limbs+2); __fmpz_poly_unpack_bytes(temp + 1, array, limb_upto, byte_offset, coeff_bytes); temp[0] = limbs; NORM(temp); fmpz_add(coeff_m, coeff_m, temp); limb_upto += limbs_per_coeff; if (byte_offset + extra_bytes_per_coeff >= FLINT_BYTES_PER_LIMB) { limb_upto++; byte_offset = byte_offset + extra_bytes_per_coeff - FLINT_BYTES_PER_LIMB; } else { byte_offset = byte_offset + extra_bytes_per_coeff; } coeff_m += size_m; } flint_stack_release(); _fmpz_poly_normalise(poly_m); } void fmpz_poly_byte_unpack(fmpz_poly_t poly_m, const mp_limb_t* array, const unsigned long bundle, const unsigned long coeff_bytes) { const unsigned long limbs_per_coeff = (coeff_bytes>>FLINT_LG_BYTES_PER_LIMB); const unsigned long extra_bytes_per_coeff = coeff_bytes - (limbs_per_coeff<>FLINT_LG_BYTES_PER_LIMB) + 1; mp_limb_t* temp = (mp_limb_t*) flint_stack_alloc(limbs+2); unsigned long limb_upto = 0; unsigned long byte_offset = 0; unsigned long sign; unsigned long borrow = 0; fmpz_t coeff_m = poly_m->coeffs; unsigned long size_m = poly_m->limbs+1; poly_m->length = bundle; for (unsigned long i = 0; i < bundle; i++) { 
F_mpn_clear(temp,limbs+2); sign = __fmpz_poly_unpack_signed_bytes(temp + 1, array, limb_upto, byte_offset, coeff_bytes); if (sign) temp[0] = -limbs; else temp[0] = limbs; NORM(temp); if (sign) { fmpz_sub_ui_inplace(temp, 1); } if (borrow) fmpz_add_ui_inplace(temp, 1); fmpz_add(coeff_m, coeff_m, temp); borrow = 0; if (sign) borrow = 1; limb_upto += limbs_per_coeff; if (byte_offset + extra_bytes_per_coeff >= FLINT_BYTES_PER_LIMB) { limb_upto++; byte_offset = byte_offset + extra_bytes_per_coeff - FLINT_BYTES_PER_LIMB; } else { byte_offset = byte_offset + extra_bytes_per_coeff; } coeff_m += size_m; } flint_stack_release(); _fmpz_poly_normalise(poly_m); } void fmpz_poly_split(ZmodF_poly_t poly_f, fmpz_poly_t poly_fmpz, unsigned long bundle, unsigned long limbs) { abort(); } void fmpz_poly_unsplit(ZmodF_poly_t poly_f, fmpz_poly_t poly_fmpz, unsigned long bundle, unsigned long limbs) { abort(); } void fmpz_poly_to_zmod_poly(zmod_poly_t zpol, fmpz_poly_t fpol) { unsigned long p = zpol->p; if (fpol->length == 0) { zmod_poly_zero(zpol); return; } zmod_poly_fit_length(zpol, fpol->length); unsigned long sizef = fpol->limbs+1; fmpz_t fcoeff = fpol->coeffs; unsigned long * zcoeff = zpol->coeffs; for (unsigned long i = 0; i < fpol->length; i++) { zcoeff[i] = fmpz_mod_ui(fcoeff, p); fcoeff += sizef; } zpol->length = fpol->length; __zmod_poly_normalise(zpol); } void fmpz_poly_to_zmod_poly_no_red(zmod_poly_t zpol, fmpz_poly_t fpol) { unsigned long p = zpol->p; if (fpol->length == 0) { zmod_poly_zero(zpol); return; } zmod_poly_fit_length(zpol, fpol->length); unsigned long sizef = fpol->limbs+1; fmpz_t fcoeff = fpol->coeffs; unsigned long * zcoeff = zpol->coeffs; for (unsigned long i = 0; i < fpol->length; i++) { if (fcoeff[0] == 0) zcoeff[i] = 0; else if ((long)fcoeff[0] < 0L) zcoeff[i] = p - fcoeff[1]; else zcoeff[i] = fcoeff[1]; fcoeff += sizef; } zpol->length = fpol->length; __zmod_poly_normalise(zpol); } void zmod_poly_to_fmpz_poly_unsigned(fmpz_poly_t fpol, zmod_poly_t zpol) { 
unsigned long p = zpol->p; if (zpol->length == 0) { fmpz_poly_zero(fpol); return; } fmpz_poly_fit_length(fpol, zpol->length); fmpz_poly_fit_limbs(fpol, 1); unsigned long sizef = fpol->limbs+1; fmpz_t fcoeff = fpol->coeffs; unsigned long * zcoeff = zpol->coeffs; for (unsigned long i = 0; i < zpol->length; i++) { if (zcoeff[i]) { fcoeff[0] = 1L; fcoeff[1] = zcoeff[i]; } else fcoeff[0] = 0L; fcoeff += sizef; } fpol->length = zpol->length; } void zmod_poly_to_fmpz_poly(fmpz_poly_t fpol, zmod_poly_t zpol) { unsigned long p = zpol->p; unsigned long pdiv2 = p/2; if (zpol->length == 0) { fmpz_poly_zero(fpol); return; } fmpz_poly_fit_length(fpol, zpol->length); fmpz_poly_fit_limbs(fpol, 1); unsigned long sizef = fpol->limbs+1; fmpz_t fcoeff = fpol->coeffs; unsigned long * zcoeff = zpol->coeffs; for (unsigned long i = 0; i < zpol->length; i++) { if (zcoeff[i] == 0) { fcoeff[0] = 0L; } else if (zcoeff[i] > pdiv2) { fcoeff[0] = -1L; fcoeff[1] = p - zcoeff[i]; } else { fcoeff[0] = 1L; fcoeff[1] = zcoeff[i]; } fcoeff += sizef; } fpol->length = zpol->length; } int fmpz_poly_CRT_unsigned(fmpz_poly_t res, fmpz_poly_t fpol, zmod_poly_t zpol, fmpz_t newmod, fmpz_t oldmod) { unsigned long p = zpol->p; double pre = zpol->p_inv; unsigned long c, r1; fmpz_poly_t out; c = fmpz_mod_ui(oldmod, p); c = z_invert(c, p); fmpz_mul_ui(newmod, oldmod, p); unsigned long shortest = (fpol->length < zpol->length) ? 
fpol->length : zpol->length; unsigned limbs = FLINT_ABS(newmod[0]); if (res == fpol) { fmpz_poly_init2(out, FLINT_MAX(fpol->length, zpol->length), limbs); } else { _fmpz_poly_attach(out, res); } fmpz_poly_fit_length(res, FLINT_MAX(fpol->length, zpol->length)); fmpz_poly_fit_limbs(res, limbs); unsigned long sizef = fpol->limbs+1; unsigned long sizeo = out->limbs+1; fmpz_t fcoeff = fpol->coeffs; fmpz_t ocoeff = out->coeffs; unsigned long * zcoeff = zpol->coeffs; #if FLINT_BITS == 64 if (FLINT_BIT_COUNT(p) > FLINT_D_BITS-1) { for (unsigned long i = 0; i < shortest; i++) { fmpz_CRT_ui2_precomp(ocoeff, fcoeff, oldmod, zcoeff[i], p, c, pre); fcoeff += sizef; ocoeff += sizeo; } } else #endif for (unsigned long i = 0; i < shortest; i++) { fmpz_CRT_ui_precomp(ocoeff, fcoeff, oldmod, zcoeff[i], p, c, pre); fcoeff += sizef; ocoeff += sizeo; } /* fpol is longer */ #if FLINT_BITS == 64 if (FLINT_BIT_COUNT(p) > FLINT_D_BITS-1) { for (unsigned long i = shortest; i < fpol->length; i++) { fmpz_CRT_ui2_precomp(ocoeff, fcoeff, oldmod, 0L, p, c, pre); fcoeff += sizef; ocoeff += sizeo; } } else #endif for (unsigned long i = shortest; i < fpol->length; i++) { fmpz_CRT_ui_precomp(ocoeff, fcoeff, oldmod, 0L, p, c, pre); fcoeff += sizef; ocoeff += sizeo; } /* zpol is longer */ unsigned long s; #if FLINT_BITS == 64 if (FLINT_BIT_COUNT(p) > FLINT_D_BITS-1) { for (unsigned long i = shortest; i < zpol->length; i++) { s = z_mulmod2_precomp(zcoeff[i], c, p, pre); fmpz_mul_ui(ocoeff, oldmod, s); ocoeff += sizeo; } } else #endif for (unsigned long i = shortest; i < zpol->length; i++) { s = z_mulmod_precomp(zcoeff[i], c, p, pre); fmpz_mul_ui(ocoeff, oldmod, s); ocoeff += sizeo; } int same; if (res == fpol) { out->length = FLINT_MAX(fpol->length, zpol->length); _fmpz_poly_normalise(out); same = fmpz_poly_equal(fpol, out); fmpz_poly_set(res, out); fmpz_poly_clear(out); } else { res->length = FLINT_MAX(fpol->length, zpol->length); _fmpz_poly_normalise(res); same = fmpz_poly_equal(fpol, res); } return 
same; } int fmpz_poly_CRT(fmpz_poly_t res, fmpz_poly_t fpol, zmod_poly_t zpol, fmpz_t newmod, fmpz_t oldmod) { unsigned long p = zpol->p; double pre = zpol->p_inv; unsigned long c, r1; fmpz_poly_t out; c = fmpz_mod_ui(oldmod, p); c = z_invert(c, p); fmpz_mul_ui(newmod, oldmod, p); unsigned long shortest = (fpol->length < zpol->length) ? fpol->length : zpol->length; unsigned limbs = FLINT_ABS(newmod[0]); if (res == fpol) { fmpz_poly_init2(out, FLINT_MAX(fpol->length, zpol->length), limbs); } else { _fmpz_poly_attach(out, res); } fmpz_poly_fit_length(res, FLINT_MAX(fpol->length, zpol->length)); fmpz_poly_fit_limbs(res, limbs); unsigned long sizef = fpol->limbs+1; unsigned long sizeo = out->limbs+1; fmpz_t fcoeff = fpol->coeffs; fmpz_t ocoeff = out->coeffs; unsigned long * zcoeff = zpol->coeffs; fmpz_t moddiv2 = fmpz_init(newmod[0]); fmpz_div_2exp(moddiv2, newmod, 1); #if FLINT_BITS == 64 if (FLINT_BIT_COUNT(p) > FLINT_D_BITS-1) { for (unsigned long i = 0; i < shortest; i++) { fmpz_CRT_ui2_precomp(ocoeff, fcoeff, oldmod, zcoeff[i], p, c, pre); if (fmpz_cmpabs(ocoeff, moddiv2) > 0) fmpz_sub(ocoeff, ocoeff, newmod); fcoeff += sizef; ocoeff += sizeo; } } else #endif for (unsigned long i = 0; i < shortest; i++) { fmpz_CRT_ui_precomp(ocoeff, fcoeff, oldmod, zcoeff[i], p, c, pre); if (fmpz_cmpabs(ocoeff, moddiv2) > 0) fmpz_sub(ocoeff, ocoeff, newmod); fcoeff += sizef; ocoeff += sizeo; } /* fpol is longer */ #if FLINT_BITS == 64 if (FLINT_BIT_COUNT(p) > FLINT_D_BITS-1) { for (unsigned long i = shortest; i < fpol->length; i++) { fmpz_CRT_ui2_precomp(ocoeff, fcoeff, oldmod, 0L, p, c, pre); if (fmpz_cmpabs(ocoeff, moddiv2) > 0) fmpz_sub(ocoeff, ocoeff, newmod); fcoeff += sizef; ocoeff += sizeo; } } else #endif for (unsigned long i = shortest; i < fpol->length; i++) { fmpz_CRT_ui_precomp(ocoeff, fcoeff, oldmod, 0L, p, c, pre); if (fmpz_cmpabs(ocoeff, moddiv2) > 0) fmpz_sub(ocoeff, ocoeff, newmod); fcoeff += sizef; ocoeff += sizeo; } /* zpol is longer */ unsigned long s; #if 
FLINT_BITS == 64 if (FLINT_BIT_COUNT(p) > FLINT_D_BITS-1) { for (unsigned long i = shortest; i < zpol->length; i++) { s = z_mulmod2_precomp(zcoeff[i], c, p, pre); fmpz_mul_ui(ocoeff, oldmod, s); if (fmpz_cmpabs(ocoeff, moddiv2) > 0) fmpz_sub(ocoeff, ocoeff, newmod); ocoeff += sizeo; } } else #endif for (unsigned long i = shortest; i < zpol->length; i++) { s = z_mulmod_precomp(zcoeff[i], c, p, pre); fmpz_mul_ui(ocoeff, oldmod, s); if (fmpz_cmpabs(ocoeff, moddiv2) > 0) fmpz_sub(ocoeff, ocoeff, newmod); ocoeff += sizeo; } int same; if (res == fpol) { out->length = FLINT_MAX(fpol->length, zpol->length); _fmpz_poly_normalise(out); same = fmpz_poly_equal(fpol, out); fmpz_poly_set(res, out); fmpz_poly_clear(out); } else { res->length = FLINT_MAX(fpol->length, zpol->length); _fmpz_poly_normalise(res); same = fmpz_poly_equal(fpol, res); } fmpz_clear(moddiv2); return same; } /**************************************************************************** _fmpz_poly_* layer All inputs are assumed to be normalised. All outputs are normalised given this assumption. 
****************************************************************************/ /* Create a polynomial of length zero with "alloc" allocated coefficients each with enough space for limbs limbs */ void _fmpz_poly_stack_init(fmpz_poly_t poly, const unsigned long alloc, const unsigned long limbs) { if ((alloc) && (limbs)) poly->coeffs = (fmpz_t) flint_stack_alloc(alloc*(limbs+1)); else poly->coeffs = NULL; poly->alloc = alloc; poly->length = 0; poly->limbs = limbs; } void _fmpz_poly_stack_clear(fmpz_poly_t poly) { if (poly->coeffs) flint_stack_release(); poly->coeffs = NULL; } void _fmpz_poly_check(const fmpz_poly_t poly) { if ((long) poly->length < 0) { printf("Error: Poly length < 0\n"); abort(); } if ((long) poly->limbs < 0) { printf("Error: Poly limbs < 0\n"); abort(); } for (unsigned long i = 0; i < poly->length; i++) { if (FLINT_ABS(poly->coeffs[i*(poly->limbs+1)]) > poly->limbs) { printf("Error: coefficient %ld is too large (%ld limbs vs %ld limbs)\n", i, FLINT_ABS(poly->coeffs[i*(poly->limbs+1)]), poly->limbs); abort(); } } } void _fmpz_poly_check_normalisation(const fmpz_poly_t poly) { if (poly->length) { if (!poly->coeffs[(poly->length-1)*(poly->limbs+1)]) { printf("Error: Poly not normalised\n"); abort(); } } if ((long) poly->length < 0) { printf("Error: Poly length < 0\n"); abort(); } if ((long) poly->limbs < 0) { printf("Error: Poly limbs < 0\n"); abort(); } for (unsigned long i = 0; i < poly->length; i++) { if (FLINT_ABS(poly->coeffs[i*(poly->limbs+1)]) > poly->limbs) { printf("Error: coefficient %ld is too large (%ld limbs vs %ld limbs)\n", i, FLINT_ABS(poly->coeffs[i*(poly->limbs+1)]), poly->limbs); abort(); } } } // Retrieves the n-th coefficient as an mpz void _fmpz_poly_get_coeff_mpz(mpz_t x, const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) { mpz_set_ui(x, 0); return; } fmpz_to_mpz(x, poly->coeffs + n*(poly->limbs + 1)); } /* Set a coefficient to the given unsigned value. If x is nonzero, poly->limbs must be positive. 
Assumes the polynomial length is greater than n. */ void _fmpz_poly_set_coeff_ui(fmpz_poly_t poly, const unsigned long n, const unsigned long x) { FLINT_ASSERT(poly->length > n); fmpz_set_ui(poly->coeffs + n*(poly->limbs + 1), x); if ((x==0L) && (poly->length == n+1)) _fmpz_poly_normalise(poly); } /* Set a coefficient to the given signed value. If x is nonzero, poly->limbs must be positive. Assumes the polynomial length is greater than n. Normalises only if the leading coefficient is set to zero. */ void _fmpz_poly_set_coeff_si(fmpz_poly_t poly, const unsigned long n, const long x) { FLINT_ASSERT(poly->length > n); fmpz_set_si(poly->coeffs + n*(poly->limbs + 1), x); if ((x==0L) && (poly->length == n+1)) _fmpz_poly_normalise(poly); } /* Set a coefficient to the given fmpz_t. Assumes the polynomial length is greater than n. Normalises only if the leading coefficient is set to zero. */ void _fmpz_poly_set_coeff_fmpz(fmpz_poly_t poly, const unsigned long n, fmpz_t x) { FLINT_ASSERT(poly->length > n); fmpz_set(poly->coeffs + n*(poly->limbs + 1), x); if (fmpz_is_zero(x) && (poly->length == n+1)) _fmpz_poly_normalise(poly); } // Retrieves the n-th coefficient as an fmpz void _fmpz_poly_get_coeff_fmpz(fmpz_t x, const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) { fmpz_set_ui(x, 0); return; } fmpz_set(x, poly->coeffs + n*(poly->limbs + 1)); } void _fmpz_poly_get_coeff_mpz_read_only(mpz_t x, const fmpz_poly_t poly, const unsigned long n) { mp_limb_t * coeff = _fmpz_poly_get_coeff_ptr(poly, n); if (poly->length) { x->_mp_d = coeff + 1; x->_mp_size = coeff[0]; x->_mp_alloc = poly->limbs; } else { x->_mp_d = (mp_limb_t *) &poly; // We need to point to something, and at least this exists x->_mp_size = 0; x->_mp_alloc = FLINT_MAX(1, poly->limbs); } } void _fmpz_poly_normalise(fmpz_poly_t poly) { while (poly->length && poly->coeffs[(poly->length-1)*(poly->limbs+1)] == 0) poly->length--; } /* Sets the output poly to equal the input poly Assumes the output poly 
is big enough to hold the nonzero limbs of the input poly */ void _fmpz_poly_set(fmpz_poly_t output, const fmpz_poly_t input) { if (input->length == 0) { output->length = 0; return; } if (output != input) { unsigned long input_size = input->limbs + 1; unsigned long output_size = output->limbs + 1; if ((output->coeffs < input->coeffs) || (output->coeffs >= input->coeffs + input->length*(input->limbs+1))) { for (long i = 0; i < input->length; i++) { if (!input->coeffs[i*input_size]) output->coeffs[i*output_size] = 0; else F_mpn_copy(output->coeffs+i*output_size, input->coeffs+i*input_size, ABS(input->coeffs[i*input_size])+1); } } else { for (long i = input->length - 1; i >= 0; i--) { if (!input->coeffs[i*input_size]) output->coeffs[i*output_size] = 0; else F_mpn_copy(output->coeffs+i*output_size, input->coeffs+i*input_size, ABS(input->coeffs[i*input_size])+1); } } } output->length = input->length; } /* Determines the maximum number of bits in any coefficient of poly_fmpz. This function assumes every coefficient fits in a limb. The returned value is negative if any of the coefficients was negative. */ long _fmpz_poly_max_bits1(const fmpz_poly_t poly_fmpz) { unsigned long mask = -1L; long bits = 0; long sign = 1; fmpz_t coeffs_m = poly_fmpz->coeffs; long i, j; for (i = 0, j = 0; i < poly_fmpz->length; i++, j += 2) { if (i&3 == 0) FLINT_PREFETCH(coeffs_m+j,64); if ((long) coeffs_m[j] < 0) sign = -1L; if (coeffs_m[j]) { if (coeffs_m[j+1] & mask) { bits = FLINT_BIT_COUNT(coeffs_m[j+1]); if (bits == FLINT_BITS) break; else mask = -1L - ((1L<length; i++, j += 2) { if ((long) coeffs_m[j] < 0) { sign = -1L; break; } } } return sign*bits; } /* Determines the maximum number of bits in a coefficient of poly_fmpz. The returned value is negative if any of the coefficients was negative. 
*/ long _fmpz_poly_max_bits(const fmpz_poly_t poly_fmpz) { if (poly_fmpz->limbs == 0) return 0; if (poly_fmpz->limbs == 1) return _fmpz_poly_max_bits1(poly_fmpz); unsigned long mask = -1L; long bits = 0; long sign = 1; long limbs = 0; long size_j; fmpz_t coeffs_m = poly_fmpz->coeffs; unsigned long size_m = poly_fmpz->limbs+1; long i, j; for (i = 0, j = 0; i < poly_fmpz->length; i++, j += size_m) { size_j = (long) coeffs_m[j]; if (size_j < 0) sign = -1L; if (ABS(size_j) > limbs + 1) { limbs = ABS(size_j) - 1; bits = FLINT_BIT_COUNT(coeffs_m[j+ABS(size_j)]); if (bits == FLINT_BITS) mask = 0L; else mask = -1L - ((1L<length; i++, j += size_m) { if ((long) coeffs_m[j] < 0) { sign = -1L; break; } } } return sign*(FLINT_BITS*limbs+bits); } /* Returns the maximum number of limbs of any coefficient of poly. Does not count the sign/size limb. */ unsigned long _fmpz_poly_max_limbs(const fmpz_poly_t poly) { unsigned long limbs = poly->limbs; unsigned long max_limbs = 0; unsigned long next_limbs; for (long i = 0; (i < poly->length) && (max_limbs != limbs); i++) { next_limbs = ABS(poly->coeffs[i*(limbs+1)]); if (next_limbs > max_limbs) max_limbs = next_limbs; } return max_limbs; } /* Checks if two polynomials are arithmetically equal and returns 1 if they are, 0 otherwise. 
*/

int _fmpz_poly_equal(const fmpz_poly_t input1, const fmpz_poly_t input2)
{
   if (input1 == input2) return 1;
   if (input1->length != input2->length) return 0;

   const unsigned long stride1 = input1->limbs + 1;
   const unsigned long stride2 = input2->limbs + 1;

   for (unsigned long k = 0; k < input1->length; k++)
   {
      const mp_limb_t * lhs = input1->coeffs + k*stride1;
      const mp_limb_t * rhs = input2->coeffs + k*stride2;

      /* the sign/size limb plus however many data limbs input1 uses */
      const unsigned long span = ABS(lhs[0]) + 1;

      for (unsigned long t = 0; t < span; t++)
      {
         if (lhs[t] != rhs[t]) return 0;
      }
   }

   return 1;
}

/* 
   Cuts the polynomial down to at most trunc terms, then normalises.
*/

void _fmpz_poly_truncate(fmpz_poly_t poly, const unsigned long trunc)
{
   if (trunc < poly->length) poly->length = trunc;
   _fmpz_poly_normalise(poly);
}

/* 
   Sets output to the negation of input. 
   When output and input are the same polynomial only the 
   sign/size limbs are flipped.
*/

void _fmpz_poly_neg(fmpz_poly_t output, const fmpz_poly_t input)
{
   const unsigned long stride_out = output->limbs + 1;

   if (input != output)
   {
      const unsigned long stride_in = input->limbs + 1;

      for (unsigned long k = 0; k < input->length; k++)
      {
         mp_limb_t * dst = output->coeffs + k*stride_out;
         const mp_limb_t * src = input->coeffs + k*stride_in;

         if (src[0] == 0)
         {
            dst[0] = 0;
            continue;
         }

         dst[0] = -src[0];
         F_mpn_copy(dst + 1, src + 1, ABS(src[0]));
      }
   } else
   {
      for (unsigned long k = 0; k < input->length; k++)
      {
         mp_limb_t * head = output->coeffs + k*stride_out;
         head[0] = -head[0];
      }
   }

   output->length = input->length;
}

/* 
   Set n of the coefficients of poly to zero starting with
   the constant term *regardless of the original length*. 
Normalises if n >= poly->length-1 */ void _fmpz_poly_zero_coeffs(fmpz_poly_t poly, const unsigned long n) { unsigned long size = poly->limbs+1; fmpz_t coeff = poly->coeffs; for (long i = 0; i < n; i++) { coeff[0] = 0; coeff+=size; } if (n >= poly->length-1) _fmpz_poly_normalise(poly); } /* Multiplies input by x^n and sets output to the result Assumes output is large enough to contain the result */ void _fmpz_poly_left_shift(fmpz_poly_t output, const fmpz_poly_t input, const unsigned long n) { fmpz_poly_t part; part->length = input->length; part->limbs = output->limbs; part->coeffs = output->coeffs + n*(output->limbs+1); _fmpz_poly_set(part, input); for (long i = 0; i < n; i++) output->coeffs[i*(output->limbs+1)] = 0; if (input->length > 0) output->length = input->length + n; else (output->length = 0); } /* Divides input by x^n losing the remainder and sets output to the result Assumes output is large enough to contain the result */ void _fmpz_poly_right_shift(fmpz_poly_t output, const fmpz_poly_t input, const unsigned long n) { if (input->length <= n) { _fmpz_poly_zero(output); return; } fmpz_poly_t part; part->length = input->length - n; part->limbs = input->limbs; part->coeffs = input->coeffs + n*(input->limbs + 1); _fmpz_poly_set(output, part); } /* Sets output to the reverse of input (i.e. reverse the order of the coefficients) assuming input to be a polynomial with _length_ coefficients (it may have a length that is less than _length_). 
*/ void _fmpz_poly_reverse(fmpz_poly_t output, const fmpz_poly_t input, const unsigned long length) { unsigned long coeff_limbs; unsigned long size_in = input->limbs + 1; unsigned long size_out = output->limbs + 1; long i; if (input != output) { for (i = 0; i < FLINT_MIN(length, input->length); i++) { coeff_limbs = ABS(input->coeffs[i*size_in]) + 1; F_mpn_copy(output->coeffs + (length - i - 1)*size_out, input->coeffs + i*size_in, coeff_limbs); } for ( ; i < length; i++) { output->coeffs[(length - i - 1)*size_out] = 0L; } output->length = length; _fmpz_poly_normalise(output); } else { fmpz_t temp = (fmpz_t) flint_stack_alloc(size_in); unsigned long coeff_limbs2; for (i = 0; i < length/2; i++) { if (i < input->length) { coeff_limbs = ABS(input->coeffs[i*size_in]) + 1; F_mpn_copy(temp, input->coeffs + i*size_in, coeff_limbs); } else { coeff_limbs = 1; temp[0] = 0; } if (length - i - 1 < input->length) { coeff_limbs2 = ABS(input->coeffs[(length - i - 1)*size_in]) + 1; F_mpn_copy(input->coeffs + i*size_in, input->coeffs + (length - i - 1)*size_in, coeff_limbs2); } else { input->coeffs[i*size_in] = 0; } F_mpn_copy(input->coeffs + (length - i - 1)*size_in, temp, coeff_limbs); } if ((length & 1) && (i >= input->length)) input->coeffs[i*size_in] = 0; output->length = length; _fmpz_poly_normalise(output); flint_stack_release(); } } /* Add two polynomials together */ void _fmpz_poly_add(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2) { if (input1 == input2) { _fmpz_poly_scalar_mul_ui(output, input1, 2UL); return; } unsigned long size1, size2, shorter, size_out; fmpz_t coeffs1, coeffs2, coeffs_out; size1 = input1->limbs+1; size2 = input2->limbs+1; coeffs1 = input1->coeffs; coeffs2 = input2->coeffs; size_out = output->limbs+1; coeffs_out = output->coeffs; shorter = (input1->length > input2->length) ? 
input2->length : input1->length; for (long i = 0; i < shorter; i++) { fmpz_add(coeffs_out+i*size_out, coeffs1+i*size1, coeffs2+i*size2); } if (input1 != output) { for (long i = shorter; i < input1->length; i++) { F_mpn_copy(coeffs_out+i*size_out, coeffs1+i*size1, ABS(coeffs1[i*size1])+1); } } if (input2 != output) { for (unsigned long i = shorter; i < input2->length; i++) { F_mpn_copy(coeffs_out+i*size_out, coeffs2+i*size2, ABS(coeffs2[i*size2])+1); } } if (input1->length == input2->length) { output->length = input1->length; _fmpz_poly_normalise(output); } else { output->length = (input1->length > input2->length) ? input1->length : input2->length; } } /* Subtract two polynomials */ void _fmpz_poly_sub(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2) { if (input1 == input2) { _fmpz_poly_zero_coeffs(output, input1->length); _fmpz_poly_zero(output); return; } unsigned long size1, size2, shorter, size_out; fmpz_t coeffs1, coeffs2, coeffs_out; size1 = input1->limbs+1; size2 = input2->limbs+1; coeffs1 = input1->coeffs; coeffs2 = input2->coeffs; size_out = output->limbs+1; coeffs_out = output->coeffs; shorter = (input1->length > input2->length) ? input2->length : input1->length; for (long i = 0; i < shorter; i++) { fmpz_sub(coeffs_out+i*size_out, coeffs1+i*size1, coeffs2+i*size2); } if (input1 != output) { for (long i = shorter; i < input1->length; i++) { F_mpn_copy(coeffs_out+i*size_out, coeffs1+i*size1, ABS(coeffs1[i*size1])+1); } } if (input2 != output) { for (long i = shorter; i < input2->length; i++) { F_mpn_copy(coeffs_out+i*size_out+1, coeffs2+i*size2+1, ABS(coeffs2[i*size2])); coeffs_out[i*size_out] = -coeffs2[i*size2]; } } else { for (long i = shorter; i < input2->length; i++) { coeffs_out[i*size_out] = -coeffs2[i*size2]; } } if (input1->length == input2->length) { output->length = input1->length; _fmpz_poly_normalise(output); } else { output->length = (input1->length > input2->length) ? 
input1->length : input2->length; } } void _fmpz_poly_scalar_mul_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (x == 0) { unsigned long size = output->limbs + 1; for (long i = 0; i < poly->length; i++) { output->coeffs[i*size] = 0L; } output->length = 0; return; } fmpz_t coeffs1 = poly->coeffs; fmpz_t coeffs_out = output->coeffs; unsigned long size1 = poly->limbs+1; unsigned long size_out = output->limbs+1; mp_limb_t mslimb; for (long i = 0; i < poly->length; i++) { if ((coeffs_out[i*size_out] = coeffs1[i*size1])) { mslimb = mpn_mul_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x); if (mslimb) { coeffs_out[i*size_out+ABS(coeffs1[i*size1])+1] = mslimb; if ((long) coeffs_out[i*size_out] > 0) coeffs_out[i*size_out]++; else coeffs_out[i*size_out]--; } } } output->length = poly->length; } void _fmpz_poly_scalar_mul_si(fmpz_poly_t output, const fmpz_poly_t poly, const long x) { if (x == 0) { unsigned long size = output->limbs + 1; for (long i = 0; i < poly->length; i++) { output->coeffs[i*size] = 0L; } output->length = 0; return; } fmpz_t coeffs1 = poly->coeffs; fmpz_t coeffs_out = output->coeffs; unsigned long size1 = poly->limbs+1; unsigned long size_out = output->limbs+1; mp_limb_t mslimb; for (long i = 0; i < poly->length; i++) { if (x < 0) { if ((coeffs_out[i*size_out] = -coeffs1[i*size1])) { mslimb = mpn_mul_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), -x); if (mslimb) { coeffs_out[i*size_out+ABS(coeffs1[i*size1])+1] = mslimb; if ((long) coeffs_out[i*size_out] > 0) coeffs_out[i*size_out]++; else coeffs_out[i*size_out]--; } } } else { if ((coeffs_out[i*size_out] = coeffs1[i*size1])) { mslimb = mpn_mul_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x); if (mslimb) { coeffs_out[i*size_out+ABS(coeffs1[i*size1])+1] = mslimb; if ((long) coeffs_out[i*size_out] > 0) coeffs_out[i*size_out]++; else coeffs_out[i*size_out]--; } } } } output->length = poly->length; } /* Scalar 
multiplication of a polynomial by a scalar */ void _fmpz_poly_scalar_mul_fmpz(fmpz_poly_t output, const fmpz_poly_t poly, const fmpz_t x) { if (poly->length == 0) { output->length = 0; return; } if (x[0] == 0) { unsigned long size = output->limbs + 1; for (long i = 0; i < poly->length; i++) { output->coeffs[i*size] = 0L; } output->length = 0; return; } unsigned long x0 = ABS(x[0]); while ((!x[x0]) && (x0)) x0--; unsigned long limbs1 = x0; unsigned long limbs2 = poly->limbs; unsigned long total_limbs; unsigned long msl; unsigned long limbs_out = output->limbs+1; fmpz_t coeffs_out = output->coeffs; fmpz_t coeffs2 = poly->coeffs; long sign1 = x[0]; if (limbs1 == 1) { for (long i = 0; i < poly->length; i++) { total_limbs = 1 + ABS(coeffs2[i*(limbs2+1)]); if (total_limbs != 1) { msl = mpn_mul_1(coeffs_out + i*limbs_out + 1, coeffs2 + i*(limbs2+1) + 1, ABS(coeffs2[i*(limbs2+1)]), x[1]); if (msl) coeffs_out[i*limbs_out+ABS(coeffs2[i*(limbs2+1)])+1] = msl; if (((long) coeffs2[i*(limbs2+1)] ^ sign1) < 0) coeffs_out[i*limbs_out] = -total_limbs + (msl == 0L); else coeffs_out[i*limbs_out] = total_limbs - (msl == 0L); } else coeffs_out[i*limbs_out] = 0L; } } else if (limbs1 + limbs2 > 1000) { F_mpn_precomp_t precomp; F_mpn_mul_precomp_init(precomp, x+1, limbs1, limbs2); for (long i = 0; i < poly->length; i++) { total_limbs = limbs1 + ABS(coeffs2[i*(limbs2+1)]); if (total_limbs != limbs1) { msl = F_mpn_mul_precomp(coeffs_out + i*limbs_out + 1, coeffs2 + i*(limbs2+1) + 1, ABS(coeffs2[i*(limbs2+1)]), precomp); if (((long) coeffs2[i*(limbs2+1)] ^ sign1) < 0) coeffs_out[i*limbs_out] = -total_limbs + (msl == 0L); else coeffs_out[i*limbs_out] = total_limbs - (msl == 0L); } else coeffs_out[i*limbs_out] = 0L; } F_mpn_mul_precomp_clear(precomp); } else { if (poly != output) { for (long i = 0; i < poly->length - 1; i++) { __fmpz_mul(coeffs_out + i*limbs_out, coeffs2 + i*(limbs2+1), x); } fmpz_mul(coeffs_out + (poly->length - 1)*limbs_out, coeffs2 + (poly->length - 1)*(limbs2+1), x); } 
else { for (long i = 0; i < poly->length; i++) { fmpz_mul(coeffs_out + i*limbs_out, coeffs2 + i*(limbs2+1), x); } } } output->length = poly->length; } void _fmpz_poly_scalar_div_exact_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { output->length = 0; return; } unsigned long size_out = output->limbs+1; unsigned long size1 = poly->limbs+1; fmpz_t coeffs_out = output->coeffs; fmpz_t coeffs1 = poly->coeffs; if (size_out != size1) { for (unsigned long i = 0; i < poly->length; i++) { mpn_divmod_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x); coeffs_out[i*size_out] = coeffs1[i*size1]; NORM(coeffs_out+i*size_out); } } else { if (coeffs_out != coeffs1) { coeffs_out[0] = 0; for (unsigned long i = 0; i < poly->length-1; i++) { F_mpn_copy(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1])); F_mpn_clear(coeffs_out+i*size_out+ABS(coeffs1[i*size1])+1, size_out-ABS(coeffs1[i*size1])); } F_mpn_copy(coeffs_out+(poly->length-1)*size_out+1, coeffs1+(poly->length-1)*size1+1, ABS(coeffs1[(poly->length-1)*size1])); if (size_out > ABS(coeffs1[(poly->length-1)*size1])+1) F_mpn_clear(coeffs_out+(poly->length-1)*size_out+ABS(coeffs1[(poly->length-1)*size1])+1, size_out-ABS(coeffs1[(poly->length-1)*size1])-1); mpn_divmod_1(coeffs_out, coeffs_out, size_out*poly->length, x); for (unsigned long i = 0; i < poly->length; i++) { coeffs_out[i*size_out] = coeffs1[i*size1]; NORM(coeffs_out+i*size_out); } } else { fmpz_t signs = (fmpz_t) flint_stack_alloc(poly->length); signs[0] = coeffs1[0]; coeffs_out[0] = 0; for (unsigned long i = 0; i < poly->length-1; i++) { signs[i+1] = coeffs1[(i+1)*size1]; F_mpn_clear(coeffs_out+i*size_out+ABS(signs[i])+1, size_out-ABS(signs[i])); } if (size_out > ABS(signs[poly->length-1])+1) F_mpn_clear(coeffs_out+(poly->length-1)*size_out+ABS(signs[poly->length-1])+1, size_out-ABS(signs[poly->length-1])-1); mpn_divmod_1(coeffs_out, coeffs_out, size_out*poly->length, x); for (unsigned 
long i = 0; i < poly->length; i++) { coeffs_out[i*size_out] = signs[i]; NORM(coeffs_out+i*size_out); } flint_stack_release(); } } output->length = poly->length; } void _fmpz_poly_scalar_div_exact_si(fmpz_poly_t output, const fmpz_poly_t poly, const long x) { if (poly->length == 0) { output->length = 0; return; } unsigned long size_out = output->limbs+1; unsigned long size1 = poly->limbs+1; fmpz_t coeffs_out = output->coeffs; fmpz_t coeffs1 = poly->coeffs; if (size_out != size1) { for (unsigned long i = 0; i < poly->length; i++) { if (x < 0) { mpn_divmod_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), -x); coeffs_out[i*size_out] = -coeffs1[i*size1]; } else { mpn_divmod_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x); coeffs_out[i*size_out] = coeffs1[i*size1]; } NORM(coeffs_out+i*size_out); } } else { if (coeffs_out != coeffs1) { coeffs_out[0] = 0; for (unsigned long i = 0; i < poly->length-1; i++) { F_mpn_copy(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1])); F_mpn_clear(coeffs_out+i*size_out+ABS(coeffs1[i*size1])+1, size_out-ABS(coeffs1[i*size1])); } F_mpn_copy(coeffs_out+(poly->length-1)*size_out+1, coeffs1+(poly->length-1)*size1+1, ABS(coeffs1[(poly->length-1)*size1])); if (size_out > ABS(coeffs1[(poly->length-1)*size1])+1) F_mpn_clear(coeffs_out+(poly->length-1)*size_out+ABS(coeffs1[(poly->length-1)*size1])+1, size_out-ABS(coeffs1[(poly->length-1)*size1])-1); if (x < 0) mpn_divmod_1(coeffs_out, coeffs_out, size_out*poly->length, -x); else mpn_divmod_1(coeffs_out, coeffs_out, size_out*poly->length, x); for (unsigned long i = 0; i < poly->length; i++) { if (x < 0) coeffs_out[i*size_out] = -coeffs1[i*size1]; else coeffs_out[i*size_out] = coeffs1[i*size1]; NORM(coeffs_out+i*size_out); } } else { fmpz_t signs = (fmpz_t) flint_stack_alloc(poly->length); signs[0] = coeffs1[0]; coeffs_out[0] = 0; for (long i = 0; i < poly->length-1; i++) { signs[i+1] = coeffs1[(i+1)*size1]; 
F_mpn_clear(coeffs_out+i*size_out+ABS(signs[i])+1, size_out-ABS(signs[i])); } if (size_out > ABS(signs[poly->length-1])+1) F_mpn_clear(coeffs_out+(poly->length-1)*size_out+ABS(signs[poly->length-1])+1, size_out-ABS(signs[poly->length-1])-1); if (x < 0) mpn_divmod_1(coeffs_out, coeffs_out, size_out*poly->length, -x); else mpn_divmod_1(coeffs_out, coeffs_out, size_out*poly->length, x); for (long i = 0; i < poly->length; i++) { if (x < 0) coeffs_out[i*size_out] = -signs[i]; else coeffs_out[i*size_out] = signs[i]; NORM(coeffs_out+i*size_out); } flint_stack_release(); } } output->length = poly->length; } /* Does scalar division of a polynomial by a limb x. Rounding is done towards zero. */ void _fmpz_poly_scalar_tdiv_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { output->length = 0; return; } unsigned long size_out = output->limbs+1; unsigned long size1 = poly->limbs+1; fmpz_t coeffs_out = output->coeffs; fmpz_t coeffs1 = poly->coeffs; if (poly->length > FLINT_POL_DIV_1_LENGTH) { unsigned long norm; mp_limb_t xinv; unsigned long xnorm; count_lead_zeros(norm, x); xnorm = (x<length; i++) { coeffs_out[i*size_out] = coeffs1[i*size1]; F_mpn_divrem_ui_precomp(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x, xinv); NORM(coeffs_out+i*size_out); } } else { for (unsigned long i = 0; i < poly->length; i++) { coeffs_out[i*size_out] = coeffs1[i*size1]; mpn_divmod_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x); NORM(coeffs_out+i*size_out); } } output->length = poly->length; _fmpz_poly_normalise(output); } /* Does scalar division of a polynomial by a limb x. Rounding is done towards minus infinity so that the remainder is positive. 
*/ void _fmpz_poly_scalar_div_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { output->length = 0; return; } unsigned long size_out = output->limbs+1; unsigned long size1 = poly->limbs+1; fmpz_t coeffs_out = output->coeffs; fmpz_t coeffs1 = poly->coeffs; mp_limb_t rem; if (poly->length > FLINT_POL_DIV_1_LENGTH) { unsigned long norm; mp_limb_t xinv; unsigned long xnorm; count_lead_zeros(norm, x); xnorm = (x<length; i++) { coeffs_out[i*size_out] = coeffs1[i*size1]; rem = F_mpn_divrem_ui_precomp(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x, xinv); if (((long) coeffs_out[i*size_out] < 0L) && (rem)) { NORM(coeffs_out+i*size_out); fmpz_sub_ui_inplace(coeffs_out+i*size_out, 1UL); } else { NORM(coeffs_out+i*size_out); } } } else { for (unsigned long i = 0; i < poly->length; i++) { coeffs_out[i*size_out] = coeffs1[i*size1]; rem = mpn_divmod_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x); if (((long) coeffs_out[i*size_out] < 0L) && (rem)) { NORM(coeffs_out+i*size_out); fmpz_sub_ui_inplace(coeffs_out+i*size_out, 1UL); } else { NORM(coeffs_out+i*size_out); } } } output->length = poly->length; _fmpz_poly_normalise(output); } /* Divide each coefficient by the signed scalar, rounding the quotient towards zero */ void _fmpz_poly_scalar_tdiv_si(fmpz_poly_t output, const fmpz_poly_t poly, const long scalar) { long x = scalar; if (poly->length == 0) { output->length = 0; return; } unsigned long size_out = output->limbs+1; unsigned long size1 = poly->limbs+1; fmpz_t coeffs_out = output->coeffs; fmpz_t coeffs1 = poly->coeffs; int sign = (x < 0); if (sign) x = -x; if (poly->length > FLINT_POL_DIV_1_LENGTH) { unsigned long norm; mp_limb_t xinv; unsigned long xnorm; count_lead_zeros(norm, (unsigned long) x); xnorm = ((unsigned long) x<length; i++) { if (sign) coeffs_out[i*size_out] = -coeffs1[i*size1]; else coeffs_out[i*size_out] = coeffs1[i*size1]; 
F_mpn_divrem_ui_precomp(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x, xinv); NORM(coeffs_out+i*size_out); } } else { for (unsigned long i = 0; i < poly->length; i++) { if (sign) coeffs_out[i*size_out] = -coeffs1[i*size1]; else coeffs_out[i*size_out] = coeffs1[i*size1]; mpn_divmod_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x); NORM(coeffs_out+i*size_out); } } output->length = poly->length; _fmpz_poly_normalise(output); } /* Divide each coefficient by the signed scalar, rounding the quotient towards minus infinity */ void _fmpz_poly_scalar_div_si(fmpz_poly_t output, const fmpz_poly_t poly, const long scalar) { long x = scalar; if (poly->length == 0) { output->length = 0; return; } unsigned long size_out = output->limbs+1; unsigned long size1 = poly->limbs+1; fmpz_t coeffs_out = output->coeffs; fmpz_t coeffs1 = poly->coeffs; int sign = (x < 0L); if (sign) x = -x; mp_limb_t rem; if (poly->length > FLINT_POL_DIV_1_LENGTH) { unsigned long norm; mp_limb_t xinv; unsigned long xnorm; count_lead_zeros(norm, (unsigned long) x); xnorm = ((unsigned long) x<length; i++) { if (sign) coeffs_out[i*size_out] = -coeffs1[i*size1]; else coeffs_out[i*size_out] = coeffs1[i*size1]; rem = F_mpn_divrem_ui_precomp(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x, xinv); if (((long) coeffs_out[i*size_out] < 0L) && (rem)) { NORM(coeffs_out+i*size_out); fmpz_sub_ui_inplace(coeffs_out+i*size_out, 1UL); } else { NORM(coeffs_out+i*size_out); } } } else { for (unsigned long i = 0; i < poly->length; i++) { if (sign) coeffs_out[i*size_out] = -coeffs1[i*size1]; else coeffs_out[i*size_out] = coeffs1[i*size1]; rem = mpn_divmod_1(coeffs_out+i*size_out+1, coeffs1+i*size1+1, ABS(coeffs1[i*size1]), x); if (((long) coeffs_out[i*size_out] < 0L) && (rem)) { NORM(coeffs_out+i*size_out); fmpz_sub_ui_inplace(coeffs_out+i*size_out, 1UL); } else { NORM(coeffs_out+i*size_out); } } } output->length = poly->length; _fmpz_poly_normalise(output); } /* 
Divide each coefficient of poly by scalar. Rounds towards minus infinity. */ void _fmpz_poly_scalar_div_fmpz(fmpz_poly_t output, const fmpz_poly_t poly, const fmpz_t scalar) { if (scalar[0] == 1L) { _fmpz_poly_scalar_div_ui(output, poly, scalar[1]); return; } if ((scalar[0] == -1L) && (fmpz_bits(scalar) < FLINT_BITS)) { _fmpz_poly_scalar_div_si(output, poly, -scalar[1]); return; } if (poly == output) { fmpz_poly_t temp; fmpz_poly_init(temp); fmpz_poly_set(temp, poly); for (unsigned long i = 0; i < temp->length; i++) { fmpz_fdiv(output->coeffs+i*(output->limbs+1), temp->coeffs+i*(temp->limbs+1), scalar); } fmpz_poly_clear(temp); } else { for (unsigned long i = 0; i < poly->length; i++) { fmpz_fdiv(output->coeffs+i*(output->limbs+1), poly->coeffs+i*(poly->limbs+1), scalar); } } output->length = poly->length; _fmpz_poly_normalise(output); } /* Multiply two polynomials using the classical technique. Currently doesn't allow aliasing */ void _fmpz_poly_mul_classical(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2) { if ((poly1->length == 0) || (poly2->length == 0)) { _fmpz_poly_zero(output); return; } fmpz_poly_t input1, input2; if (output == poly1) { _fmpz_poly_stack_init(input1, poly1->length, poly1->limbs); _fmpz_poly_set(input1, poly1); if (output == poly2) { _fmpz_poly_attach(input2, input1); } else _fmpz_poly_attach(input2, poly2); } else if (output == poly2) { _fmpz_poly_stack_init(input2, poly2->length, poly2->limbs); _fmpz_poly_set(input2, poly2); _fmpz_poly_attach(input1, poly1); } else { _fmpz_poly_attach(input1, poly1); _fmpz_poly_attach(input2, poly2); } fmpz_t coeffs_out = output->coeffs; fmpz_t coeffs1, coeffs2; unsigned long len1, len2; unsigned long lenm1; coeffs1 = input1->coeffs; coeffs2 = input2->coeffs; len1 = input1->length; len2 = input2->length; // Special case if the length of both inputs is 1 if ((len1 == 1) && (len2 == 1)) { if ((coeffs1[0] == 0) || (coeffs2[0] == 0)) { coeffs_out[0] = 0; } else { fmpz_mul(coeffs_out, 
coeffs1, coeffs2); } } // Ordinary case else { unsigned long size_out = output->limbs+1; unsigned long size1, size2; size1 = input1->limbs+1; size2 = input2->limbs+1; lenm1 = input1->length-1; fmpz_t temp; long i, j; for (i = 0; i < len1; i++) { /* Set out[i] = in1[i]*in2[0] */ if ((coeffs1[i*size1] == 0) || (coeffs2[0] == 0)) { coeffs_out[i*size_out] = 0; } else { __fmpz_mul(coeffs_out+i*size_out, coeffs1+i*size1, coeffs2); } } for (i = 1; i < len2 - 1; i++) { /* Set out[i+in1->length-1] = in1[in1->length-1]*in2[i] */ if ((coeffs1[lenm1*size1] == 0) || (coeffs2[i*size2] == 0)) { coeffs_out[(i+lenm1)*size_out]=0; } else { __fmpz_mul(coeffs_out+(i+lenm1)*size_out, coeffs1+lenm1*size1, coeffs2+i*size2); } } /* The above coefficient multiplications overwrite the first limb of the next coefficient in each case, using the function __fmpz_mul. The final multiplication cannot do this however. */ if ((coeffs1[lenm1*size1] == 0) || (coeffs2[(len2-1)*size2] == 0)) { coeffs_out[(len2+lenm1-1)*size_out]=0; } else { fmpz_mul(coeffs_out+(len2+lenm1-1)*size_out, coeffs1+lenm1*size1, coeffs2+(len2-1)*size2); } for (i = 0; i < lenm1; i++) { for (j = 1; j < len2; j++) { /* out[i+j] += in1[i]*in2[j] */ if ((coeffs1[i*size1] != 0) && (coeffs2[j*size2] != 0)) { if (!coeffs_out[(i+j)*size_out]) { fmpz_mul(coeffs_out+(i+j)*size_out, coeffs1+i*size1, coeffs2+j*size2); } else { fmpz_addmul(coeffs_out+(i+j)*size_out, coeffs1+i*size1, coeffs2+j*size2); } } } } } output->length = len1 + len2 - 1; if (poly1 == output) _fmpz_poly_stack_clear(input1); else if (poly2 == output) _fmpz_poly_stack_clear(input2); } /* Multiply two polynomials using the classical technique truncating the result to trunc terms. 
Currently doesn't allow aliasing */ void _fmpz_poly_mul_classical_trunc(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2, const unsigned long trunc) { fmpz_t coeffs_out = output->coeffs; unsigned long size_out = output->limbs+1; fmpz_t coeffs1, coeffs2; unsigned long size1, size2; unsigned long len1, len2; unsigned long lenm1; if (trunc == 0) { _fmpz_poly_zero(output); return; } if ((poly1->length == 0) || (poly2->length == 0)) { for (unsigned long i = 0; i < trunc; i++) { coeffs_out[i*size_out] = 0; } _fmpz_poly_zero(output); return; } fmpz_poly_t input1, input2; if (output == poly1) { _fmpz_poly_stack_init(input1, poly1->length, poly1->limbs); _fmpz_poly_set(input1, poly1); if (output == poly2) { _fmpz_poly_attach(input2, input1); } else _fmpz_poly_attach(input2, poly2); } else if (output == poly2) { _fmpz_poly_stack_init(input2, poly2->length, poly2->limbs); _fmpz_poly_set(input2, poly2); _fmpz_poly_attach(input1, poly1); } else { _fmpz_poly_attach(input1, poly1); _fmpz_poly_attach(input2, poly2); } coeffs1 = input1->coeffs; coeffs2 = input2->coeffs; size1 = input1->limbs+1; size2 = input2->limbs+1; lenm1 = input1->length-1; len1 = input1->length; len2 = input2->length; long i, j; fmpz_t temp; // Special case if the length of both inputs is 1 if ((len1 == 1) && (len2 == 1)) { if ((coeffs1[0] == 0) || (coeffs2[0] == 0)) { coeffs_out[0] = 0; } else { fmpz_mul(coeffs_out, coeffs1, coeffs2); } } // Ordinay case else { for (i = 0; (i < len1) && (i < trunc - 1); i++) { /* Set out[i] = in1[i]*in2[0] */ if ((coeffs1[i*size1] == 0) || (coeffs2[0] == 0)) { coeffs_out[i*size_out] = 0; } else { __fmpz_mul(coeffs_out+i*size_out, coeffs1+i*size1, coeffs2); } } if (i != len1) { if ((coeffs1[i*size1] == 0) || (coeffs2[0] == 0)) { coeffs_out[i*size_out] = 0; } else { fmpz_mul(coeffs_out+i*size_out, coeffs1+i*size1, coeffs2); } } else { for (i = 1; (i < len2 - 1) && (i + lenm1 < trunc - 1); i++) { /* Set out[i+in1->length-1] = in1[in1->length-1]*in2[i] */ if 
((coeffs1[lenm1*size1] == 0) || (coeffs2[i*size2] == 0)) { coeffs_out[(i+lenm1)*size_out] = 0; } else { __fmpz_mul(coeffs_out+(i+lenm1)*size_out, coeffs1+lenm1*size1, coeffs2+i*size2); } } /* The above coefficient multiplications overwrite the first limb of the next coefficient in each case, using the function __fmpz_mul. The final multiplication cannot do this however. */ if ((coeffs1[lenm1*size1] == 0) || (coeffs2[i*size2] == 0)) { coeffs_out[(i+lenm1)*size_out] = 0; } else { fmpz_mul(coeffs_out+(i+lenm1)*size_out, coeffs1+lenm1*size1, coeffs2+i*size2); } } for (i = 0; i < lenm1; i++) { for (j = 1; (j < len2) && (i + j < trunc); j++) { /* out[i+j] += in1[i]*in2[j] */ if ((coeffs1[i*size1] != 0) && (coeffs2[j*size2] != 0)) { if (!coeffs_out[(i+j)*size_out]) { fmpz_mul(coeffs_out+(i+j)*size_out, coeffs1+i*size1, coeffs2+j*size2); } else { fmpz_addmul(coeffs_out+(i+j)*size_out, coeffs1+i*size1, coeffs2+j*size2); } } } } } output->length = FLINT_MIN(len1 + len2 - 1, trunc); _fmpz_poly_normalise(output); if (poly1 == output) _fmpz_poly_stack_clear(input1); else if (poly2 == output) _fmpz_poly_stack_clear(input2); } /* Multiply two polynomials using the classical technique truncating the result so that the first trunc terms are zero. 
Currently doesn't allow aliasing */ void _fmpz_poly_mul_classical_trunc_left(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2, const unsigned long trunc) { fmpz_t coeffs_out = output->coeffs; unsigned long size_out = output->limbs+1; fmpz_t coeffs1, coeffs2; unsigned long size1, size2; unsigned long len1, len2; unsigned long lenm1; if ((poly1->length == 0) || (poly2->length == 0) || (trunc >= poly1->length + poly2->length - 1)) { for (long i = 0; i < (long) (poly1->length + poly2->length - 1); i++) { coeffs_out[i*size_out] = 0L; } _fmpz_poly_zero(output); return; } fmpz_poly_t input1, input2; if (output == poly1) { _fmpz_poly_stack_init(input1, poly1->length, poly1->limbs); _fmpz_poly_set(input1, poly1); if (output == poly2) { _fmpz_poly_attach(input2, input1); } else _fmpz_poly_attach(input2, poly2); } else if (output == poly2) { _fmpz_poly_stack_init(input2, poly2->length, poly2->limbs); _fmpz_poly_set(input2, poly2); _fmpz_poly_attach(input1, poly1); } else { _fmpz_poly_attach(input1, poly1); _fmpz_poly_attach(input2, poly2); } coeffs1 = input1->coeffs; coeffs2 = input2->coeffs; size1 = input1->limbs+1; size2 = input2->limbs+1; lenm1 = input1->length-1; len1 = input1->length; len2 = input2->length; long i, j; fmpz_t temp; // Special case if the length of both inputs is 1 if ((len1 == 1) && (len2 == 1)) { if ((coeffs1[0] == 0) || (coeffs2[0] == 0)) { coeffs_out[0] = 0; } else { fmpz_mul(coeffs_out, coeffs1, coeffs2); } } // Ordinay case else { for (i = trunc; (i < len1); i++) { /* Set out[i] = in1[i]*in2[0] */ if ((coeffs1[i*size1] == 0) || (coeffs2[0] == 0)) { coeffs_out[i*size_out] = 0; } else { __fmpz_mul(coeffs_out+i*size_out, coeffs1+i*size1, coeffs2); } } for (i = 1; i < len2 - 1; i++) { if (i + lenm1 >= trunc) { /* Set out[i+in1->length-1] = in1[in1->length-1]*in2[i] */ if ((coeffs1[lenm1*size1] == 0) || (coeffs2[i*size2] == 0)) { coeffs_out[(i+lenm1)*size_out] = 0; } else { __fmpz_mul(coeffs_out+(i+lenm1)*size_out, coeffs1+lenm1*size1, 
coeffs2+i*size2); } } } if (len2 == 1) i = 0; /* The above coefficient multiplications overwrite the first limb of the next coefficient in each case, using the function __fmpz_mul. The final multiplication cannot do this however. */ if ((coeffs1[lenm1*size1] == 0) || (coeffs2[i*size2] == 0)) { coeffs_out[(i+lenm1)*size_out] = 0; } else { fmpz_mul(coeffs_out+(i+lenm1)*size_out, coeffs1+lenm1*size1, coeffs2+i*size2); } for (i = 0; i < lenm1; i++) { for (j = 1; j < len2; j++) { /* out[i+j] += in1[i]*in2[j] */ if ((coeffs1[i*size1] != 0) && (coeffs2[j*size2] != 0) && (i + j >= trunc)) { if (!coeffs_out[(i+j)*size_out]) { fmpz_mul(coeffs_out+(i+j)*size_out, coeffs1+i*size1, coeffs2+j*size2); } else { fmpz_addmul(coeffs_out+(i+j)*size_out, coeffs1+i*size1, coeffs2+j*size2); } } } } } for (i = 0; (i < trunc) && (i < len1 + len2 - 1); i++) { coeffs_out[i*size_out] = 0; } output->length = len1 + len2 - 1; if (trunc >= output->length) _fmpz_poly_normalise(output); if (poly1 == output) _fmpz_poly_stack_clear(input1); else if (poly2 == output) _fmpz_poly_stack_clear(input2); } void __fmpz_poly_karamul_recursive(fmpz_poly_t res, const fmpz_poly_t a, const fmpz_poly_t b, fmpz_poly_t scratch, fmpz_poly_t scratchb, const unsigned long crossover) { fmpz_poly_t temp; if ((crossover < 4) && (a->length == 2 && b->length == 2)) { const unsigned long asize = a->limbs+1; const unsigned long bsize = b->limbs+1; const unsigned long rsize = res->limbs+1; const unsigned long ssize = scratchb->limbs+1; __fmpz_mul(res->coeffs, a->coeffs, b->coeffs); fmpz_add(scratchb->coeffs, a->coeffs, a->coeffs+asize); fmpz_mul(res->coeffs+2*rsize, a->coeffs+asize, b->coeffs+bsize); fmpz_add(scratchb->coeffs+ssize, b->coeffs, b->coeffs+bsize); fmpz_mul(res->coeffs+rsize, scratchb->coeffs, scratchb->coeffs+ssize); fmpz_sub(res->coeffs+rsize, res->coeffs+rsize, res->coeffs); fmpz_sub(res->coeffs+rsize, res->coeffs+rsize, res->coeffs+2*rsize); res->length = a->length + b->length - 1; return; } if 
((a->length+b->length <= crossover) || (a->length <= 1) || (b->length <= 1) || ((a->length == 2) || (b->length == 2))) { _fmpz_poly_mul_classical(res, a, b); return; } fmpz_poly_t a1,a2,b1,b2; unsigned long l2 = 0; a1->length = (a->length+1)/2; a2->length = a->length-a1->length; a1->coeffs = a->coeffs; a2->coeffs = a->coeffs+a1->length*(a->limbs+1); a1->limbs = a->limbs; a2->limbs = a->limbs; if (a1->length < b->length) //ordinary case { /* (a1+a2*x)*(b1+b2*x) = a1*b1 + a2*b2*x^2 + (a1+a2)*(b1+b2)*x-a1*b1*x-a2*b2*x; */ b1->length = a1->length; b2->length = b->length - b1->length; b1->coeffs = b->coeffs; b2->coeffs = b->coeffs + b1->length*(b->limbs+1); b1->limbs = b->limbs; b2->limbs = b->limbs; /* from 0 for 2 * a1->length - 1, from 2 * a1->length for a2->length + b2->length - 1 will be written directly to, so we need to clean the coefficient in between */ res->coeffs[((a1->length<<1)-1)*(res->limbs+1)] = 0; fmpz_poly_t asum, bsum, prodsum, scratch2, scratch3; asum->length = a1->length; asum->coeffs = scratchb->coeffs; asum->limbs = scratchb->limbs; bsum->length = a1->length; bsum->coeffs = scratchb->coeffs + a1->length*(scratchb->limbs+1); bsum->limbs = scratchb->limbs; prodsum->length = (a1->length<<1)-1; prodsum->coeffs = scratch->coeffs;// + (a1->length<<1)*(scratch->limbs+1); prodsum->limbs = scratch->limbs; // res_lo = a1*b1 __fmpz_poly_karamul_recursive(res, a1, b1, scratch, scratchb, crossover); // res_hi = a2*b2 temp->coeffs = res->coeffs+(a1->length<<1)*(res->limbs+1); temp->limbs = res->limbs; __fmpz_poly_karamul_recursive(temp, a2, b2, scratch, scratchb, crossover); // asum = a1+a2 _fmpz_poly_add(asum, a1, a2); // bsum = b1+b2 _fmpz_poly_add(bsum, b1, b2); // prodsum = asum*bsum scratch3->coeffs = scratchb->coeffs+(a1->length<<1)*(scratchb->limbs+1); scratch3->limbs = scratchb->limbs; scratch2->limbs = scratch->limbs; scratch2->coeffs = scratch->coeffs+((a1->length<<1)-1)*(scratch->limbs+1); if (asum->length > bsum->length) 
__fmpz_poly_karamul_recursive(prodsum, asum, bsum, scratch2, scratch3, crossover); else __fmpz_poly_karamul_recursive(prodsum, bsum, asum, scratch2, scratch3, crossover); for (long i = prodsum->length; i < (a1->length<<1)-1; i++) prodsum->coeffs[i*(prodsum->limbs+1)] = 0L; // prodsum = prodsum - res_lo temp->coeffs = res->coeffs; temp->length = (a1->length<<1)-1; _fmpz_poly_sub(prodsum, prodsum, temp); // prodsum = prodsum - res_hi temp->coeffs = res->coeffs + (a1->length<<1)*(res->limbs+1); temp->length = a2->length+b2->length-1; _fmpz_poly_sub(prodsum, prodsum, temp); // res_mid += prodsum temp->coeffs = res->coeffs + a1->length*(res->limbs+1); temp->length = prodsum->length; _fmpz_poly_add(temp, temp, prodsum); res->length = a->length + b->length - 1; } else { fmpz_poly_t scratch2, temp1; while ((1<length) l2++; if ((1<length) a1->length = (1<length = a->length-a1->length; a1->coeffs = a->coeffs; a2->coeffs = a->coeffs+a1->length*(a->limbs+1); /* The first a1->length + b->length - 1 coefficients will be written to directly, so we need to clean the remaining coefficients */ for (unsigned long i = a1->length + b->length - 1; i < a->length + b->length - 1; i++) res->coeffs[i*(res->limbs+1)] = 0L; // res_lo = a1*b __fmpz_poly_karamul_recursive(res, a1, b, scratch, scratchb, crossover); //temp = a2*b temp->coeffs = scratch->coeffs; temp->length = a2->length + b->length - 1; temp->limbs = scratch->limbs; scratch2->coeffs = scratch->coeffs+temp->length*(scratch->limbs+1); scratch2->limbs = scratch->limbs; if (b->length <= a2->length) __fmpz_poly_karamul_recursive(temp, a2, b, scratch2, scratchb, crossover); else __fmpz_poly_karamul_recursive(temp, b, a2, scratch2, scratchb, crossover); // res_mid += temp temp1->coeffs = res->coeffs+a1->length*(res->limbs+1); temp1->length = temp->length; temp1->limbs = res->limbs; _fmpz_poly_add(temp1, temp1, temp); res->length = a->length + b->length - 1; } } void _fmpz_poly_mul_karatsuba(fmpz_poly_t output, const fmpz_poly_t poly1, 
const fmpz_poly_t poly2) { if ((poly1->length == 0) || (poly2->length == 0)) { _fmpz_poly_zero(output); return; } unsigned long limbs = output->limbs; unsigned long log_length = 0; unsigned long crossover; fmpz_poly_t input1, input2; if (output == poly1) { _fmpz_poly_stack_init(input1, poly1->length, poly1->limbs); _fmpz_poly_set(input1, poly1); if (output == poly2) { _fmpz_poly_attach(input2, input1); } else _fmpz_poly_attach(input2, poly2); } else if (output == poly2) { _fmpz_poly_stack_init(input2, poly2->length, poly2->limbs); _fmpz_poly_set(input2, poly2); _fmpz_poly_attach(input1, poly1); } else { _fmpz_poly_attach(input1, poly1); _fmpz_poly_attach(input2, poly2); } fmpz_poly_t scratch, scratchb, temp; scratch->coeffs = (fmpz_t) flint_stack_alloc(5*FLINT_MAX(input1->length,input2->length)*(limbs+1)); scratch->limbs = limbs + 1; scratchb->limbs = FLINT_MAX(input1->limbs, input2->limbs) + 1; scratchb->coeffs = (fmpz_t) flint_stack_alloc(5*FLINT_MAX(input1->length, input2->length)*(scratchb->limbs+1)); if (_fmpz_poly_max_limbs(input1) + _fmpz_poly_max_limbs(input2) >= 19) crossover = 0; else crossover = 19 - _fmpz_poly_max_limbs(input1) - _fmpz_poly_max_limbs(input2); if (input1->length >= input2->length) __fmpz_poly_karamul_recursive(output, input1, input2, scratch, scratchb, crossover); else __fmpz_poly_karamul_recursive(output, input2, input1, scratch, scratchb, crossover); flint_stack_release(); flint_stack_release(); if (poly1 == output) _fmpz_poly_stack_clear(input1); else if (poly2 == output) _fmpz_poly_stack_clear(input2); } void __fmpz_poly_karatrunc_recursive(fmpz_poly_t res, const fmpz_poly_t a, const fmpz_poly_t b, fmpz_poly_t scratch, fmpz_poly_t scratchb, const unsigned long crossover, const unsigned long trunc) { fmpz_poly_t temp, temp2; if ((a->length <= 1) || (b->length <= 1)) { unsigned long trunc_next = FLINT_MIN(trunc, a->length + b->length - 1); _fmpz_poly_mul_classical_trunc(res, a, b, trunc_next); res->length = 
FLINT_MIN(a->length+b->length-1, trunc_next); return; } if (((a->length == 2 && b->length == 2) && (crossover < 4)) || (trunc == 1)) { const unsigned long asize = a->limbs+1; const unsigned long bsize = b->limbs+1; const unsigned long rsize = res->limbs+1; const unsigned long ssize = scratchb->limbs+1; if (trunc > 1) { __fmpz_mul(res->coeffs, a->coeffs, b->coeffs); fmpz_add(scratchb->coeffs, a->coeffs, a->coeffs+asize); fmpz_add(scratchb->coeffs+ssize, b->coeffs, b->coeffs+bsize); if (trunc > 2) fmpz_mul(res->coeffs+2*rsize, a->coeffs+asize, b->coeffs+bsize); else fmpz_mul(scratch->coeffs, a->coeffs+asize, b->coeffs+bsize); fmpz_mul(res->coeffs+rsize, scratchb->coeffs, scratchb->coeffs+ssize); fmpz_sub(res->coeffs+rsize, res->coeffs+rsize, res->coeffs); if (trunc > 2) fmpz_sub(res->coeffs+rsize, res->coeffs+rsize, res->coeffs+2*rsize); else fmpz_sub(res->coeffs+rsize, res->coeffs+rsize, scratch->coeffs); } else { fmpz_mul(res->coeffs, a->coeffs, b->coeffs); } res->length = FLINT_MIN(a->length + b->length - 1, trunc); return; } if ((a->length+b->length <= crossover) || ((a->length == 2) && (b->length == 2))) { unsigned long trunc_next = FLINT_MIN(trunc, a->length + b->length - 1); _fmpz_poly_mul_classical_trunc(res, a, b, trunc_next); res->length = FLINT_MIN(a->length+b->length-1, trunc_next); return; } fmpz_poly_t a1, a2, b1, b2; unsigned long l2 = 0; unsigned long sa = FLINT_MIN(a->length, trunc); unsigned long sb = FLINT_MIN(b->length, trunc); unsigned long old_length; a1->length = (sa+1)/2; a2->length = sa-a1->length; a1->coeffs = a->coeffs; a2->coeffs = a->coeffs+a1->length*(a->limbs+1); a1->limbs = a->limbs; a2->limbs = a->limbs; if (a1->length < sb) //ordinary case { /* (a1+a2*x)*(b1+b2*x) = a1*b1 + a2*b2*x^2 + (a1+a2)*(b1+b2)*x-a1*b1*x-a2*b2*x; */ b1->length = a1->length; b2->length = sb - b1->length; b1->coeffs = b->coeffs; b2->coeffs = b->coeffs + b1->length*(b->limbs+1); b1->limbs = b->limbs; b2->limbs = b->limbs; /* from 0 for 2 * a1->length - 1, from 2 
* a1->length for a2->length + b2->length - 1 will be written directly to, so we need to clean the coefficient in between */ if ((a1->length<<1)-1 < trunc) res->coeffs[((a1->length<<1)-1)*(res->limbs+1)] = 0; fmpz_poly_t asum, bsum, prodsum, scratch2, scratch3; asum->length = a1->length; asum->coeffs = scratchb->coeffs; asum->limbs = scratchb->limbs; bsum->length = a1->length; bsum->coeffs = scratchb->coeffs + a1->length*(scratchb->limbs+1); bsum->limbs = scratchb->limbs; prodsum->length = (a1->length<<1)-1; prodsum->coeffs = scratch->coeffs+(a2->length+b2->length-1)*(scratch->limbs+1); prodsum->limbs = scratch->limbs; // res_lo = a1*b1 __fmpz_poly_karatrunc_recursive(res, a1, b1, scratch, scratchb, crossover, trunc); // res_hi = a2*b2 temp->coeffs = scratch->coeffs; temp->limbs = scratch->limbs; scratch2->limbs = scratch->limbs; scratch2->coeffs = scratch->coeffs + (a2->length+b2->length-1)*(scratch->limbs+1); __fmpz_poly_karatrunc_recursive(temp, a2, b2, scratch2, scratchb, crossover, trunc - a1->length); temp2->limbs = res->limbs; if (trunc > (a1->length<<1)) { old_length = temp->length; temp->length = FLINT_MIN(old_length, trunc-(a1->length<<1)); temp2->coeffs = res->coeffs+(a1->length<<1)*(res->limbs+1); _fmpz_poly_set(temp2, temp); temp->length = old_length; } // asum = a1+a2 _fmpz_poly_add(asum, a1, a2); // bsum = b1+b2 _fmpz_poly_add(bsum, b1, b2); // prodsum = asum*bsum scratch3->coeffs = scratchb->coeffs+(a1->length<<1)*(scratchb->limbs+1); scratch3->limbs = scratchb->limbs; scratch2->coeffs = scratch->coeffs + (sa+sb-2)*(scratch->limbs+1); if (asum->length > bsum->length) __fmpz_poly_karatrunc_recursive(prodsum, asum, bsum, scratch2, scratch3, crossover, trunc - a1->length); else __fmpz_poly_karatrunc_recursive(prodsum, bsum, asum, scratch2, scratch3, crossover, trunc - a1->length); for (long i = prodsum->length; i < trunc - a1->length; i++) prodsum->coeffs[i*(prodsum->limbs+1)] = 0L; // prodsum = prodsum - res_lo temp2->coeffs = res->coeffs; 
temp2->length = (a1->length<<1)-1; _fmpz_poly_sub(prodsum, prodsum, temp2); // prodsum = prodsum - res_hi _fmpz_poly_sub(prodsum, prodsum, temp); // res_mid += prodsum prodsum->length = FLINT_MIN(prodsum->length, trunc - a1->length); temp2->coeffs = res->coeffs + a1->length*(res->limbs+1); temp2->length = prodsum->length; _fmpz_poly_add(temp2, temp2, prodsum); res->length = FLINT_MIN(a->length + b->length - 1, trunc); } else { fmpz_poly_t scratch2, temp1; while ((1<length) l2++; if ((1<length = (1<length = sa - a1->length; a1->coeffs = a->coeffs; a2->coeffs = a->coeffs+a1->length*(a->limbs+1); /* The first a1->length + b->length - 1 coefficients will be written to directly, so we need to clean the remaining coefficients */ if (trunc > a1->length + sb - 1) for (unsigned long i = a1->length + sb - 1; i < FLINT_MIN(sa + sb - 1, trunc); i++) res->coeffs[i*(res->limbs+1)] = 0; temp->coeffs = b->coeffs; temp->limbs = b->limbs; temp->length = sb; // res_lo = a1*b if (sb <= a1->length) __fmpz_poly_karatrunc_recursive(res, a1, temp, scratch, scratchb, crossover, trunc); else __fmpz_poly_karatrunc_recursive(res, temp, a1, scratch, scratchb, crossover, trunc); //temp2 = a2*b temp2->coeffs = scratch->coeffs; temp2->length = FLINT_MIN(a2->length + sb - 1, trunc - a1->length); temp2->limbs = scratch->limbs; scratch2->coeffs = scratch->coeffs+temp2->length*(scratch->limbs+1); scratch2->limbs = scratch->limbs; if (sb <= a2->length) __fmpz_poly_karatrunc_recursive(temp2, a2, temp, scratch2, scratchb, crossover, trunc - a1->length); else __fmpz_poly_karatrunc_recursive(temp2, temp, a2, scratch2, scratchb, crossover, trunc - a1->length); // res_mid += 2temp temp1->coeffs = res->coeffs+a1->length*(res->limbs+1); temp1->length = FLINT_MIN(temp2->length, trunc - a1->length); temp1->limbs = res->limbs; _fmpz_poly_add(temp1, temp1, temp2); res->length = FLINT_MIN(sa + sb - 1, trunc); } } void _fmpz_poly_mul_karatsuba_trunc(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t 
poly2, const unsigned long trunc) { if ((poly1->length == 0) || (poly2->length == 0) || (trunc == 0)) { _fmpz_poly_zero(output); return; } unsigned long limbs = output->limbs; unsigned long log_length = 0; unsigned long crossover; fmpz_poly_t input1, input2; if (output == poly1) { _fmpz_poly_stack_init(input1, poly1->length, poly1->limbs); _fmpz_poly_set(input1, poly1); if (output == poly2) { _fmpz_poly_attach(input2, input1); } else _fmpz_poly_attach(input2, poly2); } else if (output == poly2) { _fmpz_poly_stack_init(input2, poly2->length, poly2->limbs); _fmpz_poly_set(input2, poly2); _fmpz_poly_attach(input1, poly1); } else { _fmpz_poly_attach(input1, poly1); _fmpz_poly_attach(input2, poly2); } fmpz_poly_t scratch, scratchb, temp; scratch->coeffs = (fmpz_t) flint_stack_alloc(6*FLINT_MAX(input1->length,input2->length)*(limbs+1)); scratch->limbs = limbs+1; scratchb->limbs = FLINT_MAX(input1->limbs,input2->limbs)+1; scratchb->coeffs = (fmpz_t) flint_stack_alloc(6*FLINT_MAX(input1->length,input2->length)*(scratchb->limbs+1)); if (_fmpz_poly_max_limbs(input1) + _fmpz_poly_max_limbs(input2) >= 19) crossover = 0; else crossover = 19 - _fmpz_poly_max_limbs(input1) - _fmpz_poly_max_limbs(input2); if (input1->length >= input2->length) __fmpz_poly_karatrunc_recursive(output, input1, input2, scratch, scratchb, crossover, trunc); else __fmpz_poly_karatrunc_recursive(output, input2, input1, scratch, scratchb, crossover, trunc); flint_stack_release(); flint_stack_release(); _fmpz_poly_normalise(output); if (poly1 == output) _fmpz_poly_stack_clear(input1); else if (poly2 == output) _fmpz_poly_stack_clear(input2); } void __fmpz_poly_karatrunc_left_recursive(fmpz_poly_t res, const fmpz_poly_t a, const fmpz_poly_t b, fmpz_poly_t scratch, fmpz_poly_t scratchb, const unsigned long crossover, const unsigned long trunc) { fmpz_poly_t temp, temp2; long non_zero = a->length + b->length - trunc - 1; if (non_zero <= 0) { for (long i = 0; i < (long) (a->length + b->length - 1); i++) { 
res->coeffs[i*(res->limbs+1)] = 0; } res->length = 0; return; } if ((a->length <= 1) || (b->length <= 1) || (non_zero == 1)) { _fmpz_poly_mul_classical_trunc_left(res, a, b, trunc); return; } if ((a->length == 2 && b->length == 2) && (crossover < 4) && (!trunc)) { const unsigned long asize = a->limbs+1; const unsigned long bsize = b->limbs+1; const unsigned long rsize = res->limbs+1; const unsigned long ssize = scratchb->limbs+1; __fmpz_mul(res->coeffs, a->coeffs, b->coeffs); fmpz_add(scratchb->coeffs, a->coeffs, a->coeffs+asize); fmpz_add(scratchb->coeffs+ssize, b->coeffs, b->coeffs+bsize); fmpz_mul(res->coeffs+2*rsize, a->coeffs+asize, b->coeffs+bsize); fmpz_mul(res->coeffs+rsize, scratchb->coeffs, scratchb->coeffs+ssize); fmpz_sub(res->coeffs+rsize, res->coeffs+rsize, res->coeffs); fmpz_sub(res->coeffs+rsize, res->coeffs+rsize, res->coeffs+2*rsize); res->length = a->length + b->length - 1; return; } if ((a->length+b->length <= crossover) || ((a->length == 2) && (b->length == 2))) { _fmpz_poly_mul_classical_trunc_left(res, a, b, trunc); return; } fmpz_poly_t a1, a2, b1, b2; unsigned long l2 = 0; unsigned long old_length; a1->length = (a->length+1)/2; a2->length = a->length-a1->length; a1->coeffs = a->coeffs; a2->coeffs = a->coeffs+a1->length*(a->limbs+1); a1->limbs = a->limbs; a2->limbs = a->limbs; if (a1->length < b->length) //ordinary case { /* (a1+a2*x)*(b1+b2*x) = a1*b1 + a2*b2*x^2 + (a1+a2)*(b1+b2)*x-a1*b1*x-a2*b2*x; */ b1->length = a1->length; b2->length = b->length - b1->length; b1->coeffs = b->coeffs; b2->coeffs = b->coeffs + b1->length*(b->limbs+1); b1->limbs = b->limbs; b2->limbs = b->limbs; /* from 0 for 2 * a1->length - 1, from 2 * a1->length for a2->length + b2->length - 1 will be written directly to, so we need to clean the coefficient in between */ res->coeffs[((a1->length<<1)-1)*(res->limbs+1)] = 0; fmpz_poly_t asum, bsum, prodsum, scratch2, scratch3; asum->length = a1->length; asum->coeffs = scratchb->coeffs; asum->limbs = scratchb->limbs; 
bsum->length = a1->length; bsum->coeffs = scratchb->coeffs + a1->length*(scratchb->limbs+1); bsum->limbs = scratchb->limbs; prodsum->length = (a1->length<<1)-1; prodsum->coeffs = scratch->coeffs; prodsum->limbs = scratch->limbs; /* (a1+a2*x)*(b1+b2*x) = a1*b1 + a2*b2*x^2 + (a1+a2)*(b1+b2)*x-a1*b1*x-a2*b2*x; */ // res_lo = a1*b1 if (trunc > a1->length) __fmpz_poly_karatrunc_left_recursive(res, a1, b1, scratch, scratchb, crossover, trunc - a1->length); else __fmpz_poly_karatrunc_left_recursive(res, a1, b1, scratch, scratchb, crossover, 0); // res_hi = a2*b2 temp->coeffs = res->coeffs+(a1->length<<1)*(res->limbs+1); temp->limbs = res->limbs; if (trunc > a1->length*2) __fmpz_poly_karatrunc_left_recursive(temp, a2, b2, scratch, scratchb, crossover, trunc - a1->length*2); else __fmpz_poly_karatrunc_left_recursive(temp, a2, b2, scratch, scratchb, crossover, 0); if (trunc < 3*a1->length - 1) { // asum = a1+a2 _fmpz_poly_add(asum, a1, a2); // bsum = b1+b2 _fmpz_poly_add(bsum, b1, b2); // prodsum = asum*bsum scratch3->coeffs = scratchb->coeffs+(a1->length<<1)*(scratchb->limbs+1); scratch3->limbs = scratchb->limbs; scratch2->limbs = scratch->limbs; scratch2->coeffs = scratch->coeffs+((a1->length<<1)-1)*(scratch->limbs+1); if (trunc > a1->length) { if (asum->length > bsum->length) __fmpz_poly_karatrunc_left_recursive(prodsum, asum, bsum, scratch2, scratch3, crossover, trunc - a1->length); else __fmpz_poly_karatrunc_left_recursive(prodsum, bsum, asum, scratch2, scratch3, crossover, trunc - a1->length); } else { if (asum->length > bsum->length) __fmpz_poly_karatrunc_left_recursive(prodsum, asum, bsum, scratch2, scratch3, crossover, 0); else __fmpz_poly_karatrunc_left_recursive(prodsum, bsum, asum, scratch2, scratch3, crossover, 0); } for (long i = prodsum->length; i < (a1->length<<1)-1; i++) prodsum->coeffs[i*(prodsum->limbs+1)] = 0L; // prodsum = prodsum - res_lo temp->coeffs = res->coeffs; temp->length = (a1->length<<1)-1; _fmpz_poly_sub(prodsum, prodsum, temp); // prodsum = 
prodsum - res_hi temp->coeffs = res->coeffs + (a1->length<<1)*(res->limbs+1); temp->length = a2->length+b2->length-1; _fmpz_poly_sub(prodsum, prodsum, temp); // res_mid += prodsum temp->coeffs = res->coeffs + a1->length*(res->limbs+1); temp->length = prodsum->length; _fmpz_poly_add(temp, temp, prodsum); } res->length = a->length + b->length - 1; } else { fmpz_poly_t scratch2, temp1; while ((1<length) l2++; if ((1<length) a1->length = (1<length = a->length-a1->length; a1->coeffs = a->coeffs; a2->coeffs = a->coeffs+a1->length*(a->limbs+1); /* The first a1->length + b->length - 1 coefficients will be written to directly, so we need to clean the remaining coefficients */ for (unsigned long i = 0; i < a->length + b->length - 1; i++) // <- Bug (should only need a1->length + b->length to a->length+b->length-1 res->coeffs[i*(res->limbs+1)] = 0L; // res_lo = a1*b if (trunc < a1->length + b->length - 1) __fmpz_poly_karatrunc_left_recursive(res, a1, b, scratch, scratchb, crossover, trunc); //temp = a2*b temp->coeffs = scratch->coeffs; temp->length = a2->length + b->length - 1; temp->limbs = scratch->limbs; scratch2->coeffs = scratch->coeffs+temp->length*(scratch->limbs+1); scratch2->limbs = scratch->limbs; if (trunc > a1->length) { if (b->length <= a2->length) __fmpz_poly_karatrunc_left_recursive(temp, a2, b, scratch2, scratchb, crossover, trunc - a1->length); else __fmpz_poly_karatrunc_left_recursive(temp, b, a2, scratch2, scratchb, crossover, trunc - a1->length); } else { if (b->length <= a2->length) __fmpz_poly_karatrunc_left_recursive(temp, a2, b, scratch2, scratchb, crossover, 0); else __fmpz_poly_karatrunc_left_recursive(temp, b, a2, scratch2, scratchb, crossover, 0); } // res_mid += temp temp1->coeffs = res->coeffs+a1->length*(res->limbs+1); temp1->length = temp->length; temp1->limbs = res->limbs; _fmpz_poly_add(temp1, temp1, temp); res->length = a->length + b->length - 1; } for (unsigned long i = 0; i < trunc; i++) { res->coeffs[i*(res->limbs+1)] = 0L; } } void 
_fmpz_poly_mul_karatsuba_trunc_left(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2, const unsigned long trunc) { if ((poly1->length == 0) || (poly2->length == 0)) { _fmpz_poly_zero(output); return; } unsigned long limbs = output->limbs; unsigned long log_length = 0; unsigned long crossover; fmpz_poly_t input1, input2; if (output == poly1) { _fmpz_poly_stack_init(input1, poly1->length, poly1->limbs); _fmpz_poly_set(input1, poly1); if (output == poly2) { _fmpz_poly_attach(input2, input1); } else _fmpz_poly_attach(input2, poly2); } else if (output == poly2) { _fmpz_poly_stack_init(input2, poly2->length, poly2->limbs); _fmpz_poly_set(input2, poly2); _fmpz_poly_attach(input1, poly1); } else { _fmpz_poly_attach(input1, poly1); _fmpz_poly_attach(input2, poly2); } fmpz_poly_t scratch, scratchb, temp; scratch->coeffs = (fmpz_t) flint_stack_alloc(5*FLINT_MAX(input1->length,input2->length)*(limbs+1)); scratch->limbs = limbs + 1; scratchb->limbs = FLINT_MAX(input1->limbs, input2->limbs) + 1; scratchb->coeffs = (fmpz_t) flint_stack_alloc(5*FLINT_MAX(input1->length, input2->length)*(scratchb->limbs+1)); if (_fmpz_poly_max_limbs(input1) + _fmpz_poly_max_limbs(input2) >= 19) crossover = 0; else crossover = 19 - _fmpz_poly_max_limbs(input1) - _fmpz_poly_max_limbs(input2); if (input1->length >= input2->length) __fmpz_poly_karatrunc_left_recursive(output, input1, input2, scratch, scratchb, crossover, trunc); else __fmpz_poly_karatrunc_left_recursive(output, input2, input1, scratch, scratchb, crossover, trunc); flint_stack_release(); flint_stack_release(); if (trunc >= input1->length+input2->length-1) _fmpz_poly_normalise(output); if (poly1 == output) _fmpz_poly_stack_clear(input1); else if (poly2 == output) _fmpz_poly_stack_clear(input2); } void _fmpz_poly_mul_KS(fmpz_poly_t output, const fmpz_poly_t in1, const fmpz_poly_t in2) { long sign1 = 1L; long sign2 = 1L; unsigned long length1 = in1->length; unsigned long length2 = in2->length; unsigned long final_length 
= length1 + length2 - 1; while ((length1) && (in1->coeffs[(length1-1)*(in1->limbs+1)] == 0L)) length1--; while ((length2) && (in2->coeffs[(length2-1)*(in2->limbs+1)] == 0L)) length2--; if ((length1 == 0) || (length2 == 0)) { _fmpz_poly_zero(output); return; } fmpz_poly_t input1, input2; if (length2 > length1) { unsigned long temp = length1; length1 = length2; length2 = temp; _fmpz_poly_attach(input1, in2); _fmpz_poly_attach(input2, in1); } else { _fmpz_poly_attach(input1, in1); _fmpz_poly_attach(input2, in2); } long bits1, bits2; int bitpack = 0; bits1 = _fmpz_poly_max_bits(input1); bits2 = (in1 == in2) ? bits1 : _fmpz_poly_max_bits(input2); unsigned long sign = ((bits1 < 0) || (bits2 < 0)); unsigned long length = length2; unsigned long log_length = 0L; while ((1<limbs == 1) && (input2->limbs == 1) && (output->limbs == 1)) bitpack = 1; unsigned long bytes = ((bits-1)>>3)+1; if ((long) input1->coeffs[(length1-1)*(input1->limbs+1)] < 0L) sign1 = -1L; if (in1 != in2) { if ((long) input2->coeffs[(length2-1)*(input2->limbs+1)] < 0L) sign2 = -1L; } else sign2 = sign1; ZmodF_poly_t poly1, poly2, poly3; if (bitpack) { ZmodF_poly_stack_init(poly1, 0, (bits*length1-1)/FLINT_BITS+1, 0); if (in1 != in2) ZmodF_poly_stack_init(poly2, 0, (bits*length2-1)/FLINT_BITS+1, 0); if (sign) bits = -1L*bits; if (in1 != in2) fmpz_poly_bit_pack(poly2, input2, length2, bits, length2, sign2); fmpz_poly_bit_pack(poly1, input1, length1, bits, length1, sign1); bits=ABS(bits); } else { ZmodF_poly_stack_init(poly1, 0, ((bytes*length1-1)>>FLINT_LG_BYTES_PER_LIMB)+1, 0); if (in1 != in2) ZmodF_poly_stack_init(poly2, 0, ((bytes*length2-1)>>FLINT_LG_BYTES_PER_LIMB)+1, 0); fmpz_poly_byte_pack(poly1, input1, length1, bytes, length1, sign1); if (in1 != in2) fmpz_poly_byte_pack(poly2, input2, length2, bytes, length2, sign2); } if (in1 == in2) { poly2->coeffs = poly1->coeffs; poly2->n = poly1->n; } ZmodF_poly_stack_init(poly3, 0, poly1->n + poly2->n, 0); mp_limb_t msl = F_mpn_mul(poly3->coeffs[0], 
poly1->coeffs[0], poly1->n, poly2->coeffs[0], poly2->n); poly3->coeffs[0][poly1->n+poly2->n-1] = msl; poly3->coeffs[0][poly1->n+poly2->n] = 0L; poly3->length = 1; output->length = length1 + length2 - 1; for (unsigned long i = 0; i < output->length; i++) { output->coeffs[i*(output->limbs+1)] = 0L; } if (bitpack) { if (sign) fmpz_poly_bit_unpack(output, poly3, length1+length2-1, bits); else fmpz_poly_bit_unpack_unsigned(output, poly3, length1+length2-1, bits); } else { if (sign) fmpz_poly_byte_unpack(output, poly3->coeffs[0], length1+length2-1, bytes); else fmpz_poly_byte_unpack_unsigned(output, poly3->coeffs[0], length1+length2-1, bytes); } ZmodF_poly_stack_clear(poly3); if (in1 != in2) ZmodF_poly_stack_clear(poly2); ZmodF_poly_stack_clear(poly1); if ((long) (sign1 ^ sign2) < 0L) _fmpz_poly_neg(output, output); output->length = length1 + length2 - 1; } void _fmpz_poly_mul_KS_trunc(fmpz_poly_t output, const fmpz_poly_t in1, const fmpz_poly_t in2, const unsigned long trunc) { long sign1 = 1L; long sign2 = 1L; unsigned long length1 = FLINT_MIN(in1->length, trunc); unsigned long length2 = FLINT_MIN(in2->length, trunc); while ((length1) && (in1->coeffs[(length1-1)*(in1->limbs+1)] == 0)) length1--; while ((length2) && (in2->coeffs[(length2-1)*(in2->limbs+1)] == 0)) length2--; if ((length1 == 0) || (length2 == 0)) { _fmpz_poly_zero(output); return; } fmpz_poly_t input1, input2; if (length2 > length1) { unsigned long temp = length1; length1 = length2; length2 = temp; _fmpz_poly_attach(input1, in2); _fmpz_poly_attach(input2, in1); } else { _fmpz_poly_attach(input1, in1); _fmpz_poly_attach(input2, in2); } long bits1, bits2; int bitpack = 0; bits1 = _fmpz_poly_max_bits(input1); bits2 = (in1 == in2) ? 
bits1 : _fmpz_poly_max_bits(input2); unsigned long sign = ((bits1 < 0) || (bits2 < 0)); unsigned long length = length2; unsigned log_length = 0; while ((1<limbs == 1) && (input2->limbs == 1) && (output->limbs == 1)) bitpack = 1; unsigned long bytes = ((bits-1)>>3)+1; if ((long) input1->coeffs[(length1-1)*(input1->limbs+1)] < 0) sign1 = -1L; if (in1 != in2) { if ((long) input2->coeffs[(length2-1)*(input2->limbs+1)] < 0) sign2 = -1L; } else sign2 = sign1; ZmodF_poly_t poly1, poly2, poly3; if (bitpack) { ZmodF_poly_stack_init(poly1, 0, (bits*length1-1)/FLINT_BITS+1, 0); if (in1 != in2) ZmodF_poly_stack_init(poly2, 0, (bits*length2-1)/FLINT_BITS+1, 0); if (sign) bits = -bits; if (in1 != in2) fmpz_poly_bit_pack(poly2, input2, length2, bits, length2, sign2); fmpz_poly_bit_pack(poly1, input1, length1, bits, length1, sign1); bits = ABS(bits); } else { ZmodF_poly_stack_init(poly1, 0, ((bytes*length1-1)>>FLINT_LG_BYTES_PER_LIMB)+1, 0); if (in1 != in2) ZmodF_poly_stack_init(poly2, 0, ((bytes*length2-1)>>FLINT_LG_BYTES_PER_LIMB)+1, 0); fmpz_poly_byte_pack(poly1, input1, length1, bytes, length1, sign1); if (in1 != in2) fmpz_poly_byte_pack(poly2, input2, length2, bytes, length2, sign2); } if (in1 == in2) { poly2->coeffs = poly1->coeffs; poly2->n = poly1->n; } ZmodF_poly_stack_init(poly3, 0, poly1->n + poly2->n, 0); output->length = FLINT_MIN(length1+length2-1, trunc); mp_limb_t msl; if (bitpack) { msl = F_mpn_mul_trunc(poly3->coeffs[0], poly1->coeffs[0], poly1->n, poly2->coeffs[0], poly2->n, (output->length*bits-1)/FLINT_BITS+1); } else { msl = F_mpn_mul_trunc(poly3->coeffs[0], poly1->coeffs[0], poly1->n, poly2->coeffs[0], poly2->n, ((output->length*bytes-1)>>FLINT_LG_BYTES_PER_LIMB) + 1); } poly3->coeffs[0][poly1->n+poly2->n-1] = msl; poly3->coeffs[0][poly1->n+poly2->n] = 0; poly3->length = 1; for (unsigned long i = 0; i < trunc; i++) output->coeffs[i*(output->limbs+1)] = 0; if (bitpack) { if (sign) fmpz_poly_bit_unpack(output, poly3, output->length, bits); else 
fmpz_poly_bit_unpack_unsigned(output, poly3, output->length, bits); } else { if (sign) fmpz_poly_byte_unpack(output, poly3->coeffs[0], output->length, bytes); else fmpz_poly_byte_unpack_unsigned(output, poly3->coeffs[0], output->length, bytes); } ZmodF_poly_stack_clear(poly3); if (in1 != in2) ZmodF_poly_stack_clear(poly2); ZmodF_poly_stack_clear(poly1); if ((long) (sign1 ^ sign2) < 0) _fmpz_poly_neg(output, output); _fmpz_poly_normalise(output); } void _fmpz_poly_mul_SS(fmpz_poly_t output, const fmpz_poly_t in1, const fmpz_poly_t in2) { unsigned long length1 = in1->length; while ((length1) && (in1->coeffs[(length1-1)*(in1->limbs+1)] == 0)) length1--; unsigned long length2; if (in1 != in2) { length2= in2->length; while ((length2) && (in2->coeffs[(length2-1)*(in2->limbs+1)] == 0)) length2--; } else length2 = length1; if ((length1 == 0) || (length2 == 0)) { _fmpz_poly_zero(output); return; } fmpz_poly_t input1, input2; if (length2 > length1) { unsigned long temp = length1; length1 = length2; length2 = temp; _fmpz_poly_attach(input1, in2); _fmpz_poly_attach(input2, in1); } else { _fmpz_poly_attach(input1, in1); _fmpz_poly_attach(input2, in2); } unsigned long size1 = input1->limbs; unsigned long size2 = input2->limbs; unsigned long log_length = 0; while ((1<> (log_length-1)) + 1) << (log_length-1); else output_bits = (((output_bits - 1) >> log_length) + 1) << log_length; unsigned long n = (output_bits - 1) / FLINT_BITS + 1; ZmodF_poly_t poly1, poly2, res; long bits1, bits2; unsigned long sign = 0; ZmodF_poly_stack_init(poly1, log_length + 1, n, 1); if (in1 != in2) ZmodF_poly_stack_init(poly2, log_length + 1, n, 1); ZmodF_poly_stack_init(res, log_length + 1, n, 1); bits1 = fmpz_poly_to_ZmodF_poly(poly1, input1, length1); if (in1 != in2) bits2 = fmpz_poly_to_ZmodF_poly(poly2, input2, length2); else bits2 = bits1; if ((bits1 < 0) || (bits2 < 0)) { sign = 1; bits1 = ABS(bits1); bits2 = ABS(bits2); } /* Recompute the length of n now that we know how large everything really 
is */ output_bits = bits1 + bits2 + log_length2 + sign; if (output_bits <= length1) output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); else output_bits = (((output_bits - 1) >> log_length) + 1) << log_length; n = (output_bits - 1) / FLINT_BITS + 1; ZmodF_poly_decrease_n(poly1, n); if (in1 != in2) ZmodF_poly_decrease_n(poly2, n); ZmodF_poly_decrease_n(res, n); if (in1 != in2) ZmodF_poly_convolution(res, poly1, poly2); else ZmodF_poly_convolution(res, poly1, poly1); ZmodF_poly_normalise(res); output->length = length1 + length2 - 1; ZmodF_poly_to_fmpz_poly(output, res, sign); ZmodF_poly_stack_clear(res); if (in1 != in2) ZmodF_poly_stack_clear(poly2); ZmodF_poly_stack_clear(poly1); } void _fmpz_poly_mul_SS_trunc(fmpz_poly_t output, const fmpz_poly_t in1, const fmpz_poly_t in2, const unsigned long trunc) { unsigned long length1 = FLINT_MIN(in1->length, trunc); while ((length1) && (in1->coeffs[(length1-1)*(in1->limbs+1)] == 0)) length1--; unsigned long length2; if (in1 != in2) { length2 = FLINT_MIN(in2->length, trunc); while ((length2) && (in2->coeffs[(length2-1)*(in2->limbs+1)] == 0)) length2--; } else length2 = length1; if ((length1 == 0) || (length2 == 0)) { _fmpz_poly_zero(output); return; } fmpz_poly_t input1, input2; if (length2 > length1) { unsigned long temp = length1; length1 = length2; length2 = temp; _fmpz_poly_attach(input1, in2); _fmpz_poly_attach(input2, in1); } else { _fmpz_poly_attach(input1, in1); _fmpz_poly_attach(input2, in2); } unsigned long size1 = input1->limbs; unsigned long size2 = input2->limbs; unsigned long log_length = 0; while ((1<> (log_length-1)) + 1) << (log_length-1); else output_bits = (((output_bits - 1) >> log_length) + 1) << log_length; unsigned long n = (output_bits - 1) / FLINT_BITS + 1; ZmodF_poly_t poly1, poly2, res; long bits1, bits2; unsigned long sign = 0; ZmodF_poly_stack_init(poly1, log_length + 1, n, 1); if (in1 != in2) ZmodF_poly_stack_init(poly2, log_length + 1, n, 1); ZmodF_poly_stack_init(res, 
log_length + 1, n, 1); bits1 = fmpz_poly_to_ZmodF_poly(poly1, input1, length1); if (in1 != in2) bits2 = fmpz_poly_to_ZmodF_poly(poly2, input2, length2); else (bits2 = bits1); if ((bits1 < 0) || (bits2 < 0)) { sign = 1; bits1 = ABS(bits1); bits2 = ABS(bits2); } /* Recompute the length of n now that we know how large everything really is */ output_bits = bits1 + bits2 + log_length2 + sign; if (output_bits <= length1) output_bits = (((output_bits - 1) >> (log_length-1)) + 1) << (log_length-1); else output_bits = (((output_bits - 1) >> log_length) + 1) << log_length; n = (output_bits - 1) / FLINT_BITS + 1; ZmodF_poly_decrease_n(poly1, n); if (in1 != in2) ZmodF_poly_decrease_n(poly2, n); ZmodF_poly_decrease_n(res, n); if (in1 != in2) ZmodF_poly_convolution_range(res, poly1, poly2, 0, trunc); else ZmodF_poly_convolution_range(res, poly1, poly1, 0, trunc); res->length = FLINT_MIN(res->length, trunc); ZmodF_poly_normalise(res); output->length = FLINT_MIN(length1 + length2 - 1, trunc); ZmodF_poly_to_fmpz_poly(output, res, sign); ZmodF_poly_stack_clear(res); if (in1 != in2) ZmodF_poly_stack_clear(poly2); ZmodF_poly_stack_clear(poly1); _fmpz_poly_normalise(output); } void _fmpz_poly_mul(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2) { if ((input1->length == 0) || (input2->length == 0)) { _fmpz_poly_zero(output); return; } if ((input1->length <= 2) && (input2->length <= 2)) { _fmpz_poly_mul_karatsuba(output, input1, input2); return; } if ((input1->limbs <= 256/FLINT_BITS) && (input1->limbs >= 200/FLINT_BITS) && (input1->length == 256)) { _fmpz_poly_mul_SS(output, input1, input2); return; } if (input1->limbs + input2->limbs <= 512/FLINT_BITS) { _fmpz_poly_mul_KS(output, input1, input2); return; } if (input1->length + input2->length <= 32) { _fmpz_poly_mul_karatsuba(output, input1, input2); return; } unsigned long bits1 = _fmpz_poly_max_bits(input1); unsigned long bits2 = (input1 == input2) ? 
bits1 : _fmpz_poly_max_bits(input2); bits1 = ABS(bits1); bits2 = ABS(bits2); if (3*(bits1 + bits2) >= input1->length + input2->length) { _fmpz_poly_mul_SS(output, input1, input2); return; } _fmpz_poly_mul_KS(output, input1, input2); } /* A truncating polynomial multiplication. The number of terms require, _trunc_ can be any value, but the function is tuned for truncation to length n where both inputs have length approximately n. */ void _fmpz_poly_mul_trunc_n(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc) { if ((input1->length == 0) || (input2->length == 0)) { _fmpz_poly_zero(output); return; } if ((input1->length <= 3) && (input2->length <= 3)) { _fmpz_poly_mul_karatsuba_trunc(output, input1, input2, trunc); return; } unsigned long bits1 = _fmpz_poly_max_bits(input1); unsigned long bits2 = (input1 == input2) ? bits1 : _fmpz_poly_max_bits(input2); bits1 = ABS(bits1); bits2 = ABS(bits2); if ((bits1 + bits2 >= 64) && (input1->length + input2->length <= 10)) { _fmpz_poly_mul_karatsuba_trunc(output, input1, input2, trunc); return; } if ((bits1 + bits2 >= 370) && (input1->length + input2->length <= 32)) { _fmpz_poly_mul_karatsuba_trunc(output, input1, input2, trunc); return; } if (bits1 + bits2 < 512) { _fmpz_poly_mul_KS_trunc(output, input1, input2, trunc); return; } if (3*(bits1 + bits2) >= input1->length + input2->length) { _fmpz_poly_mul_SS_trunc(output, input1, input2, trunc); return; } _fmpz_poly_mul_KS_trunc(output, input1, input2, trunc); } /* A truncating polynomial multiplication which ignores the first trunc coeffs of the output (which can end up being anything - often zero). The number of zero terms, _trunc_ can be any value, but the function is tuned for truncation of length n-1 terms, where both inputs have length approximately n. 
*/

/*
   Only selects an algorithm. Short or "wide" inputs are passed to
   _fmpz_poly_mul_karatsuba_trunc_left together with trunc; all other
   inputs are multiplied in full with _fmpz_poly_mul_KS or _fmpz_poly_mul_SS,
   which do not take trunc.
*/

void _fmpz_poly_mul_trunc_left_n(fmpz_poly_t output, const fmpz_poly_t input1,
                                const fmpz_poly_t input2, const unsigned long trunc)
{
   if ((input1->length == 0) || (input2->length == 0))
   {
      _fmpz_poly_zero(output);
      return;
   }

   if ((input1->length <= 3) && (input2->length <= 3))
   {
      _fmpz_poly_mul_karatsuba_trunc_left(output, input1, input2, trunc);
      return;
   }

   unsigned long bits1 = _fmpz_poly_max_bits(input1);
   unsigned long bits2 = (input1 == input2) ? bits1 : _fmpz_poly_max_bits(input2);
   bits1 = ABS(bits1);
   bits2 = ABS(bits2);

   if ((bits1 + bits2 >= 64) && (input1->length + input2->length <= 10))
   {
      _fmpz_poly_mul_karatsuba_trunc_left(output, input1, input2, trunc);
      return;
   }

   if ((bits1 + bits2 >= 370) && (input1->length + input2->length <= 32))
   {
      _fmpz_poly_mul_karatsuba_trunc_left(output, input1, input2, trunc);
      return;
   }

   if (bits1 + bits2 < 512)
   {
      _fmpz_poly_mul_KS(output, input1, input2);
      return;
   }

   if (3*(bits1 + bits2) >= input1->length + input2->length)
   {
      _fmpz_poly_mul_SS(output, input1, input2);
      return;
   }

   _fmpz_poly_mul_KS(output, input1, input2);
}

/*===========================================================================

   fmpz_poly_* layer

===========================================================================*/

/****************************************************************************

   Memory management

****************************************************************************/

/*
   Create a polynomial of length zero with zero allocated coefficients
*/

void fmpz_poly_init(fmpz_poly_t poly)
{
   poly->coeffs = NULL;

   poly->alloc = 0;
   poly->length = 0;
   poly->limbs = 0;
}

/*
   Create a polynomial of length zero with "alloc" allocated coefficients
   each with enough space for "limbs" limbs

   Each coefficient occupies limbs+1 words. No memory is allocated unless
   both alloc and limbs are positive when viewed as signed values; alloc
   and limbs are recorded in the polynomial either way.
*/

void fmpz_poly_init2(fmpz_poly_t poly, const unsigned long alloc, const unsigned long limbs)
{
   if (((long)alloc > 0) && ((long)limbs > 0))
   {
      poly->coeffs = (fmpz_t) flint_heap_alloc(alloc*(limbs+1));
   }
   else poly->coeffs = NULL;

   poly->alloc = alloc;
   poly->length = 0;
   poly->limbs = limbs;
}

/*
   Shrink or expand a polynomial to "alloc" coefficients

   If the polynomial currently has limbs == 0 only the alloc field is
   updated. A non-positive alloc releases the coefficient storage and
   resets limbs to 0. If the new alloc is smaller than the current length,
   the polynomial is truncated and renormalised.
*/

void fmpz_poly_realloc(fmpz_poly_t poly, const unsigned long alloc)
{
   if (poly->limbs > 0)
   {
      if ((long)alloc > 0)
      {
         if (poly->alloc) poly->coeffs = (mp_limb_t*) flint_heap_realloc(poly->coeffs, alloc*(poly->limbs+1));
         else poly->coeffs = (mp_limb_t*) flint_heap_alloc(alloc*(poly->limbs+1));
      } else
      {
         if (poly->coeffs) flint_heap_free(poly->coeffs);
         poly->coeffs = NULL;
         poly->limbs = 0;
      }

      poly->alloc = alloc;

      /* truncate actual data if necessary */
      if (poly->length > alloc)
      {
         poly->length = alloc;
         _fmpz_poly_normalise(poly);
      }
   } else
   {
      poly->alloc = alloc;
   }
}

/*
   Make sure the polynomial has room for at least alc coefficients.

   Nothing happens if the current allocation already suffices; otherwise
   the allocation grows to the larger of alc and twice the current
   allocation.
*/

void fmpz_poly_fit_length(fmpz_poly_t poly, const unsigned long alc)
{
   unsigned long alloc = alc;
   if (alloc <= poly->alloc) return;

   if (alloc < 2*poly->alloc) alloc = 2*poly->alloc;

   fmpz_poly_realloc(poly, alloc);
}

/*
   Change the space per coefficient to "limbs" limbs (limbs+1 words).

   Shrinking compacts the existing coefficients in place inside the current
   buffer. Growing copies the coefficients into a newly allocated buffer of
   alloc coefficients and frees the old one. In both cases the size words
   of the allocated coefficients beyond the current length are zeroed.

   A non-positive limbs releases the coefficient storage and sets length
   and limbs to 0. The coeffs pointer is reset to NULL in that case, so
   that a later fmpz_poly_clear, or a later call that grows the polynomial
   again, does not free the same buffer a second time.
*/

void fmpz_poly_resize_limbs(fmpz_poly_t poly, const unsigned long limbs)
{
   if ((long)limbs > 0)
   {
      if (limbs == poly->limbs) return;

      unsigned long i = 0;
      fmpz_t coeff_i = NULL;
      fmpz_t coeff_i_old = poly->coeffs;

      if (limbs < poly->limbs)
      {
         /* Coefficient 0 is already in place, start compacting from 1 */
         coeff_i = poly->coeffs + limbs+1;
         coeff_i_old += (poly->limbs+1);
         for (i = 1; i < poly->length; i++)
         {
            F_mpn_copy_forward(coeff_i, coeff_i_old, limbs+1);
            /* NOTE(review): this condition looks inverted (a coefficient
               that fits would satisfy <= limbs) - confirm before enabling
               assertions */
            FLINT_ASSERT(ABS(coeff_i[0]) > limbs);
            coeff_i += (limbs+1);
            coeff_i_old += (poly->limbs+1);
         }
      } else
      {
         if (poly->alloc)
         {
            fmpz_t temp_coeffs = (mp_limb_t*) flint_heap_alloc(poly->alloc*(limbs+1));
            coeff_i = temp_coeffs;
            for (i = 0; i < poly->length; i++)
            {
               F_mpn_copy(coeff_i, coeff_i_old, poly->limbs+1);
               coeff_i += (limbs+1);
               coeff_i_old += (poly->limbs+1);
            }
            if (poly->coeffs) flint_heap_free(poly->coeffs);
            poly->coeffs = temp_coeffs;
         }
      }

      /* Zero the size word of every allocated coefficient past the ones
         handled above */
      for ( ; i < poly->alloc; i++)
      {
         coeff_i[0] = 0;
         coeff_i += (limbs+1);
      }

      poly->limbs = limbs;
   } else
   {
      if (poly->coeffs) flint_heap_free(poly->coeffs);
      poly->coeffs = NULL;   /* do not leave a dangling pointer behind */
      poly->length = 0;
      poly->limbs = 0;
   }
}

/*
   Release the coefficient storage of a polynomial, if there is any.
   The fields of poly are left untouched.
*/

void fmpz_poly_clear(fmpz_poly_t poly)
{
   if (poly->coeffs) flint_heap_free(poly->coeffs);
}
/**************************************************************************** Polynomial Checking ****************************************************************************/ /* Used for debugging polynomial code Checks that length <= alloc and that both are positive or zero Checks that limbs >= 0 otherwise Checks that each coefficient has at most _limbs_ limbs */ void fmpz_poly_check(const fmpz_poly_t poly) { if ((long) poly->alloc < 0) { printf("Error: Poly alloc < 0\n"); abort(); } if ((long) poly->length < 0) { printf("Error: Poly length < 0\n"); abort(); } if (poly->length > poly->alloc) { printf("Error: Poly length = %ld > alloc = %ld\n", poly->length, poly->alloc); abort(); } if ((long) poly->limbs < 0) { printf("Error: Poly limbs < 0\n"); abort(); } for (unsigned long i = 0; i < poly->length; i++) { if (FLINT_ABS(poly->coeffs[i*(poly->limbs+1)]) > poly->limbs) { printf("Error: coefficient %ld is too large (%ld limbs vs %ld limbs)\n", i, FLINT_ABS(poly->coeffs[i*(poly->limbs+1)]), poly->limbs); abort(); } } } void fmpz_poly_check_normalisation(const fmpz_poly_t poly) { if (poly->length) { if (!poly->coeffs[(poly->length-1)*(poly->limbs+1)]) { printf("Error: Poly not normalised\n"); abort(); } } if ((long) poly->alloc < 0) { printf("Error: Poly alloc < 0\n"); abort(); } if ((long) poly->length < 0) { printf("Error: Poly length < 0\n"); abort(); } if (poly->length > poly->alloc) { printf("Error: Poly length = %ld > alloc = %ld\n", poly->length, poly->alloc); abort(); } if ((long) poly->limbs < 0) { printf("Error: Poly limbs < 0\n"); abort(); } for (unsigned long i = 0; i < poly->length; i++) { if (FLINT_ABS(poly->coeffs[i*(poly->limbs+1)]) > poly->limbs) { printf("Error: coefficient %ld is too large (%ld limbs vs %ld limbs)\n", i, FLINT_ABS(poly->coeffs[i*(poly->limbs+1)]), poly->limbs); abort(); } } } /**************************************************************************** Coefficient setting and retrieval 
****************************************************************************/ void fmpz_poly_get_coeff_mpz(mpz_t x, const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) mpz_set_ui(x, 0); else _fmpz_poly_get_coeff_mpz(x, poly, n); } void fmpz_poly_get_coeff_mpz_read_only(mpz_t x, const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) { x->_mp_alloc = 1; x->_mp_d = (mp_limb_t *) &poly; // We need to point to something, and at least this exists x->_mp_size = 0; } else _fmpz_poly_get_coeff_mpz_read_only(x, poly, n); } /**************************************************************************** String conversions and I/O ****************************************************************************/ int fmpz_poly_from_string(fmpz_poly_t poly, const char* s) { int ok; mpz_poly_t p; mpz_poly_init(p); ok = mpz_poly_from_string(p, s); if (ok) { mpz_poly_to_fmpz_poly(poly, p); } mpz_poly_clear(p); return ok; } char* fmpz_poly_to_string(const fmpz_poly_t poly) { char* buf; mpz_poly_t m_poly; mpz_poly_init(m_poly); fmpz_poly_to_mpz_poly(m_poly, poly); buf = mpz_poly_to_string(m_poly); mpz_poly_clear(m_poly); return buf; } char* fmpz_poly_to_string_pretty(const fmpz_poly_t poly, const char * x) { char* buf; mpz_poly_t m_poly; mpz_poly_init(m_poly); fmpz_poly_to_mpz_poly(m_poly, poly); buf = mpz_poly_to_string_pretty(m_poly, x); mpz_poly_clear(m_poly); return buf; } void fmpz_poly_fprint(const fmpz_poly_t poly, FILE* f) { char* s = fmpz_poly_to_string(poly); fputs(s, f); free(s); } void fmpz_poly_fprint_pretty(const fmpz_poly_t poly, FILE* f, const char * x) { char* s = fmpz_poly_to_string_pretty(poly, x); fputs(s, f); free(s); } void fmpz_poly_print(const fmpz_poly_t poly) { fmpz_poly_fprint(poly, stdout); } void fmpz_poly_print_pretty(const fmpz_poly_t poly, const char * x) { fmpz_poly_fprint_pretty(poly, stdout, x); } int fmpz_poly_fread(fmpz_poly_t poly, FILE* f) { int ok; mpz_poly_t p; mpz_poly_init(p); ok = mpz_poly_fread(p, f); if (ok) { 
mpz_poly_to_fmpz_poly(poly, p); } mpz_poly_clear(p); return ok; } /**************************************************************************** Scalar multiplications and divisions ****************************************************************************/ void fmpz_poly_scalar_mul_ui(fmpz_poly_t output, const fmpz_poly_t input, unsigned long x) { if ((input->length == 0) || (x == 0)) { _fmpz_poly_zero(output); return; } unsigned long limbs, bits, max_limbs, max_bits, x_bits, top_bits; max_bits = (input->limbs << FLINT_LG_BITS_PER_LIMB); max_limbs = 0; bits = 0; top_bits = 0; x_bits = FLINT_BIT_COUNT(x); fmpz_t next_coeff = input->coeffs; unsigned long size = input->limbs+1; unsigned long i; for (i = 0; (i < input->length) && (top_bits + x_bits <= max_bits); i++) { limbs = ABS(next_coeff[0]); if ((limbs >= max_limbs) && (limbs)) { max_limbs = limbs; bits = ((limbs - 1) << FLINT_LG_BITS_PER_LIMB) + FLINT_BIT_COUNT(next_coeff[limbs]); if (bits > top_bits) top_bits = bits; } next_coeff += size; } fmpz_poly_fit_length(output, input->length); if (i < input->length) { fmpz_poly_fit_limbs(output, input->limbs + 1); } else { fmpz_poly_fit_limbs(output, ((top_bits + x_bits - 1) >> FLINT_LG_BITS_PER_LIMB) + 1); } _fmpz_poly_scalar_mul_ui(output, input, x); } void fmpz_poly_scalar_mul_si(fmpz_poly_t output, const fmpz_poly_t input, long x) { if ((input->length == 0) || (x == 0)) { _fmpz_poly_zero(output); return; } unsigned long limbs, bits, max_limbs, max_bits, x_bits, top_bits; max_bits = (input->limbs << FLINT_LG_BITS_PER_LIMB); max_limbs = 0; bits = 0; top_bits = 0; x_bits = FLINT_BIT_COUNT(FLINT_ABS(x)); fmpz_t next_coeff = input->coeffs; unsigned long size = input->limbs+1; unsigned long i; for (i = 0; (i < input->length) && (top_bits + x_bits <= max_bits); i++) { limbs = ABS(next_coeff[0]); if ((limbs >= max_limbs) && (limbs)) { max_limbs = limbs; bits = ((limbs - 1) << FLINT_LG_BITS_PER_LIMB) + FLINT_BIT_COUNT(next_coeff[limbs]); if (bits > top_bits) top_bits = 
bits; } next_coeff += size; } fmpz_poly_fit_length(output, input->length); if (i < input->length) { fmpz_poly_fit_limbs(output, input->limbs + 1); } else { fmpz_poly_fit_limbs(output, ((top_bits + x_bits - 1) >> FLINT_LG_BITS_PER_LIMB) + 1); } _fmpz_poly_scalar_mul_si(output, input, x); } void fmpz_poly_scalar_mul_fmpz(fmpz_poly_t output, const fmpz_poly_t input, const fmpz_t x) { if ((input->length == 0) || (x[0] == 0)) { _fmpz_poly_zero(output); return; } unsigned long limbs, bits, max_limbs, max_bits, x_bits, top_bits; max_bits = ((input->limbs + ABS(x[0]) - 1) << FLINT_LG_BITS_PER_LIMB); max_limbs = 0; bits = 0; top_bits = 0; x_bits = ABS(fmpz_bits(x)); fmpz_t next_coeff = input->coeffs; unsigned long size = input->limbs+1; unsigned long i; for (i = 0; (i < input->length) && (top_bits + x_bits <= max_bits); i++) { limbs = ABS(next_coeff[0]); if ((limbs >= max_limbs) && (limbs)) { max_limbs = limbs; bits = ((limbs - 1) << FLINT_LG_BITS_PER_LIMB) + FLINT_BIT_COUNT(next_coeff[limbs]); if (bits > top_bits) top_bits = bits; } next_coeff += size; } fmpz_poly_fit_length(output, input->length); if (i < input->length) { fmpz_poly_fit_limbs(output, input->limbs + ABS(x[0])); } else { fmpz_poly_fit_limbs(output, ((top_bits + x_bits - 1) >> FLINT_LG_BITS_PER_LIMB) + 1); } _fmpz_poly_scalar_mul_fmpz(output, input, x); } void fmpz_poly_scalar_mul_mpz(fmpz_poly_t output, const fmpz_poly_t input, const mpz_t x) { if ((input->length == 0) || (mpz_sgn(x) == 0)) { _fmpz_poly_zero(output); return; } fmpz_t x_fmpz = fmpz_init(mpz_size(x)); mpz_to_fmpz(x_fmpz, x); fmpz_poly_scalar_mul_fmpz(output, input, x_fmpz); fmpz_clear(x_fmpz); } void fmpz_poly_scalar_div_fmpz(fmpz_poly_t output, const fmpz_poly_t input, const fmpz_t x) { if (input->length == 0) { _fmpz_poly_zero(output); return; } fmpz_poly_fit_length(output, input->length); unsigned long inlimbs = _fmpz_poly_max_limbs(input); unsigned long xlimbs = ABS(x[0]); if (inlimbs >= xlimbs) fmpz_poly_fit_limbs(output, inlimbs - xlimbs 
+ 1);
   else fmpz_poly_fit_limbs(output, 1);

   _fmpz_poly_scalar_div_fmpz(output, input, x);
}

/*
   Divide input by the mpz_t x. x is converted to a temporary fmpz_t and
   the work is done by fmpz_poly_scalar_div_fmpz. An input of length zero
   gives the zero polynomial without x being examined.
*/

void fmpz_poly_scalar_div_mpz(fmpz_poly_t output,
                          const fmpz_poly_t input, const mpz_t x)
{
   if (input->length == 0)
   {
      _fmpz_poly_zero(output);
      return;
   }

   fmpz_t x_fmpz = fmpz_init(mpz_size(x));
   mpz_to_fmpz(x_fmpz, x);
   fmpz_poly_scalar_div_fmpz(output, input, x_fmpz);
   fmpz_clear(x_fmpz);
}

/****************************************************************************

   Multiplication

****************************************************************************/

/*
   Managed multiplication: prepares output and then calls _fmpz_poly_mul.
   If either input has length zero, output is given room for one
   coefficient of one limb and set to zero.
*/

void fmpz_poly_mul(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2)
{
   if ((input1->length == 0) || (input2->length == 0))
   {
      fmpz_poly_fit_length(output, 1);
      fmpz_poly_fit_limbs(output, 1);
      _fmpz_poly_zero(output);
      return;
   }

   unsigned long limbs = input1->limbs + input2->limbs;
   unsigned long total_length = input1->length + input2->length - 1;

   long bits1, bits2;

   bits1 = _fmpz_poly_max_bits(input1);
   bits2 = (input1 == input2) ? bits1 : _fmpz_poly_max_bits(input2);

   /* sign is set when either input reports a negative bit count */
   unsigned long sign = ((bits1 < 0) || (bits2 < 0));
   /* length of the shorter input */
   unsigned long length = (input1->length > input2->length) ? input2->length : input1->length;
   unsigned log_length = 0;
   /* NOTE(review): the next line is damaged in this copy of the file - the
      end of the loop condition and the statements that followed it are
      missing. Restore from a pristine copy of the source. */
   while ((1<length + input2->length - 1);

   _fmpz_poly_mul(output, input1, input2);
}

/*
   Managed truncating multiplication: calls _fmpz_poly_mul_trunc_n with the
   truncation length capped at input1->length + input2->length - 1.
*/

void fmpz_poly_mul_trunc_n(fmpz_poly_t output, const fmpz_poly_t input1,
                                          const fmpz_poly_t input2, const unsigned long trunc)
{
   long bits1, bits2;

   bits1 = _fmpz_poly_max_bits(input1);
   bits2 = (input1 == input2) ? bits1 : _fmpz_poly_max_bits(input2);

   unsigned long sign = ((bits1 < 0) || (bits2 < 0));
   unsigned long length = (input1->length > input2->length) ? input2->length : input1->length;
   unsigned log_length = 0;
   /* NOTE(review): the next line is damaged in this copy of the file - the
      end of the loop condition and the statements that followed it are
      missing. Restore from a pristine copy of the source. */
   while ((1<length + input2->length - 1, trunc));

   _fmpz_poly_mul_trunc_n(output, input1, input2, FLINT_MIN(input1->length + input2->length - 1, trunc));
}

/*
   Managed left-truncating multiplication: prepares output and then calls
   _fmpz_poly_mul_trunc_left_n.
*/

void fmpz_poly_mul_trunc_left_n(fmpz_poly_t output, const fmpz_poly_t input1,
                                          const fmpz_poly_t input2, const unsigned long trunc)
{
   unsigned long limbs = input1->limbs + input2->limbs;

   long bits1, bits2;

   bits1 = _fmpz_poly_max_bits(input1);
   bits2 = (input1 == input2) ? bits1 : _fmpz_poly_max_bits(input2);

   unsigned long sign = ((bits1 < 0) || (bits2 < 0));
   unsigned long length = (input1->length > input2->length) ? input2->length : input1->length;
   unsigned log_length = 0;
   /* NOTE(review): the next line is damaged in this copy of the file - the
      end of the loop condition and the statements that followed it are
      missing. Restore from a pristine copy of the source. */
   while ((1<length + input2->length) fmpz_poly_fit_length(output, input1->length + input2->length - 1);

   _fmpz_poly_mul_trunc_left_n(output, input1, input2, trunc);
}

/****************************************************************************

   Division

****************************************************************************/

/*
   Classical (schoolbook) division with remainder: computes a quotient Q
   and remainder R of A by B.

   R starts out as a copy of A. Working from the top coefficient down, each
   coefficient of R whose absolute value is at least that of the leading
   coefficient of B is divided by that leading coefficient; the integer
   quotient becomes a coefficient of Q and the corresponding multiple of B
   is subtracted from R. Coefficients of R that are smaller in absolute
   value than the leading coefficient of B give a zero quotient coefficient
   and are left in R.

   Prints a message and aborts if B has length zero.
*/

void fmpz_poly_divrem_classical(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B)
{
   fmpz_poly_t qB;

   if (B->length == 0)
   {
      printf("Error: Divide by zero\n");
      abort();
   }

   /* index of the coefficient of A (later of R) currently being examined */
   long coeff = A->length-1;

   /* each coefficient occupies limbs+1 words: a size/sign word, then the limbs */
   unsigned long size_A = A->limbs + 1;
   unsigned long size_B = B->limbs + 1;
   unsigned long size_R;
   unsigned long limbs_R;
   unsigned long size_Q;
   unsigned long limbs_Q;
   fmpz_t coeffs_A = A->coeffs;
   fmpz_t coeffs_B = B->coeffs;
   fmpz_t coeff_i = coeffs_A + coeff*size_A;
   fmpz_t B_lead = coeffs_B + (B->length-1)*size_B;
   fmpz_t coeff_Q;
   fmpz_t coeffs_R;

   NORM(B_lead);

   unsigned long size_B_lead = ABS(B_lead[0]);
   mp_limb_t sign_B_lead = B_lead[0];
   mp_limb_t sign_quot;

   /* Skip down from the top of A past every coefficient that is smaller in
      absolute value than B_lead (compared first by limb count, then by
      mpn_cmp). This determines the length of the quotient. */
   while (1)
   {
      if (coeff < (long) B->length - 1) break;
      NORM(coeff_i);
      if (ABS(coeff_i[0]) < size_B_lead)
      {
         coeff--;
         coeff_i -= size_A;
      } else if (ABS(coeff_i[0]) > size_B_lead) break;
      else if (mpn_cmp(coeff_i+1, B_lead+1, size_B_lead) >= 0) break;
      else
      {
         coeff--;
         coeff_i -= size_A;
      }
   }

   /* scratch space for the remainder limbs written by mpn_tdiv_qr */
   fmpz_t rem = (fmpz_t) flint_heap_alloc(size_B_lead);

   /* Set R to A */
   fmpz_poly_fit_length(R, A->length);
   fmpz_poly_fit_limbs(R, A->limbs);
   R->length = A->length;
   _fmpz_poly_set(R, A);
   coeffs_R = R->coeffs;
   size_R = R->limbs+1;

   /* Size the quotient, or set it to zero if no division step will happen */
   if (coeff >= (long) B->length - 1)
   {
      fmpz_poly_fit_length(Q, coeff-B->length+2);
      fmpz_poly_fit_limbs(Q, 1);
      Q->length = coeff-B->length+2;
      size_Q = Q->limbs+1;
   } else _fmpz_poly_zero(Q);

   while (coeff >= (long) B->length - 1)
   {
      coeff_Q = Q->coeffs+(coeff-B->length+1)*size_Q;

      /* Quotient coefficients are zero wherever the coefficient of R is
         smaller in absolute value than B_lead */
      while (1)
      {
         if (coeff < (long) B->length - 1) break;
         NORM(coeffs_R+coeff*size_R);
         if (ABS(coeffs_R[coeff*size_R]) < size_B_lead)
         {
            coeff_Q[0] = 0;
            coeff_Q -= size_Q;
            coeff--;
         } else if (ABS(coeffs_R[coeff*size_R]) > size_B_lead) break;
         else if (mpn_cmp(coeffs_R+coeff*size_R+1, B_lead+1, size_B_lead) >= 0) break;
         else
         {
            coeff_Q[0] = 0;
            coeff_Q -= size_Q;
            coeff--;
         }
      }

      if (coeff >= (long) B->length - 1)
      {
         /* Make room for the quotient coefficient. fmpz_poly_fit_limbs is
            followed by recomputing size_Q and coeff_Q from Q, since the
            coefficient layout of Q depends on Q->limbs. */
         limbs_Q = ABS(coeffs_R[coeff*size_R]) - size_B_lead + 1;
         fmpz_poly_fit_limbs(Q, limbs_Q);
         size_Q = Q->limbs+1;
         coeff_Q = Q->coeffs+(coeff - B->length+1)*size_Q;

         /* limb count of the quotient before normalisation; stored with the
            appropriate sign in the size word below */
         sign_quot = ABS(coeffs_R[coeff*size_R]) - size_B_lead + 1;

         if (((long) (sign_B_lead ^ coeffs_R[coeff*size_R])) < 0)
         {
            /* Operands of opposite sign: the quotient is negative, and one
               more is subtracted from it when the remainder is nonzero */
            mpn_tdiv_qr(coeff_Q+1, rem, 0, coeffs_R+coeff*size_R+1, ABS(coeffs_R[coeff*size_R]), B_lead+1, size_B_lead);
            coeff_Q[0] = -sign_quot;
            for (unsigned long i = 0; i < size_B_lead; i++)
            {
               if (rem[i])
               {
                  fmpz_sub_ui_inplace(coeff_Q,1);
                  break;
               }
            }
         }
         else
         {
            mpn_tdiv_qr(coeff_Q+1, rem, 0, coeffs_R+coeff*size_R+1, ABS(coeffs_R[coeff*size_R]), B_lead+1, size_B_lead);
            coeff_Q[0] = sign_quot;
         }
         NORM(coeff_Q);

         /* qB = (quotient coefficient) * B */
         fmpz_poly_init2(qB, B->length, B->limbs+ABS(coeff_Q[0]));
         _fmpz_poly_scalar_mul_fmpz(qB, B, coeff_Q);

         /* R may be resized here, so its pointer and stride are reloaded */
         fmpz_poly_fit_limbs(R, qB->limbs+1);
         coeffs_R = R->coeffs;
         size_R = R->limbs+1;

         /* R_sub is a window onto the B->length coefficients of R ending at
            index coeff; qB is subtracted from it in place */
         fmpz_poly_t R_sub;
         R_sub->coeffs = coeffs_R+(coeff - B->length + 1)*size_R;
         R_sub->limbs = R->limbs;
         R_sub->length = B->length;
         _fmpz_poly_sub(R_sub, R_sub, qB);

         coeff--;
         fmpz_poly_clear(qB);
      }
   }

   _fmpz_poly_normalise(R);
   flint_heap_free(rem);
}

/*
   Divides A by B and
returns the quotient Q, but only the low half of the remainder R

   The structure is the same as fmpz_poly_divrem_classical, except that at
   each step only the lowest B->length - 1 coefficients of B are multiplied
   by the quotient coefficient and subtracted from R, the coefficient of R
   that was divided is set to zero directly, and on exit the length of R is
   set to B->length - 1 before normalisation.

   Prints a message and aborts if B has length zero.

   NOTE(review): R->length is set to B->length - 1 unconditionally at the
   end; this presumably relies on A having at least that many coefficients -
   confirm against the callers.
*/

void fmpz_poly_divrem_classical_low(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B)
{
   fmpz_poly_t qB;

   if (B->length == 0)
   {
      printf("Error: Divide by zero\n");
      abort();
   }

   /* index of the coefficient of A (later of R) currently being examined */
   long coeff = A->length-1;

   /* each coefficient occupies limbs+1 words: a size/sign word, then the limbs */
   unsigned long size_A = A->limbs + 1;
   unsigned long size_B = B->limbs + 1;
   unsigned long size_R;
   unsigned long limbs_R;
   unsigned long size_Q;
   unsigned long limbs_Q;
   fmpz_t coeffs_A = A->coeffs;
   fmpz_t coeffs_B = B->coeffs;
   fmpz_t coeff_i = coeffs_A + coeff*size_A;
   fmpz_t B_lead = coeffs_B + (B->length-1)*size_B;
   fmpz_t coeff_Q;
   fmpz_t coeffs_R;

   NORM(B_lead);

   unsigned long size_B_lead = ABS(B_lead[0]);
   mp_limb_t sign_B_lead = B_lead[0];
   mp_limb_t sign_quot;

   /* Skip down from the top of A past every coefficient that is smaller in
      absolute value than B_lead (compared first by limb count, then by
      mpn_cmp). This determines the length of the quotient. */
   while (1)
   {
      if (coeff < (long) B->length - 1) break;
      NORM(coeff_i);
      if (ABS(coeff_i[0]) < size_B_lead)
      {
         coeff--;
         coeff_i -= size_A;
      } else if (ABS(coeff_i[0]) > size_B_lead) break;
      else if (mpn_cmp(coeff_i+1, B_lead+1, size_B_lead) >= 0) break;
      else
      {
         coeff--;
         coeff_i -= size_A;
      }
   }

   /* scratch space for the remainder limbs written by mpn_tdiv_qr */
   fmpz_t rem = (fmpz_t) flint_heap_alloc(size_B_lead);

   /* Set R to A */
   fmpz_poly_fit_length(R, A->length);
   fmpz_poly_fit_limbs(R, A->limbs);
   R->length = A->length;
   _fmpz_poly_set(R, A);
   coeffs_R = R->coeffs;
   size_R = R->limbs+1;

   /* Size the quotient, or set it to zero if no division step will happen */
   if (coeff >= (long) B->length - 1)
   {
      fmpz_poly_fit_length(Q, coeff-B->length+2);
      fmpz_poly_fit_limbs(Q, 1);
      Q->length = coeff-B->length+2;
      size_Q = Q->limbs+1;
   } else _fmpz_poly_zero(Q);

   while (coeff >= (long) B->length - 1)
   {
      coeff_Q = Q->coeffs+(coeff-B->length+1)*size_Q;

      /* Quotient coefficients are zero wherever the coefficient of R is
         smaller in absolute value than B_lead */
      while (1)
      {
         if (coeff < (long) B->length - 1) break;
         NORM(coeffs_R+coeff*size_R);
         if (ABS(coeffs_R[coeff*size_R]) < size_B_lead)
         {
            coeff_Q[0] = 0;
            coeff_Q -= size_Q;
            coeff--;
         } else if (ABS(coeffs_R[coeff*size_R]) > size_B_lead) break;
         else if (mpn_cmp(coeffs_R+coeff*size_R+1, B_lead+1, size_B_lead) >= 0) break;
         else
         {
            coeff_Q[0] = 0;
            coeff_Q -= size_Q;
            coeff--;
         }
      }

      if (coeff >= (long) B->length - 1)
      {
         /* Make room for the quotient coefficient. fmpz_poly_fit_limbs is
            followed by recomputing size_Q and coeff_Q from Q, since the
            coefficient layout of Q depends on Q->limbs. */
         limbs_Q = ABS(coeffs_R[coeff*size_R]) - size_B_lead + 1;
         fmpz_poly_fit_limbs(Q, limbs_Q);
         size_Q = Q->limbs+1;
         coeff_Q = Q->coeffs+(coeff - B->length+1)*size_Q;

         /* limb count of the quotient before normalisation; stored with the
            appropriate sign in the size word below */
         sign_quot = ABS(coeffs_R[coeff*size_R]) - size_B_lead + 1;

         if (((long) (sign_B_lead ^ coeffs_R[coeff*size_R])) < 0)
         {
            /* Operands of opposite sign: the quotient is negative, and one
               more is subtracted from it when the remainder is nonzero */
            mpn_tdiv_qr(coeff_Q+1, rem, 0, coeffs_R+coeff*size_R+1, ABS(coeffs_R[coeff*size_R]), B_lead+1, size_B_lead);
            coeff_Q[0] = -sign_quot;
            for (unsigned long i = 0; i < size_B_lead; i++)
            {
               if (rem[i])
               {
                  fmpz_sub_ui_inplace(coeff_Q,1);
                  break;
               }
            }
         }
         else
         {
            mpn_tdiv_qr(coeff_Q+1, rem, 0, coeffs_R+coeff*size_R+1, ABS(coeffs_R[coeff*size_R]), B_lead+1, size_B_lead);
            coeff_Q[0] = sign_quot;
         }
         NORM(coeff_Q);

         /* temp is a view of B without its leading coefficient;
            qB = (quotient coefficient) * temp */
         fmpz_poly_t temp;
         fmpz_poly_init2(qB, B->length-1, B->limbs+ABS(coeff_Q[0]));
         temp->coeffs = B->coeffs;
         temp->length = B->length - 1;
         temp->limbs = B->limbs;
         _fmpz_poly_scalar_mul_fmpz(qB, temp, coeff_Q);

         /* R may be resized here, so its pointer and stride are reloaded */
         fmpz_poly_fit_limbs(R, qB->limbs+1);
         coeffs_R = R->coeffs;
         size_R = R->limbs+1;

         /* R_sub is a window onto the B->length - 1 coefficients of R just
            below index coeff; qB is subtracted from it in place */
         fmpz_poly_t R_sub;
         R_sub->coeffs = coeffs_R+(coeff - B->length + 1)*size_R;
         R_sub->limbs = R->limbs;
         R_sub->length = B->length - 1;
         _fmpz_poly_sub(R_sub, R_sub, qB);

         /* the divided coefficient itself is simply cleared */
         coeffs_R[coeff*size_R] = 0L;

         coeff--;
         fmpz_poly_clear(qB);
      }
   }

   R->length = B->length - 1;
   _fmpz_poly_normalise(R);
   flint_heap_free(rem);
}

/*
   Divide the polynomial A by the polynomial B but do not compute the remainder
*/

void fmpz_poly_div_classical(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B)
{
   fmpz_poly_t qB, R;

   fmpz_poly_init(R);

   if (B->length == 0)
   {
      printf("Error: Divide by zero\n");
      abort();
   }

   long coeff = A->length-1;

   unsigned long size_A = A->limbs + 1;
   unsigned long size_B = B->limbs + 1;
   unsigned long size_R;
   unsigned long limbs_R;
   unsigned long size_Q;
   unsigned long limbs_Q;
   fmpz_t coeffs_A = A->coeffs;
   fmpz_t coeffs_B = B->coeffs;
   fmpz_t coeff_i = coeffs_A + coeff*size_A;
   fmpz_t B_lead = coeffs_B + (B->length-1)*size_B;
   fmpz_t coeff_Q;
   fmpz_t coeffs_R;

   NORM(B_lead);

   unsigned long size_B_lead = ABS(B_lead[0]);
   mp_limb_t sign_B_lead = B_lead[0];
   mp_limb_t sign_quot;

   // Find the first
coefficient greater than B_lead while (1) { if (coeff < (long) B->length - 1) break; NORM(coeff_i); if (ABS(coeff_i[0]) < size_B_lead) { coeff--; coeff_i -= size_A; } else if (ABS(coeff_i[0]) > size_B_lead) break; else if (mpn_cmp(coeff_i+1, B_lead+1, size_B_lead) >= 0) break; else { coeff--; coeff_i -= size_A; } } fmpz_t rem = (fmpz_t) flint_heap_alloc(size_B_lead); // Set R to A fmpz_poly_fit_length(R, A->length); fmpz_poly_fit_limbs(R, A->limbs); R->length = A->length; _fmpz_poly_set(R, A); coeffs_R = R->coeffs; size_R = R->limbs+1; // Set the quotient to zero if R is shorter than B if (coeff >= (long) B->length - 1) { fmpz_poly_fit_length(Q, coeff-B->length+2); fmpz_poly_fit_limbs(Q, 1); Q->length = coeff-B->length+2; size_Q = Q->limbs+1; } else _fmpz_poly_zero(Q); while (coeff >= (long) B->length - 1) { coeff_Q = Q->coeffs+(coeff-B->length+1)*size_Q; // Set quotient coefficients to 0 if the R coefficients are already smaller than B while (1) { if (coeff < (long) B->length - 1) break; NORM(coeffs_R+coeff*size_R); if (ABS(coeffs_R[coeff*size_R]) < size_B_lead) { coeff_Q[0] = 0; coeff_Q -= size_Q; coeff--; } else if (ABS(coeffs_R[coeff*size_R]) > size_B_lead) break; else if (mpn_cmp(coeffs_R+coeff*size_R+1, B_lead+1, size_B_lead) >= 0) break; else { coeff_Q[0] = 0; coeff_Q -= size_Q; coeff--; } } if (coeff >= (long) B->length - 1) { // else compute the quotient of the coefficient by B_lead limbs_Q = ABS(coeffs_R[coeff*size_R]) - size_B_lead + 1; fmpz_poly_fit_limbs(Q, limbs_Q); size_Q = Q->limbs+1; coeff_Q = Q->coeffs+(coeff - B->length+1)*size_Q; sign_quot = ABS(coeffs_R[coeff*size_R]) - size_B_lead + 1; if (((long) (sign_B_lead ^ coeffs_R[coeff*size_R])) < 0) { mpn_tdiv_qr(coeff_Q+1, rem, 0, coeffs_R+coeff*size_R+1, ABS(coeffs_R[coeff*size_R]), B_lead+1, size_B_lead); coeff_Q[0] = -sign_quot; for (unsigned long i = 0; i < size_B_lead; i++) { if (rem[i]) { fmpz_sub_ui_inplace(coeff_Q,1); break; } } } else { mpn_tdiv_qr(coeff_Q+1, rem, 0, coeffs_R+coeff*size_R+1, 
ABS(coeffs_R[coeff*size_R]), B_lead+1, size_B_lead); coeff_Q[0] = sign_quot; } NORM(coeff_Q); if (coeff >= (long) B->length) { // Now multiply B by this new quotient coefficient and subtract from R fmpz_poly_t R_sub; unsigned long length = FLINT_MIN(coeff - B->length + 2, B->length); fmpz_poly_init2(qB, length, B->limbs+ABS(coeff_Q[0])+1); R_sub->coeffs = B->coeffs + (B->length - length)*(B->limbs + 1); R_sub->limbs = B->limbs; R_sub->length = length; _fmpz_poly_scalar_mul_fmpz(qB, R_sub, coeff_Q); fmpz_poly_fit_limbs(R, qB->limbs+1); coeffs_R = R->coeffs; size_R = R->limbs+1; R_sub->coeffs = coeffs_R+(coeff - length + 1)*size_R; R_sub->limbs = R->limbs; _fmpz_poly_sub(R_sub, R_sub, qB); fmpz_poly_clear(qB); } coeff--; } } fmpz_poly_clear(R); flint_heap_free(rem); } /* Integer polynomial division using a divide and conquer algorithm. Note BQ is not the remainder but it is B*Q, so the remainder R = A-BQ */ void fmpz_poly_div_divconquer_recursive(fmpz_poly_t Q, fmpz_poly_t BQ, const fmpz_poly_t A, const fmpz_poly_t B) { if (A->length < B->length) { _fmpz_poly_zero(Q); _fmpz_poly_zero(BQ); return; } // A->length is now >= B->length unsigned long crossover = 16; unsigned long crossover2 = 128; if (B->limbs > 16) crossover = 8; if ((B->length <= 12) && (B->limbs > 8)) crossover = 8; if ((B->length <= crossover) || ((A->length > 2*B->length - 1) && (A->length < crossover2))) { /* Use the classical algorithm to compute the quotient and remainder, then use A-R to compute BQ */ fmpz_poly_t Rb; fmpz_poly_init(Rb); fmpz_poly_divrem_classical(Q, Rb, A, B); fmpz_poly_fit_length(BQ, A->length); fmpz_poly_fit_limbs(BQ, FLINT_MAX(A->limbs, Rb->limbs)+1); _fmpz_poly_sub(BQ, A, Rb); fmpz_poly_clear(Rb); return; } fmpz_poly_t d1, d2, d3, d4, p1, q1, q2, dq1, dq2, d1q1, d2q1, d2q2, d1q2, t, temp; unsigned long n1 = (B->length+1)/2; unsigned long n2 = B->length - n1; /* We let B = d1*x^n2 + d2 */ _fmpz_poly_attach_shift(d1, B, n2); _fmpz_poly_attach_truncate(d2, B, n2); 
_fmpz_poly_attach_shift(d3, B, n1); _fmpz_poly_attach_truncate(d4, B, n1); if (A->length <= n2 + B->length - 1) { /* If A->length <= B->length + n2 - 1 then only a single quotient is needed We do a division of at most 2*n2 - 1 terms by n2 terms yielding a quotient of at most n2 terms */ // Set p1 to be A without the last // n1 coefficients // 2*n2-1 >= p1->length > 0 fmpz_poly_init(p1); fmpz_poly_fit_length(p1, A->length-n1); fmpz_poly_fit_limbs(p1, A->limbs); _fmpz_poly_right_shift(p1, A, n1); // Since A was normalised, then p1 will be // d3 is the leading terms of B and so must be normalised // d3 is length n2, so we get at most n2 terms in the quotient fmpz_poly_init(d1q1); fmpz_poly_div_divconquer_recursive(Q, d1q1, p1, d3); fmpz_poly_clear(p1); /* Compute d2q1 = Q*d4 It is of length at most n1+n2-1 terms */ fmpz_poly_init(d2q1); fmpz_poly_mul(d2q1, Q, d4); /* Compute BQ = d1q1*x^n1 + d2q1 It has length at most n1+2*n2-1 */ fmpz_poly_fit_length(BQ, FLINT_MAX(d1q1->length+n1, d2q1->length)); fmpz_poly_fit_limbs(BQ, FLINT_MAX(d1q1->limbs, d2q1->limbs)+1); _fmpz_poly_left_shift(BQ, d1q1, n1); fmpz_poly_clear(d1q1); _fmpz_poly_add(BQ, BQ, d2q1); fmpz_poly_clear(d2q1); return; } if (A->length > 2*B->length - 1) { // We shift A right until it is length 2*B->length -1 // We call this polynomial p1 unsigned long shift = A->length - 2*B->length + 1; _fmpz_poly_attach_shift(p1, A, shift); /* Set q1 to p1 div B This is a 2*B->length-1 by B->length division so q1 ends up being at most length B->length d1q1 = d1*q1 is length at most 2*B->length-1 */ fmpz_poly_init(d1q1); fmpz_poly_init(q1); fmpz_poly_div_divconquer_recursive(q1, d1q1, p1, B); /* Compute dq1 = d1*q1*x^shift dq1 is then of length at most A->length dq1 is normalised since d1q1 was */ fmpz_poly_init(dq1); fmpz_poly_fit_length(dq1, d1q1->length + shift); fmpz_poly_fit_limbs(dq1, d1q1->limbs); _fmpz_poly_left_shift(dq1, d1q1, shift); fmpz_poly_clear(d1q1); /* Compute t = A - dq1 The first B->length coefficients 
cancel if the division is exact, leaving A->length - B->length significant terms otherwise we truncate at this length */ fmpz_poly_init(t); fmpz_poly_sub(t, A, dq1); _fmpz_poly_truncate(t, A->length - B->length); /* Compute q2 = t div B It is a smaller division than the original since t->length <= A->length-B->length */ fmpz_poly_init(q2); fmpz_poly_init(dq2); fmpz_poly_div_divconquer_recursive(q2, dq2, t, B); fmpz_poly_clear(t); /* Write out Q = q1*x^shift + q2 Q has length at most B->length+shift Note q2 has length at most shift since at most it is an A->length-B->length by B->length division */ fmpz_poly_fit_length(Q, FLINT_MAX(q1->length+shift, q2->length)); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs, q2->limbs)); _fmpz_poly_left_shift(Q, q1, shift); fmpz_poly_clear(q1); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); /* Write out BQ = dq1 + dq2 */ fmpz_poly_fit_length(BQ, FLINT_MAX(dq1->length, dq2->length)); fmpz_poly_fit_limbs(BQ, FLINT_MAX(dq1->limbs, dq2->limbs)+1); _fmpz_poly_add(BQ, dq1, dq2); fmpz_poly_clear(dq1); fmpz_poly_clear(dq2); return; } // n2 + B->length - 1 < A->length <= n1 + n2 + B->length - 1 /* We let A = a1*x^(n1+2*n2-1) + a2*x^(n1+n2-1) + a3 where a1 is length at most n1 (and at least 1), a2 is length n2 and a3 is length n1+n2-1 We set p1 = a1*x^(n1-1)+ other terms, so it has length at most 2*n1-1 */ _fmpz_poly_stack_init(p1, A->length-2*n2, A->limbs); _fmpz_poly_right_shift(p1, A, 2*n2); /* Set q1 to p1 div d1 This is at most a 2*n1-1 by n1 division so q1 ends up being at most length n1 d1q1 = d1*q1 is length at most 2*n1-1 */ fmpz_poly_init(d1q1); fmpz_poly_init(q1); fmpz_poly_div_divconquer_recursive(q1, d1q1, p1, d1); _fmpz_poly_stack_clear(p1); /* Compute d2q1 = d2*q1 which ends up being at most length n1+n2-1 */ fmpz_poly_init(d2q1); fmpz_poly_mul(d2q1, d2, q1); /* Compute dq1 = d1*q1*x^n2 + d2*q1 dq1 is then of length at most 2*n1+n2-1 */ _fmpz_poly_stack_init(dq1, FLINT_MAX(d1q1->length + n2, d2q1->length), FLINT_MAX(d1q1->limbs, 
d2q1->limbs)+1); _fmpz_poly_left_shift(dq1, d1q1, n2); fmpz_poly_clear(d1q1); _fmpz_poly_add(dq1, dq1, d2q1); fmpz_poly_clear(d2q1); /* Compute t = p1*x^(n1+n2-1) + p2*x^(n1-1) - dq1 which has length at most 2*n1+n2-1, but we are not interested in up to the first n1 coefficients, so it has effective length at most n1+n2-1 */ _fmpz_poly_stack_init(t, FLINT_MAX(A->length-n2, dq1->length), FLINT_MAX(A->limbs, dq1->limbs)+1); _fmpz_poly_right_shift(t, A, n2); _fmpz_poly_sub(t, t, dq1); _fmpz_poly_truncate(t, B->length - 1); /* Compute q2 = t div d1 It is at most an n1+n2-1 by n1 division, so the length of q2 will be at most n2 Also compute d1q2 of length at most n1+n2-1 */ fmpz_poly_init(d1q2); fmpz_poly_init(q2); fmpz_poly_div_divconquer_recursive(q2, d1q2, t, d1); _fmpz_poly_stack_clear(t); /* Compute d2q2 = d2*q2 which is of length at most n1+n2-1 */ fmpz_poly_init(d2q2); fmpz_poly_mul(d2q2, d2, q2); /* Compute dq2 = d1*q2*x^n2 + d2q2 which is of length at most n1+2*n2-1 */ _fmpz_poly_stack_init(dq2, FLINT_MAX(d1q2->length+n2, d2q2->length), FLINT_MAX(d1q2->limbs, d2q2->limbs)+1); _fmpz_poly_left_shift(dq2, d1q2, n2); fmpz_poly_clear(d1q2); _fmpz_poly_add(dq2, dq2, d2q2); fmpz_poly_clear(d2q2); /* Write out Q = q1*x^n2 + q2 Q has length at most n1+n2 */ fmpz_poly_fit_length(Q, FLINT_MAX(q1->length+n2, q2->length)); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs, q2->limbs)); _fmpz_poly_left_shift(Q, q1, n2); fmpz_poly_clear(q1); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); /* Write out BQ = dq1*x^n2 + dq2 BQ has length at most 2*(n1+n2)-1 */ fmpz_poly_fit_length(BQ, FLINT_MAX(n2+dq1->length, dq2->length)); fmpz_poly_fit_limbs(BQ, FLINT_MAX(dq1->limbs, dq2->limbs)+1); _fmpz_poly_left_shift(BQ, dq1, n2); _fmpz_poly_add(BQ, BQ, dq2); _fmpz_poly_stack_clear(dq2); _fmpz_poly_stack_clear(dq1); } /* Divide and conquer division of A by B but only computing the low half of Q*B */ void fmpz_poly_div_divconquer_recursive_low(fmpz_poly_t Q, fmpz_poly_t BQ, const fmpz_poly_t A, 
const fmpz_poly_t B) { if (A->length < B->length) { _fmpz_poly_zero(Q); _fmpz_poly_zero(BQ); return; } // A->length is now >= B->length unsigned long crossover = 16; unsigned long crossover2 = 128; if (B->limbs > 16) crossover = 8; if ((B->length <= 12) && (B->limbs > 8)) crossover = 8; if ((B->length <= crossover) || ((A->length > 2*B->length - 1) && (A->length < crossover2))) { /* Use the classical algorithm to compute the quotient and low half of the remainder, then truncate A-R to compute BQ */ fmpz_poly_t Rb; fmpz_poly_init(Rb); fmpz_poly_divrem_classical_low(Q, Rb, A, B); fmpz_poly_fit_length(BQ, A->length); fmpz_poly_fit_limbs(BQ, FLINT_MAX(A->limbs, Rb->limbs)+1); _fmpz_poly_sub(BQ, A, Rb); fmpz_poly_clear(Rb); _fmpz_poly_truncate(BQ, B->length - 1); return; } fmpz_poly_t d1, d2, d3, d4, p1, q1, q2, dq1, dq2, d1q1, d2q1, d2q2, d1q2, t, temp; unsigned long n1 = (B->length+1)/2; unsigned long n2 = B->length - n1; /* We let B = d1*x^n2 + d2 */ _fmpz_poly_attach_shift(d1, B, n2); _fmpz_poly_attach_truncate(d2, B, n2); _fmpz_poly_attach_shift(d3, B, n1); _fmpz_poly_attach_truncate(d4, B, n1); if (A->length <= n2 + B->length - 1) { /* If A->length <= B->length + n2 - 1 then only a single quotient is needed We do a division of at most 2*n2 - 1 terms by n2 terms yielding a quotient of at most n2 terms */ // Set p1 to be A without the last // n1 coefficients // 2*n2-1 >= p1->length > 0 fmpz_poly_init(p1); fmpz_poly_fit_length(p1, A->length-n1); fmpz_poly_fit_limbs(p1, A->limbs); _fmpz_poly_right_shift(p1, A, n1); // Since A was normalised, then p1 will be // d3 is the leading terms of B and so must be normalised // d3 is length n2, so we get at most n2 terms in the quotient // We compute only the low n2-1 terms of the product d1q1 fmpz_poly_init(d1q1); fmpz_poly_div_divconquer_recursive_low(Q, d1q1, p1, d3); fmpz_poly_clear(p1); /* Compute d2q1 = Q*d4 It is of length at most n1+n2-1 terms */ fmpz_poly_init(d2q1); fmpz_poly_mul(d2q1, Q, d4); /* Compute BQ = d1q1*x^n1 
+ d2q1 It has length at most n1+n2-1 */ fmpz_poly_fit_length(BQ, FLINT_MAX(d1q1->length+n1, d2q1->length)); fmpz_poly_fit_limbs(BQ, FLINT_MAX(d1q1->limbs, d2q1->limbs)+1); _fmpz_poly_left_shift(BQ, d1q1, n1); fmpz_poly_clear(d1q1); _fmpz_poly_add(BQ, BQ, d2q1); fmpz_poly_clear(d2q1); return; } if (A->length > 2*B->length - 1) { // We shift A right until it is length 2*B->length - 1 // We call this polynomial p1 unsigned long shift = A->length - 2*B->length + 1; _fmpz_poly_attach_shift(p1, A, shift); /* Set q1 to p1 div B This is a 2*B->length-1 by B->length division so q1 ends up being at most length B->length d1q1 = d1*q1 is truncated to length at most B->length-1 */ fmpz_poly_init(d1q1); fmpz_poly_init(q1); fmpz_poly_div_divconquer_recursive_low(q1, d1q1, p1, B); /* Compute dq1 = d1*q1*x^shift dq1 is then of length at most A->length - B->length dq1 is normalised since d1q1 was */ fmpz_poly_init(dq1); fmpz_poly_fit_length(dq1, d1q1->length + shift); fmpz_poly_fit_limbs(dq1, d1q1->limbs); _fmpz_poly_left_shift(dq1, d1q1, shift); fmpz_poly_clear(d1q1); /* Compute t = A - dq1 We truncate, leaving at most A->length - B->length significant terms */ fmpz_poly_init(t); fmpz_poly_sub(t, A, dq1); _fmpz_poly_truncate(t, A->length - B->length); /* Compute q2 = t div B It is a smaller division than the original since t->length <= A->length-B->length dq2 has length at most B->length - 1 */ fmpz_poly_init(q2); fmpz_poly_init(dq2); fmpz_poly_div_divconquer_recursive_low(q2, dq2, t, B); fmpz_poly_clear(t); /* Write out Q = q1*x^shift + q2 Q has length at most B->length+shift Note q2 has length at most shift since at most it is an A->length-B->length by B->length division */ fmpz_poly_fit_length(Q, FLINT_MAX(q1->length+shift, q2->length)); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs, q2->limbs)); _fmpz_poly_left_shift(Q, q1, shift); fmpz_poly_clear(q1); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); /* Write out BQ = dq1 + dq2 */ fmpz_poly_fit_length(BQ, FLINT_MAX(dq1->length, 
dq2->length)); fmpz_poly_fit_limbs(BQ, FLINT_MAX(dq1->limbs, dq2->limbs)+1); _fmpz_poly_add(BQ, dq1, dq2); _fmpz_poly_truncate(BQ, B->length - 1); fmpz_poly_clear(dq1); fmpz_poly_clear(dq2); return; } // n2 + B->length - 1 < A->length <= n1 + n2 + B->length - 1 /* We let A = a1*x^(n1+2*n2-1) + a2*x^(n1+n2-1) + a3 where a1 is length at most n1 (and at least 1), a2 is length n2 and a3 is length n1+n2-1 We set p1 = a1*x^(n1-1)+ other terms, so it has length at most 2*n1-1 */ _fmpz_poly_stack_init(p1, A->length-2*n2, A->limbs); _fmpz_poly_right_shift(p1, A, 2*n2); /* Set q1 to p1 div d1 This is at most a 2*n1-1 by n1 division so q1 ends up being at most length n1 d1q1 = d1*q1 is truncated to length at most n1-1 */ fmpz_poly_init(d1q1); fmpz_poly_init(q1); fmpz_poly_div_divconquer_recursive_low(q1, d1q1, p1, d1); _fmpz_poly_stack_clear(p1); /* Compute d2q1 = d2*q1 which ends up being at most length n1+n2-1 */ fmpz_poly_init(d2q1); fmpz_poly_mul(d2q1, d2, q1); /* Compute dq1 = d1*q1*x^n2 + d2*q1 dq1 is then of length at most n1+n2-1 */ _fmpz_poly_stack_init(dq1, FLINT_MAX(d1q1->length + n2, d2q1->length), FLINT_MAX(d1q1->limbs, d2q1->limbs)+1); _fmpz_poly_left_shift(dq1, d1q1, n2); fmpz_poly_clear(d1q1); _fmpz_poly_add(dq1, dq1, d2q1); fmpz_poly_clear(d2q1); /* Compute t = a1*x^(n1+n2-1) + a2*x^(n1-1) - dq1 which has length at most 2*n1+n2-1, but we are not interested in up to the first n1 coefficients, so it has effective length at most n1+n2-1 */ _fmpz_poly_stack_init(t, FLINT_MAX(A->length-n2, dq1->length), FLINT_MAX(A->limbs, dq1->limbs)+1); _fmpz_poly_right_shift(t, A, n2); _fmpz_poly_sub(t, t, dq1); _fmpz_poly_truncate(t, B->length - 1); /* Compute q2 = t div d1 It is at most an n1+n2-1 by n1 division, so the length of q2 will be at most n2 Also compute d1q2 truncated to length at most n1-1 */ fmpz_poly_init(d1q2); fmpz_poly_init(q2); fmpz_poly_div_divconquer_recursive_low(q2, d1q2, t, d1); _fmpz_poly_stack_clear(t); /* Compute d2q2 = d2*q2 which is of length at 
most n1+n2-1 */ fmpz_poly_init(d2q2); fmpz_poly_mul(d2q2, d2, q2); /* Compute dq2 = d1*q2*x^n2 + d2q2 which is of length at most n1+n2-1 */ _fmpz_poly_stack_init(dq2, FLINT_MAX(d1q2->length+n2, d2q2->length), FLINT_MAX(d1q2->limbs, d2q2->limbs)+1); _fmpz_poly_left_shift(dq2, d1q2, n2); fmpz_poly_clear(d1q2); _fmpz_poly_add(dq2, dq2, d2q2); fmpz_poly_clear(d2q2); /* Write out Q = q1*x^n2 + q2 Q has length at most n1+n2 */ fmpz_poly_fit_length(Q, FLINT_MAX(q1->length+n2, q2->length)); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs, q2->limbs)); _fmpz_poly_left_shift(Q, q1, n2); fmpz_poly_clear(q1); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); /* Write out BQ = dq1*x^n2 + dq2 BQ has length at most n1+2*n2-1 We truncate to at most length B->length - 1 */ fmpz_poly_fit_length(BQ, FLINT_MAX(n2+dq1->length, dq2->length)); fmpz_poly_fit_limbs(BQ, FLINT_MAX(dq1->limbs, dq2->limbs)+1); _fmpz_poly_left_shift(BQ, dq1, n2); _fmpz_poly_add(BQ, BQ, dq2); _fmpz_poly_truncate(BQ, B->length - 1); _fmpz_poly_stack_clear(dq2); _fmpz_poly_stack_clear(dq1); } void fmpz_poly_div_divconquer(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B) { if (A->length < B->length) { _fmpz_poly_zero(Q); return; } // A->length is now >= B->length unsigned long crossover = 16; unsigned long crossover2 = 256; if (B->limbs > 16) crossover = 8; if ((B->length <= 12) && (B->limbs > 8)) crossover = 8; if ((B->length <= crossover) || ((A->length > 2*B->length - 1) && (A->length < crossover2))) { fmpz_poly_div_classical(Q, A, B); return; } // B->length is now >= crossover (8 or 16) fmpz_poly_t d1, d2, d3, p1, q1, q2, dq1, dq2, d1q1, d2q1, d2q2, d1q2, t, temp; unsigned long n1 = (B->length+1)/2; unsigned long n2 = B->length - n1; // n1 and n2 are at least 4 /* We let B = d1*x^n2 + d2 d1 is of length n1 and d2 of length n2 */ _fmpz_poly_attach_shift(d1, B, n2); _fmpz_poly_attach_truncate(d2, B, n2); _fmpz_poly_attach_shift(d3, B, n1); if (A->length <= n2 + B->length - 1) { /* If A->length <= B->length + n2 - 
1 then only a single quotient is needed We do a division of at most 2*n2 - 1 terms by n2 terms yielding a quotient of at most n2 terms */ // Set p1 to be A without the last // n1 coefficients // 2*n2-1 >= p1->length > 0 fmpz_poly_init(p1); fmpz_poly_fit_length(p1, A->length-n1); fmpz_poly_fit_limbs(p1, A->limbs); _fmpz_poly_right_shift(p1, A, n1); // Since A was normalised, then p1 will be // d3 is the leading terms of B and so must be normalised // d3 is length n2, so we get at most n2 terms in the quotient fmpz_poly_div_divconquer(Q, p1, d3); fmpz_poly_clear(p1); return; } if (A->length > 2*B->length - 1) { // We shift A right until it is length 2*B->length -1 // We call this polynomial p1 unsigned long shift = A->length - 2*B->length + 1; _fmpz_poly_attach_shift(p1, A, shift); /* Set q1 to p1 div B This is a 2*B->length-1 by B->length division so q1 ends up being at most length B->length d1q1 = low(d1*q1) is length at most 2*B->length-1 We discard the lower B->length-1 terms */ fmpz_poly_init(d1q1); fmpz_poly_init(q1); fmpz_poly_div_divconquer_recursive_low(q1, d1q1, p1, B); /* Compute dq1 = d1*q1*x^shift dq1 is then of length at most A->length dq1 is normalised since d1q1 was */ fmpz_poly_init(dq1); fmpz_poly_fit_length(dq1, d1q1->length + shift); fmpz_poly_fit_limbs(dq1, d1q1->limbs); _fmpz_poly_left_shift(dq1, d1q1, shift); fmpz_poly_clear(d1q1); /* Compute t = A - dq1 The first B->length coefficients cancel if the division is exact, leaving A->length - B->length significant terms otherwise we truncate at this length */ fmpz_poly_init(t); fmpz_poly_sub(t, A, dq1); fmpz_poly_clear(dq1); _fmpz_poly_truncate(t, A->length - B->length); /* Compute q2 = t div B It is a smaller division than the original since t->length <= A->length-B->length */ fmpz_poly_init(q2); fmpz_poly_div_divconquer(q2, t, B); fmpz_poly_clear(t); /* Write out Q = q1*x^shift + q2 Q has length at most B->length+shift Note q2 has length at most shift since at most it is an A->length-B->length by 
B->length division */ fmpz_poly_fit_length(Q, FLINT_MAX(q1->length+shift, q2->length)); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs, q2->limbs)); _fmpz_poly_left_shift(Q, q1, shift); fmpz_poly_clear(q1); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); return; } // We now have n2 + B->length - 1 < A->length <= 2*B->length - 1 /* We let A = a1*x^(n1+2*n2-1) + a2*x^(n1+n2-1) + a3 where a1 is length at most n1 and a2 is length n2 and a3 is length n1+n2-1 */ // Set p1 to a1*x^(n1-1) + other terms // It has length at most 2*n1-1 and is normalised // A->length >= 2*n2 fmpz_poly_init(p1); fmpz_poly_fit_length(p1, A->length - 2*n2); fmpz_poly_fit_limbs(p1, A->limbs); _fmpz_poly_right_shift(p1, A, 2*n2); /* Set q1 to p1 div d1 This is at most a 2*n1-1 by n1 division so q1 ends up being at most length n1 d1q1 = low(d1*q1) is length at most n1-1 Thus we have discarded the leading n1 terms (at most) */ fmpz_poly_init(d1q1); fmpz_poly_init(q1); fmpz_poly_div_divconquer_recursive_low(q1, d1q1, p1, d1); fmpz_poly_clear(p1); /* Compute d2q1 = d2*q1 with low n1 - 1 terms zeroed d2*q1 is length at most n1+n2-1 leaving at most n2 non-zero terms to the left */ _fmpz_poly_stack_init(d2q1, d2->length+q1->length-1, d2->limbs+q1->limbs+1); _fmpz_poly_mul_trunc_left_n(d2q1, d2, q1, n1 - 1); /* Compute dq1 = d1*q1*x^n2 + d2*q1 dq1 is then of length at most 2*n1+n2-1 but may have any length below this */ _fmpz_poly_stack_init(dq1, FLINT_MAX(d1q1->length + n2, d2q1->length), B->limbs+q1->limbs+1); _fmpz_poly_left_shift(dq1, d1q1, n2); fmpz_poly_clear(d1q1); _fmpz_poly_add(dq1, dq1, d2q1); /* Compute t = a1*x^(2*n2-1) + a2*x^(n2-1) - dq1 after shifting dq1 to the right by (n1-n2) which has length at most 2*n1+n2-1, but we discard up to n1 coefficients, so it has effective length 2*n2-1 with the last n2-1 coefficients ignored. 
Thus there are at most n2 significant coefficients */ _fmpz_poly_stack_init(t, n1+2*n2-1, FLINT_MAX(A->limbs,dq1->limbs)+1); _fmpz_poly_right_shift(t, A, n1); _fmpz_poly_attach_shift(temp, dq1, n1-n2); _fmpz_poly_sub(t, t, temp); _fmpz_poly_truncate(t, 2*n2-1); /* Compute q2 = t div d3 It is at most a 2*n2-1 by n2 division, so the length of q2 will be n2 at most */ fmpz_poly_init(q2); fmpz_poly_div_divconquer(q2, t, d3); _fmpz_poly_stack_clear(t); _fmpz_poly_stack_clear(dq1); _fmpz_poly_stack_clear(d2q1); /* Write out Q = q1*x^n2 + q2 Q has length n1+n2 */ fmpz_poly_fit_length(Q, q1->length+n2); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs, q2->limbs)); _fmpz_poly_left_shift(Q, q1, n2); fmpz_poly_clear(q1); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); } void fmpz_poly_divrem_divconquer(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B) { fmpz_poly_t QB; fmpz_poly_init(QB); fmpz_poly_div_divconquer_recursive(Q, QB, A, B); fmpz_poly_fit_limbs(R, FLINT_MAX(QB->limbs, A->limbs)+1); fmpz_poly_fit_length(R, A->length); _fmpz_poly_sub(R, A, QB); _fmpz_poly_normalise(R); fmpz_poly_clear(QB); } /* Compute the polynomial X^{2n} / Q. Used by Newton iteration to bootstrap power series inversion. Q must be monic and have length >= n. 
*/ void fmpz_poly_newton_invert_basecase(fmpz_poly_t Q_inv, const fmpz_poly_t Q, unsigned long n) { fmpz_poly_t X2n, Qn; fmpz_poly_init2(X2n, 2*n-1, 1); _fmpz_poly_zero_coeffs(X2n, 2*n - 2); _fmpz_poly_set_coeff_ui(X2n, 2*n - 2, 1); X2n->length = 2*n-1; Qn->coeffs = Q->coeffs + (Q->length - n)*(Q->limbs + 1); Qn->limbs = Q->limbs; Qn->length = n; fmpz_poly_div_mulders(Q_inv, X2n, Qn); fmpz_poly_clear(X2n); } #define FLINT_NEWTON_INVERSE_BASECASE_CUTOFF 32 /* Recursively compute 1 / Q mod x^n using Newton iteration Assumes Q is given to the full precision n required and has constant term 1 */ void fmpz_poly_newton_invert(fmpz_poly_t Q_inv, const fmpz_poly_t Q, const unsigned long n) { if (n < FLINT_NEWTON_INVERSE_BASECASE_CUTOFF) { fmpz_poly_t Q_rev; fmpz_poly_init(Q_rev); fmpz_poly_fit_length(Q_rev, n); fmpz_poly_fit_limbs(Q_rev, Q->limbs); _fmpz_poly_reverse(Q_rev, Q, n); fmpz_poly_newton_invert_basecase(Q_inv, Q_rev, n); fmpz_poly_fit_length(Q_inv, n); _fmpz_poly_reverse(Q_inv, Q_inv, n); fmpz_poly_clear(Q_rev); return; } unsigned long m = (n+1)/2; fmpz_poly_t g0, prod, prod2; fmpz_poly_init(g0); fmpz_poly_init(prod); fmpz_poly_init(prod2); fmpz_poly_newton_invert(g0, Q, m); fmpz_poly_mul_trunc_n(prod, Q, g0, n); fmpz_sub_ui_inplace(prod->coeffs, 1); fmpz_poly_mul_trunc_n(prod2, prod, g0, n); fmpz_poly_fit_length(Q_inv, n); fmpz_poly_fit_limbs(Q_inv, FLINT_MAX(prod2->limbs, g0->limbs)+1); _fmpz_poly_sub(Q_inv, g0, prod2); fmpz_poly_clear(prod2); fmpz_poly_clear(prod); fmpz_poly_clear(g0); } /* Yields a precision n power series quotient of A by B assuming A and B are both given to precision n and B is normalised (i.e. constant coefficient is 1). 
*/ void fmpz_poly_div_series(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B, const unsigned long n) { fmpz_poly_t Ain, Bin; if (A == Q) { _fmpz_poly_stack_init(Ain, A->length, A->limbs); _fmpz_poly_set(Ain, A); } else _fmpz_poly_attach(Ain, A); if (B == Q) { _fmpz_poly_stack_init(Bin, B->length, B->limbs); _fmpz_poly_set(Bin, B); } else _fmpz_poly_attach(Bin, B); fmpz_poly_t B_inv; fmpz_poly_init(B_inv); fmpz_poly_newton_invert(B_inv, Bin, n); fmpz_poly_mul_trunc_n(Q, B_inv, Ain, n); fmpz_poly_clear(B_inv); if (A == Q) _fmpz_poly_stack_clear(Ain); if (B == Q) _fmpz_poly_stack_clear(Bin); } /* Polynomial division of A by B The remainder is not computed, to save time B is assumed to be monic */ void fmpz_poly_div_newton(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B) { if (A->length < B->length) { fmpz_poly_set_coeff_si(Q, 0, 0); _fmpz_poly_normalise(Q); return; } fmpz_poly_t A_rev, B_rev; fmpz_poly_init2(A_rev, A->length, A->limbs); fmpz_poly_init2(B_rev, B->length, B->limbs); _fmpz_poly_reverse(A_rev, A, A->length); _fmpz_poly_reverse(B_rev, B, B->length); fmpz_poly_div_series(Q, A_rev, B_rev, A->length - B->length + 1); fmpz_poly_fit_length(Q, A->length - B->length + 1); _fmpz_poly_reverse(Q, Q, A->length - B->length + 1); fmpz_poly_clear(B_rev); fmpz_poly_clear(A_rev); } /*=================================================================================== Mulder's short division algorithm ====================================================================================*/ // Mulders algorithm without improvements /*void fmpz_poly_div_mulders(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B) { if (A->length < B->length) { _fmpz_poly_zero(Q); return; } unsigned long crossover = 16; if (B->limbs > 16) crossover = 8; if ((B->length <= 12) && (B->limbs > 8)) crossover = 8; if ((B->length <= crossover) || (A->length > 2*B->length - 1)) { fmpz_poly_div_classical(Q, A, B); return; } unsigned long k; k = 0; /*if (B->length <= 100) k = 
B->length/5; if (B->length <= 20) k = B->length/4; if (B->length == 10) k = B->length/3;*/ /* fmpz_poly_t d1, d2, g1, g2, p1, q1, q2, dq1, dq2, d1q1, d2q1, d2q2, d1q2, t, temp; #if MULDERS_NEGATIVE unsigned long n1 = (B->length+1)/2 - k; #else unsigned long n1 = (B->length+1)/2 + k; #endif unsigned long n2 = B->length - n1; /* We let B = d1*x^n2 + d2 */ /* d1->length = n1; d2->length = n2; g1->length = n2; g2->length = n1; d1->limbs = B->limbs; d2->limbs = B->limbs; g1->limbs = B->limbs; g2->limbs = B->limbs; d1->coeffs = B->coeffs + n2*(B->limbs+1); d2->coeffs = B->coeffs; g1->coeffs = B->coeffs + n1*(B->limbs+1); g2->coeffs = B->coeffs; if (A->length <= 2*n1+n2-1) { temp->length = A->length - (n1+n2-1); temp->limbs = A->limbs; temp->coeffs = A->coeffs + (n1+n2-1)*(A->limbs+1); _fmpz_poly_stack_init(p1, temp->length+n1-1, A->limbs); _fmpz_poly_left_shift(p1, temp, n1-1); p1->length = temp->length+n1-1; fmpz_poly_init(d1q1); _fmpz_poly_normalise(p1); fmpz_poly_div_divconquer_recursive(Q, d1q1, p1, d1); //****************************** fmpz_poly_clear(d1q1); _fmpz_poly_stack_clear(p1); return; } else { /* We let A = a1*x^(2*n1+n2-1) + a2*x^(n1+n2-1) + a3 where a1 is length n2 and a2 is length n1 and a3 is length n1+n2-1 We set p1 = a1*x^(n2-1), so it has length 2*n2-1 */ /* temp->length = A->length - (2*n1+n2-1); temp->limbs = A->limbs; temp->coeffs = A->coeffs + (2*n1+n2-1)*(A->limbs+1); _fmpz_poly_stack_init(p1, temp->length+n2-1, A->limbs); _fmpz_poly_left_shift(p1, temp, n2-1); p1->length = temp->length+n2-1; /* Set q1 to p1 div g1 This is a 2*n2-1 by n2 division so q1 ends up being length n2 g1q1 = g1*q1 is length 2*n2-1 but we retrieve only the low n2-1 terms */ /* fmpz_poly_init(d1q1); fmpz_poly_init(q1); _fmpz_poly_normalise(p1); fmpz_poly_div_divconquer_recursive(q1, d1q1, p1, g1); //****************************** _fmpz_poly_stack_clear(p1); } /* Compute g2q1 = g2*q1 which ends up being length n1+n2-1 but we set the right most n2-1 terms to zero */ /* 
_fmpz_poly_stack_init(d2q1, g2->length+q1->length-1, g2->limbs+q1->limbs+1); _fmpz_poly_mul_trunc_left_n(d2q1, g2, q1, n2 - 1); /* Compute dq1 = g1*q1*x^n1 + g2*q1 dq1 is then of length n1+2*n2-1 but we have only the rightmost n1+n2-1 terms */ /* _fmpz_poly_stack_init(dq1, FLINT_MAX(d1q1->length + n1, d2q1->length), B->limbs+q1->limbs+1); _fmpz_poly_zero_coeffs(dq1, n1); dq1->length = d1q1->length + n1; temp->length = d1q1->length; temp->limbs = dq1->limbs; temp->coeffs = dq1->coeffs + n1*(dq1->limbs+1); _fmpz_poly_set(temp, d1q1); fmpz_poly_clear(d1q1); _fmpz_poly_add(dq1, dq1, d2q1); /* Compute t = p1*x^(n1+n2-1) + p2*x^(n2-1) - dq1 which has length 2*n1+n2-1, but we are not interested in the first n1 coefficients, so it has effective length n1+n2-1 */ /* temp->length = A->length - (n1+n2-1); temp->limbs = A->limbs; temp->coeffs = A->coeffs + (n1+n2-1)*(A->limbs+1); #if MULDERS_NEGATIVE _fmpz_poly_stack_init(t, n1+2*n2-1, FLINT_MAX(A->limbs,dq1->limbs)+1); _fmpz_poly_left_shift(t, temp, n2-1); t->length = temp->length+n2-1; _fmpz_poly_sub(t, t, dq1); _fmpz_poly_right_shift(t, t, n2-n1); #else _fmpz_poly_stack_init(t, 2*n1+n2-1, FLINT_MAX(A->limbs,dq1->limbs)+1); _fmpz_poly_left_shift(t, temp, n1-1); t->length = temp->length+n1-1; temp->length = dq1->length; temp->limbs = t->limbs; temp->coeffs = t->coeffs + (n1-n2)*(t->limbs+1); _fmpz_poly_sub(temp, temp, dq1); #endif t->length = 2*n1-1; _fmpz_poly_normalise(t); /* Compute q2 = t div d1 It is a 2*n1-1 by n1 division, so the length of q2 will be n1 */ /* fmpz_poly_init(q2); _fmpz_poly_normalise(t); fmpz_poly_div_mulders(q2, t, d1); _fmpz_poly_stack_clear(t); _fmpz_poly_stack_clear(dq1); _fmpz_poly_stack_clear(d2q1); /* Write out Q = q1*x^n1 + q2 Q has length n1+n2 */ /* fmpz_poly_fit_length(Q, q1->length+n1); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs, q2->limbs)); _fmpz_poly_set(Q, q2); fmpz_poly_clear(q2); Q->length = q1->length + n1; temp->length = q1->length; temp->limbs = Q->limbs; temp->coeffs = Q->coeffs + 
n1*(Q->limbs+1); _fmpz_poly_set(temp, q1); fmpz_poly_clear(q1); }*/ void fmpz_poly_div_mulders(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B) { if (A->length < B->length) { _fmpz_poly_zero(Q); return; } // Crossover must be at least 8 so that n2 is not zero unsigned long crossover = 16; unsigned long crossover2 = 256; if (B->limbs > 16) crossover = 8; if ((B->length <= 12) && (B->limbs > 8)) crossover = 8; if ((B->length <= crossover) || ((A->length > 2*B->length - 1) && (A->length < crossover2))) { fmpz_poly_div_classical(Q, A, B); return; } unsigned long k; k = 0; if (B->length <= 100) k = B->length/5; if (B->length <= 20) k = B->length/4; if (B->length == 10) k = B->length/3; fmpz_poly_t d1, d2, g1, g2, p1, q1, q2, dq1, dq2, d1q1, d2q1, d2q2, d1q2, t, temp; // We demand n2 not be zero, this holds since // crossover is at least 8 unsigned long n1 = (B->length+1)/2 + k; unsigned long n2 = B->length - n1; /* We let B = d1*x^n2 + d2 */ _fmpz_poly_attach_shift(d1, B, n2); _fmpz_poly_attach_truncate(d2, B, n2); _fmpz_poly_attach_shift(g1, B, n1); _fmpz_poly_attach_truncate(g2, B, n1); if (A->length <= n1 + B->length - 1) { /* We only need a single division so we shift and make a recursive call Since n2 is not zero the size has been reduced */ _fmpz_poly_stack_init(p1, A->length - n2, A->limbs); _fmpz_poly_right_shift(p1, A, n2); fmpz_poly_div_mulders(Q, p1, d1); _fmpz_poly_stack_clear(p1); return; } if (A->length > 2*B->length - 1) { fmpz_poly_div_divconquer(Q, A, B); return; } /* We let A = a1*x^(2*n1+n2-1) + a2*x^(n1+n2-1) + a3 where a1 is at most length n2 and a2 is length n1 and a3 is length n1+n2-1 We set p1 = a1*x^(n2-1) + other terms, so it has length at most 2*n2-1 A->length is at least 2*n1 + n2 - 1 which is at least the requisite 2*n1 */ _fmpz_poly_stack_init(p1, A->length-2*n2, A->limbs); _fmpz_poly_right_shift(p1, A, 2*n1); /* Set q1 to p1 div g1 This is at most a 2*n2-1 by n2 division so q1 ends up being at most length n2 d1q1 = g1*q1 is length 
2*n2-1 but we retrieve only the low n2-1 terms */ fmpz_poly_init(d1q1); fmpz_poly_init(q1); fmpz_poly_div_divconquer_recursive_low(q1, d1q1, p1, g1); _fmpz_poly_stack_clear(p1); /* Compute d2q1 = g2*q1 which ends up being at most length n1+n2-1 but we set the right most n2-1 terms to zero g2->length cannot be zero since it is d1 */ _fmpz_poly_stack_init(d2q1, g2->length+q1->length-1, g2->limbs+q1->limbs+1); _fmpz_poly_mul_trunc_left_n(d2q1, g2, q1, n2 - 1); /* Compute dq1 = g1*q1*x^n1 + g2*q1 dq1 is then of length n1+2*n2-1 but we have only the rightmost n1+n2-1 terms, the last n2-1 of which are irrelevant */ _fmpz_poly_stack_init(dq1, FLINT_MAX(d1q1->length + n1, d2q1->length), FLINT_MAX(d1q1->limbs, d2q1->limbs)+1); _fmpz_poly_left_shift(dq1, d1q1, n1); fmpz_poly_clear(d1q1); _fmpz_poly_add(dq1, dq1, d2q1); /* Compute t = p1*x^(n1+n2-1) + p2*x^(n2-1) - dq1 where dq1 has been shifted left by (n1-n2), which has length 2*n1+n2-1, but we are not interested in the first n2 coefficients, so it has effective length at most 2*n1-1 */ _fmpz_poly_stack_init(t, n1+B->length, FLINT_MAX(A->limbs,dq1->limbs)+1); _fmpz_poly_right_shift(t, A, n2); _fmpz_poly_attach_shift(temp, t, n1-n2); _fmpz_poly_sub(temp, temp, dq1); _fmpz_poly_truncate(t, 2*n1-1); /* Compute q2 = t div d1 It is at most a 2*n1-1 by n1 division, so the length of q2 will be at most n1 */ fmpz_poly_init(q2); fmpz_poly_div_mulders(q2, t, d1); _fmpz_poly_stack_clear(t); _fmpz_poly_stack_clear(dq1); _fmpz_poly_stack_clear(d2q1); /* Write out Q = q1*x^n1 + q2 Q has length at most n1+n2 */ fmpz_poly_fit_length(Q, FLINT_MAX(q1->length+n1, q2->length)); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs, q2->limbs)); _fmpz_poly_left_shift(Q, q1, n1); fmpz_poly_clear(q1); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); } /*=================================================================================== Pseudo-division algorithm ====================================================================================*/ 
//Pseudo-division a la Cohen. void fmpz_poly_pseudo_divrem_cohen(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B) { fmpz_poly_t qB; if (B->length == 0) { printf("Error: Divide by zero\n"); abort(); } unsigned long size_A = A->limbs + 1; unsigned long size_B = B->limbs + 1; unsigned long size_R; unsigned long limbs_R; unsigned long size_Q; unsigned long limbs_Q; fmpz_t coeffs_A = A->coeffs; fmpz_t coeffs_B = B->coeffs; fmpz_t B_lead = coeffs_B + (B->length-1)*size_B; fmpz_t coeff_Q; fmpz_t coeffs_R; long m = A->length; long n = B->length; long e = m - n + 1; NORM(B_lead); unsigned long size_B_lead = ABS(B_lead[0]); mp_limb_t sign_B_lead = B_lead[0]; unsigned long bits_B_lead = fmpz_bits(B_lead); mp_limb_t sign_quot; fmpz_poly_fit_length(R, A->length); fmpz_poly_fit_limbs(R, A->limbs); R->length = A->length; _fmpz_poly_set(R, A); coeffs_R = R->coeffs; size_R = R->limbs+1; if ((long) R->length >= (long) B->length) { fmpz_poly_fit_length(Q, R->length-B->length+1); fmpz_poly_fit_limbs(Q, ABS(A->coeffs[(A->length-1)*(A->limbs+1)])); for (unsigned long i = 0; i < R->length-B->length+1; i++) Q->coeffs[i*(Q->limbs+1)] = 0; Q->length = R->length-B->length+1; size_Q = Q->limbs+1; } else { _fmpz_poly_zero(Q); return; } fmpz_poly_t Bm1; Bm1->length = B->length - 1; Bm1->limbs = B->limbs; Bm1->coeffs = B->coeffs; while ((long) R->length >= (long) B->length) { _fmpz_poly_scalar_mul_fmpz(Q, Q, B_lead); coeff_Q = coeffs_R + (R->length-1)*size_R; fmpz_add(Q->coeffs + (R->length-B->length)*size_Q, Q->coeffs + (R->length-B->length)*size_Q, coeff_Q); if (B->length > 1) { fmpz_poly_init2(qB, B->length-1, B->limbs+ABS(coeff_Q[0])); _fmpz_poly_scalar_mul_fmpz(qB, Bm1, coeff_Q); fmpz_poly_fit_limbs(R, FLINT_MAX(R->limbs + size_B_lead, qB->limbs) + 1); } else { fmpz_poly_fit_limbs(R, R->limbs + size_B_lead); } coeffs_R = R->coeffs; size_R = R->limbs+1; _fmpz_poly_scalar_mul_fmpz(R, R, B_lead); fmpz_poly_t R_sub; R_sub->coeffs = coeffs_R+(R->length-B->length)*size_R; 
R_sub->limbs = R->limbs; R_sub->length = B->length-1; if (B->length > 1) { _fmpz_poly_sub(R_sub, R_sub, qB); fmpz_poly_clear(qB); } R_sub->coeffs[(B->length-1)*(R_sub->limbs+1)] = 0; _fmpz_poly_normalise(R); if (R->length) fmpz_poly_fit_limbs(Q, FLINT_MAX(Q->limbs + size_B_lead, ABS(R->coeffs[(R->length-1)*(R->limbs+1)])) + 1); size_Q = Q->limbs+1; e--; } fmpz_t pow = (fmpz_t) flint_stack_alloc((bits_B_lead*e)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, e); fmpz_poly_fit_limbs(Q, Q->limbs+ABS(pow[0])); fmpz_poly_fit_limbs(R, R->limbs+ABS(pow[0])); _fmpz_poly_scalar_mul_fmpz(Q, Q, pow); _fmpz_poly_scalar_mul_fmpz(R, R, pow); flint_stack_release(); } void fmpz_poly_pseudo_divrem_shoup(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B) { fmpz_poly_t qB; if (B->length == 0) { printf("Error: Divide by zero\n"); abort(); } unsigned long size_A = A->limbs + 1; unsigned long size_B = B->limbs + 1; unsigned long size_R; unsigned long limbs_R; unsigned long size_Q; unsigned long limbs_Q; fmpz_t coeffs_A = A->coeffs; fmpz_t coeffs_B = B->coeffs; fmpz_t B_lead = coeffs_B + (B->length-1)*size_B; fmpz_t coeff_Q; fmpz_t coeffs_R; long m = A->length; long n = B->length; long e = m - n + 1; NORM(B_lead); unsigned long size_B_lead = ABS(B_lead[0]); mp_limb_t sign_B_lead = B_lead[0]; mp_limb_t sign_quot; int lead_is_one = fmpz_is_one(B_lead); fmpz_t coeff_R, coeff_A; if ((long) A->length >= (long) B->length) { fmpz_poly_fit_length(R, A->length); fmpz_poly_fit_limbs(R, A->limbs + (m - n)*size_B_lead); R->length = A->length; coeffs_R = R->coeffs; size_R = R->limbs + 1; fmpz_poly_fit_length(Q, A->length-B->length+1); fmpz_poly_fit_limbs(Q, ABS(A->coeffs[(A->length-1)*(A->limbs+1)])); for (unsigned long i = 0; i < A->length-B->length+1; i++) Q->coeffs[i*(Q->limbs+1)] = 0; Q->length = A->length-B->length+1; size_Q = Q->limbs+1; } else { fmpz_poly_set(R, A); _fmpz_poly_zero(Q); return; } if (!lead_is_one) { fmpz_t pow = (fmpz_t) flint_stack_alloc(size_B_lead*(m-n+1)+1); 
fmpz_set(pow, B_lead); coeff_R = coeffs_R + (m-n-1)*size_R; coeff_A = A->coeffs + (m-n-1)*(A->limbs+1); for (long i = m - n - 1; i >= 0; i--) { fmpz_mul(coeff_R, coeff_A, pow); if (i > 0) fmpz_mul(pow, pow, B_lead); coeff_R -= size_R; coeff_A -= (A->limbs+1); } coeff_R = coeffs_R + (m-n)*size_R; coeff_A = A->coeffs + (m-n)*(A->limbs+1); for (long i = m - n; i < m; i++) { fmpz_set(coeff_R, coeff_A); coeff_R += size_R; coeff_A += (A->limbs+1); } flint_stack_release(); } else { coeff_R = coeffs_R; coeff_A = A->coeffs; for (unsigned long i = 0; i < m; i++) { fmpz_set(coeff_R, coeff_A); coeff_R += size_R; coeff_A += (A->limbs+1); } } unsigned long coeff = R->length; fmpz_poly_t Bm1; Bm1->length = B->length - 1; Bm1->limbs = B->limbs; Bm1->coeffs = B->coeffs; while ((long) coeff >= (long) B->length) { coeff_Q = coeffs_R + (coeff-1)*size_R; fmpz_set(Q->coeffs + (coeff-B->length)*size_Q, coeff_Q); coeff_Q = Q->coeffs + (coeff-B->length)*size_Q; if (B->length > 1) { fmpz_poly_init2(qB, Bm1->length, Bm1->limbs+ABS(coeff_Q[0])); _fmpz_poly_scalar_mul_fmpz(qB, Bm1, coeff_Q); fmpz_poly_fit_limbs(R, FLINT_MAX(R->limbs + size_B_lead, qB->limbs) + 1); coeffs_R = R->coeffs; size_R = R->limbs+1; fmpz_poly_t R_sub; R_sub->coeffs = coeffs_R+(coeff-B->length)*size_R; R_sub->limbs = R->limbs; R_sub->length = B->length-1; if (!lead_is_one) _fmpz_poly_scalar_mul_fmpz(R_sub, R_sub, B_lead); _fmpz_poly_sub(R_sub, R_sub, qB); fmpz_poly_clear(qB); } coeff--; if (coeff >= B->length) { fmpz_poly_fit_limbs(Q, ABS(R->coeffs[(coeff-1)*size_R])); size_Q = Q->limbs+1; } } R->length = B->length - 1; _fmpz_poly_normalise(R); if (!lead_is_one) { size_Q = Q->limbs + 1; coeff_Q = Q->coeffs; fmpz_t pow = (fmpz_t) flint_stack_alloc(size_B_lead*(m-n+1)+1); fmpz_set(pow, B_lead); for (long i = 1; i <= m-n; i++) { coeff_Q = Q->coeffs + i*size_Q; fmpz_poly_fit_limbs(Q, ABS(coeff_Q[0]) + ABS(pow[0])); size_Q = Q->limbs + 1; coeff_Q = Q->coeffs + i*size_Q; fmpz_mul(coeff_Q, coeff_Q, pow); if (i < m-n) 
fmpz_mul(pow, pow, B_lead); } flint_stack_release(); } } /* Pseudo division of A by B. Returns Q, R and d such that l^d A = QB + R where l is the leading coefficient of B. This is faster than the pseudo divisions above when there is no coefficient explosion, but is slower otherwise. It is usually desirable to use this version unless you know specifically that coefficient explosion will occur. */ void fmpz_poly_pseudo_divrem_basecase(fmpz_poly_t Q, fmpz_poly_t R, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B) { fmpz_poly_t qB; if (B->length == 0) { printf("Error: Divide by zero\n"); abort(); } unsigned long size_A = A->limbs + 1; unsigned long size_B = B->limbs + 1; unsigned long size_R; unsigned long limbs_R; unsigned long size_Q; unsigned long limbs_Q; fmpz_t coeffs_A = A->coeffs; fmpz_t coeffs_B = B->coeffs; fmpz_t B_lead = coeffs_B + (B->length-1)*size_B; fmpz_t coeff_Q; fmpz_t coeff_R; fmpz_t coeffs_R; int scale; long m = A->length; long n = B->length; NORM(B_lead); unsigned long size_B_lead = ABS(B_lead[0]); mp_limb_t sign_B_lead = B_lead[0]; mp_limb_t sign_quot; fmpz_poly_fit_length(R, A->length); fmpz_poly_fit_limbs(R, A->limbs); R->length = A->length; _fmpz_poly_set(R, A); coeffs_R = R->coeffs; size_R = R->limbs+1; *d = 0; if ((long) R->length >= (long) B->length) { fmpz_poly_fit_length(Q, R->length-B->length+1); fmpz_poly_fit_limbs(Q, ABS(A->coeffs[(A->length-1)*(A->limbs+1)])); for (unsigned long i = 0; i < R->length-B->length+1; i++) Q->coeffs[i*(Q->limbs+1)] = 0; Q->length = R->length-B->length+1; size_Q = Q->limbs+1; } else { _fmpz_poly_zero(Q); return; } fmpz_poly_t Bm1; Bm1->length = B->length - 1; Bm1->limbs = B->limbs; Bm1->coeffs = B->coeffs; coeff_R = coeffs_R + (R->length-1)*size_R; fmpz_t rem = (fmpz_t) flint_heap_alloc(size_B_lead+1); while ((long) R->length >= (long) B->length) { coeff_Q = Q->coeffs+(R->length - B->length)*size_Q; __fmpz_normalise(coeff_R); sign_quot = ABS(coeff_R[0]) - size_B_lead + 1; if (((long) sign_quot > 
1) || ((sign_quot == 1) && (mpn_cmp(coeff_R+1, B_lead+1, size_B_lead) >= 0))) { mpn_tdiv_qr(coeff_Q+1, rem+1, 0, coeff_R+1, ABS(coeff_R[0]), B_lead+1, size_B_lead); rem[0] = size_B_lead; __fmpz_normalise(rem); } else { coeff_Q[0] = 0; if (coeff_R[0] == 0) rem[0] = 0; else { rem[0] = 1; rem[1] = 1; } } if (fmpz_is_zero(rem)) { if (((long) (sign_B_lead ^ coeff_R[0])) < 0) { coeff_Q[0] = -sign_quot; for (unsigned long i = 0; i < size_B_lead; i++) { if (rem[i]) { fmpz_sub_ui_inplace(coeff_Q,1); break; } } } else { coeff_Q[0] = sign_quot; } NORM(coeff_Q); scale = 0; } else { _fmpz_poly_scalar_mul_fmpz(Q, Q, B_lead); fmpz_set(coeff_Q, coeff_R); scale = 1; (*d)++; } if (B->length > 1) { fmpz_poly_init2(qB, B->length-1, B->limbs+ABS(coeff_Q[0])); _fmpz_poly_scalar_mul_fmpz(qB, Bm1, coeff_Q); } if (scale) { fmpz_poly_fit_limbs(R, FLINT_MAX(R->limbs + size_B_lead, qB->limbs) + 1); coeffs_R = R->coeffs; size_R = R->limbs+1; _fmpz_poly_scalar_mul_fmpz(R, R, B_lead); } else if (B->length > 1) { fmpz_poly_fit_limbs(R, FLINT_MAX(R->limbs, qB->limbs) + 1); coeffs_R = R->coeffs; size_R = R->limbs+1; } fmpz_poly_t R_sub; R_sub->coeffs = coeffs_R+(R->length-B->length)*size_R; R_sub->limbs = R->limbs; R_sub->length = B->length-1; if (B->length > 1) { _fmpz_poly_sub(R_sub, R_sub, qB); fmpz_poly_clear(qB); } R_sub->coeffs[(B->length-1)*(R_sub->limbs+1)] = 0; _fmpz_poly_normalise(R); coeff_R = coeffs_R + (R->length-1)*size_R; if (R->length) fmpz_poly_fit_limbs(Q, R->limbs); size_Q = Q->limbs+1; } flint_heap_free(rem); } void fmpz_poly_pseudo_div_basecase(fmpz_poly_t Q, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B) { fmpz_poly_t qB, R; fmpz_poly_init(R); if (B->length == 0) { printf("Error: Divide by zero\n"); abort(); } unsigned long size_A = A->limbs + 1; unsigned long size_B = B->limbs + 1; unsigned long size_R; unsigned long limbs_R; unsigned long size_Q; unsigned long limbs_Q; fmpz_t coeffs_A = A->coeffs; fmpz_t coeffs_B = B->coeffs; fmpz_t B_lead = coeffs_B + 
(B->length-1)*size_B; fmpz_t coeff_Q; fmpz_t coeff_R; fmpz_t coeffs_R; int scale; long m = A->length; long n = B->length; NORM(B_lead); unsigned long size_B_lead = ABS(B_lead[0]); mp_limb_t sign_B_lead = B_lead[0]; mp_limb_t sign_quot; fmpz_poly_fit_length(R, A->length); fmpz_poly_fit_limbs(R, A->limbs); R->length = A->length; _fmpz_poly_set(R, A); coeffs_R = R->coeffs; size_R = R->limbs+1; *d = 0; if ((long) R->length >= (long) B->length) { fmpz_poly_fit_length(Q, R->length-B->length+1); fmpz_poly_fit_limbs(Q, ABS(A->coeffs[(A->length-1)*(A->limbs+1)])); for (unsigned long i = 0; i < R->length-B->length+1; i++) Q->coeffs[i*(Q->limbs+1)] = 0; Q->length = R->length-B->length+1; size_Q = Q->limbs+1; } else { _fmpz_poly_zero(Q); return; } fmpz_poly_t Bm1; Bm1->length = B->length - 1; Bm1->limbs = B->limbs; Bm1->coeffs = B->coeffs; coeff_R = coeffs_R + (R->length-1)*size_R; fmpz_t rem = (fmpz_t) flint_heap_alloc(size_B_lead+1); while ((long) R->length >= (long) B->length) { coeff_Q = Q->coeffs+(R->length - B->length)*size_Q; __fmpz_normalise(coeff_R); sign_quot = ABS(coeff_R[0]) - size_B_lead + 1; if (((long) sign_quot > 1) || ((sign_quot == 1) && (mpn_cmp(coeff_R+1, B_lead+1, size_B_lead) >= 0))) { mpn_tdiv_qr(coeff_Q+1, rem+1, 0, coeff_R+1, ABS(coeff_R[0]), B_lead+1, size_B_lead); rem[0] = size_B_lead; __fmpz_normalise(rem); } else { coeff_Q[0] = 0; if (coeff_R[0] == 0) rem[0] = 0; else { rem[0] = 1; rem[1] = 1; } } if (fmpz_is_zero(rem)) { if (((long) (sign_B_lead ^ coeff_R[0])) < 0) { coeff_Q[0] = -sign_quot; for (unsigned long i = 0; i < size_B_lead; i++) { if (rem[i]) { fmpz_sub_ui_inplace(coeff_Q,1); break; } } } else { coeff_Q[0] = sign_quot; } NORM(coeff_Q); scale = 0; } else { _fmpz_poly_scalar_mul_fmpz(Q, Q, B_lead); fmpz_set(coeff_Q, coeff_R); scale = 1; (*d)++; } if (R->length != B->length) { if (B->length > 1) { fmpz_poly_init2(qB, B->length-1, B->limbs+ABS(coeff_Q[0])); _fmpz_poly_scalar_mul_fmpz(qB, Bm1, coeff_Q); } if (scale) { fmpz_poly_fit_limbs(R, 
FLINT_MAX(R->limbs + size_B_lead, qB->limbs) + 1); coeffs_R = R->coeffs; size_R = R->limbs+1; _fmpz_poly_scalar_mul_fmpz(R, R, B_lead); } fmpz_poly_t R_sub; R_sub->coeffs = coeffs_R+(R->length-B->length)*size_R; R_sub->limbs = R->limbs; R_sub->length = B->length-1; if (B->length > 1) { _fmpz_poly_sub(R_sub, R_sub, qB); fmpz_poly_clear(qB); } R_sub->coeffs[(B->length-1)*(R_sub->limbs+1)] = 0; _fmpz_poly_normalise(R); coeff_R = coeffs_R + (R->length-1)*size_R; if (R->length) fmpz_poly_fit_limbs(Q, R->limbs); size_Q = Q->limbs+1; } else R->length = 0; } fmpz_poly_clear(R); flint_heap_free(rem); } /* Pseudo division using a divide and conquer algorithm. */ void fmpz_poly_pseudo_divrem_recursive(fmpz_poly_t Q, fmpz_poly_t R, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B) { if (A->length < B->length) { fmpz_poly_fit_length(R, A->length); fmpz_poly_fit_limbs(R, A->limbs); _fmpz_poly_set(R, A); _fmpz_poly_zero(Q); *d = 0; return; } unsigned long crossover = 16; unsigned long crossover2 = 128; if (B->limbs > 16) crossover = 8; if ((B->length <= 12) && (B->limbs > 8)) crossover = 8; if ((B->length <= crossover) || ((A->length > 2*B->length - 1) && (A->length < crossover2))) { fmpz_poly_pseudo_divrem_basecase(Q, R, d, A, B); return; } fmpz_poly_t d1, d2, d3, d4, p1, q1, q2, dq1, dq2, r1, d2q1, d2q2, r2, t, u, temp; fmpz_t B_lead; unsigned long size_B_lead; unsigned long bits_B_lead; unsigned long n1 = (B->length+1)/2; unsigned long n2 = B->length - n1; /* We let B = d1*x^n2 + d2 */ _fmpz_poly_attach_shift(d1, B, n2); _fmpz_poly_attach_truncate(d2, B, n2); _fmpz_poly_attach_shift(d3, B, n1); _fmpz_poly_attach_truncate(d4, B, n1); /* We need the leading coefficient of B */ B_lead = B->coeffs + (B->length-1)*(B->limbs+1); size_B_lead = ABS(B_lead[0]); bits_B_lead = fmpz_bits(B_lead); if (A->length <= n2 + B->length - 1) { /* A is greater than length n1+n2-1 and at most length n1+2*n2-1 We shift right by n1 and zero the last n2-1 coefficients, leaving at at most n2 
significant terms */ _fmpz_poly_stack_init(p1, A->length-n1, A->limbs); _fmpz_poly_right_shift(p1, A, n1); _fmpz_poly_zero_coeffs(p1, n2-1); /* We compute p1 div d3 which is at most a 2*n2-1 by n2 division, leaving n2 terms in the quotient. Since we are doing pseudo division, the remainder will have at most n2-1 terms */ fmpz_poly_init(r1); fmpz_poly_pseudo_divrem_recursive(Q, r1, d, p1, d3); _fmpz_poly_stack_clear(p1); /* We compute d2q1 = Q*d4 It will have at most n1+n2-1 terms */ _fmpz_poly_stack_init(d2q1, d4->length+Q->length-1, d4->limbs+Q->limbs+1); _fmpz_poly_mul(d2q1, d4, Q); /* Compute R = lead(B)^n * R' where R' is the terms of A we haven't dealt with, of which there are at most n1+n2-1 */ fmpz_poly_fit_length(R, n1+n2-1); fmpz_poly_fit_limbs(R, FLINT_MAX(FLINT_MAX(A->limbs+((*d)*bits_B_lead)/FLINT_BITS+1, r1->limbs), d2q1->limbs)+1); fmpz_t pow = (fmpz_t) flint_stack_alloc((bits_B_lead*(*d))/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, *d); _fmpz_poly_attach_truncate(temp, A, n1+n2-1); _fmpz_poly_scalar_mul_fmpz(R, temp, pow); flint_stack_release(); /* Compute the original remainder from the first pseudo division r' = r1^n1 - d2q1. This should be thought of as r'/lead(B)^n We add this to the remainder R', first multiplying everything through by lead(B)^n. This gives the remainder R + r'. We note r' will have at most n1+n2-1 terms. */ fmpz_poly_fit_length(r1, FLINT_MAX(r1->length+n1, d2q1->length)); _fmpz_poly_left_shift(r1, r1, n1); _fmpz_poly_sub(r1, r1, d2q1); _fmpz_poly_stack_clear(d2q1); _fmpz_poly_add(R, R, r1); fmpz_poly_clear(r1); return; } unsigned long s1, s2; if (A->length > 2*B->length - 1) { // We shift A right until it is length 2*B->length - 1 // We call this polynomial p1. Zero the final B->length-1 // coefficients. 
Note A->length > 2*B->length - 1 unsigned long shift = A->length - 2*B->length + 1; _fmpz_poly_stack_init(p1, 2*B->length - 1, A->limbs); _fmpz_poly_right_shift(p1, A, shift); _fmpz_poly_zero_coeffs(p1, B->length - 1); /* Set q1 to p1 div B This is a 2*B->length-1 by B->length division so q1 ends up being at most length B->length r1 is length at most B->length-1 */ fmpz_poly_init(r1); fmpz_poly_init(q1); fmpz_poly_pseudo_divrem_recursive(q1, r1, &s1, p1, B); _fmpz_poly_stack_clear(p1); /* Compute t = (lead(B)^s1) * a2 + r1*x^shift which ends up being at most length A->length - B->length since r1 is at most length B->length-1 Here a2 is what remains of A after the first R->length coefficients are removed. */ _fmpz_poly_stack_init(t, A->length - B->length, FLINT_MAX(A->limbs+(bits_B_lead*s1)/FLINT_BITS+1, r1->limbs)+1); _fmpz_poly_attach_truncate(temp, A, A->length - B->length); fmpz_t pow = (fmpz_t) flint_stack_alloc((bits_B_lead*s1)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, s1); _fmpz_poly_scalar_mul_fmpz(t, temp, pow); flint_stack_release(); fmpz_poly_fit_length(r1, r1->length+shift); _fmpz_poly_left_shift(r1, r1, shift); _fmpz_poly_add(t, t, r1); fmpz_poly_clear(r1); /* Compute q2 = t div B It is a smaller division than the original since t->length <= A->length - B->length r2 has length at most B->length - 1 */ fmpz_poly_init(q2); fmpz_poly_pseudo_divrem_recursive(q2, R, &s2, t, B); _fmpz_poly_stack_clear(t); /* Write out Q = lead(B)^s2*q1*x^shift + q2 Q has length at most B->length+shift Note q2 has length at most shift since at most it is an A->length-B->length by B->length division q1 cannot have length zero since we are doing pseudo division */ fmpz_poly_fit_length(Q, q1->length+shift); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs + (s2*bits_B_lead)/FLINT_BITS+1, q2->limbs)); pow = (fmpz_t) flint_stack_alloc((bits_B_lead*s2)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, s2); _fmpz_poly_scalar_mul_fmpz(Q, q1, pow); fmpz_poly_clear(q1); flint_stack_release(); 
_fmpz_poly_left_shift(Q, Q, shift); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); /* Set d to the power of lead(B) that everything must be multiplied by */ *d = s1 + s2; return; } /* We let A = a1*x^(n1+2*n2-1) + a2*x^(n1+n2-1) + a3 where a1 is at most length n1 and a2 is length n2 and a3 is length n1+n2-1 We set p1 = a1*x^(n1-1), so it has length at most 2*n1-1. We note A is at least length n1+2*n2-1 */ _fmpz_poly_stack_init(p1, A->length-2*n2, A->limbs); _fmpz_poly_right_shift(p1, A, 2*n2); _fmpz_poly_zero_coeffs(p1, n1-1); /* Set q1 to p1 div d1 This is at most a 2*n1-1 by n1 division so q1 ends up being at most length length n1 r1 is length n1-1 */ fmpz_poly_init(r1); fmpz_poly_init(q1); fmpz_poly_pseudo_divrem_recursive(q1, r1, &s1, p1, d1); _fmpz_poly_stack_clear(p1); /* Compute d2q1 = d2*q1 which ends up being length n1+n2-1 Note q1->length is at least 1 since we are doing pseudo division */ _fmpz_poly_stack_init(d2q1, d2->length+q1->length-1, d2->limbs+q1->limbs+1); _fmpz_poly_mul(d2q1, d2, q1); /* Compute t = (lead(B)^s1) * (a2*x^(n1+n2-1)+a3) + r1*x^(2*n2) - d2q1*x^n2 which ends up being at most length n2+B->length-1 since r1 is at most length n1-1 and d2q1 is at most length n1+n2-1 */ _fmpz_poly_stack_init(t, n2+B->length-1, FLINT_MAX(FLINT_MAX(A->limbs+(bits_B_lead*s1)/FLINT_BITS+1, r1->limbs), d2q1->limbs)+1); _fmpz_poly_attach_truncate(temp, A, n2+B->length-1); fmpz_t pow = (fmpz_t) flint_stack_alloc((bits_B_lead*s1)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, s1); _fmpz_poly_scalar_mul_fmpz(t, temp, pow); flint_stack_release(); fmpz_poly_fit_length(r1, FLINT_MAX(r1->length+2*n2, d2q1->length+n2)); _fmpz_poly_left_shift(r1, r1, n2); _fmpz_poly_sub(r1, r1, d2q1); _fmpz_poly_left_shift(r1, r1, n2); _fmpz_poly_add(t, t, r1); fmpz_poly_clear(r1); /* Compute q2 = t div B and set R to the remainder It is at most a n2+B->length-1 by n1+n2 division, so the length of q2 will be at most n2 . 
R will have length at most n1+n2-1 since we are doing pseudo division */ fmpz_poly_init(q2); fmpz_poly_pseudo_divrem_recursive(q2, R, &s2, t, B); _fmpz_poly_stack_clear(t); _fmpz_poly_stack_clear(d2q1); /* Write out Q = lead(B)^s2 * q1*x^n2 + q2 Q has length n1+n2 Note q1->length is not zero since we are doing pseudo division */ fmpz_poly_fit_length(Q, q1->length+n2); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs + (s2*bits_B_lead)/FLINT_BITS+1, q2->limbs)); pow = (fmpz_t) flint_stack_alloc((bits_B_lead*s2)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, s2); _fmpz_poly_scalar_mul_fmpz(Q, q1, pow); fmpz_poly_clear(q1); flint_stack_release(); _fmpz_poly_left_shift(Q, Q, n2); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); /* Set d to the power of lead(B) which everything has been raised to */ *d = s1+s2; } void fmpz_poly_pseudo_div_recursive(fmpz_poly_t Q, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B) { if (A->length < B->length) { _fmpz_poly_zero(Q); *d = 0; return; } unsigned long crossover = 16; unsigned long crossover2 = 256; if (B->limbs > 16) crossover = 8; if ((B->length <= 12) && (B->limbs > 8)) crossover = 8; if ((B->length <= crossover) || ((A->length > 2*B->length - 1) && (A->length < crossover2))) { fmpz_poly_pseudo_div_basecase(Q, d, A, B); return; } fmpz_poly_t d1, d2, d3, d4, p1, q1, q2, dq1, dq2, r1, d2q1, d2q2, r2, t, u, temp; fmpz_t B_lead; unsigned long size_B_lead; unsigned long bits_B_lead; unsigned long n1 = (B->length+1)/2; unsigned long n2 = B->length - n1; /* We let B = d1*x^n2 + d2 */ _fmpz_poly_attach_shift(d1, B, n2); _fmpz_poly_attach_truncate(d2, B, n2); _fmpz_poly_attach_shift(d3, B, n1); _fmpz_poly_attach_truncate(d4, B, n1); /* We need the leading coefficient of B */ B_lead = B->coeffs + (B->length-1)*(B->limbs+1); size_B_lead = ABS(B_lead[0]); bits_B_lead = fmpz_bits(B_lead); if (A->length <= n2 + B->length - 1) { /* A is greater than length n1+n2-1 and at most length n1+2*n2-1 We shift right by n1 and zero the last n2-1 
coefficients, leaving at at most n2 significant terms */ _fmpz_poly_stack_init(p1, A->length-n1, A->limbs); _fmpz_poly_right_shift(p1, A, n1); _fmpz_poly_zero_coeffs(p1, n2-1); /* We compute p1 div d3 which is at most a 2*n2-1 by n2 division, leaving n2 terms in the quotient. */ fmpz_poly_pseudo_div_recursive(Q, d, p1, d3); _fmpz_poly_stack_clear(p1); return; } unsigned long s1, s2; if (A->length > 2*B->length - 1) { // We shift A right until it is length 2*B->length - 1 // We call this polynomial p1. Zero the final B->length-1 // coefficients. Note A->length > 2*B->length - 1 unsigned long shift = A->length - 2*B->length + 1; _fmpz_poly_stack_init(p1, 2*B->length - 1, A->limbs); _fmpz_poly_right_shift(p1, A, shift); _fmpz_poly_zero_coeffs(p1, B->length - 1); /* Set q1 to p1 div B This is a 2*B->length-1 by B->length division so q1 ends up being at most length B->length r1 is length at most B->length-1 */ fmpz_poly_init(r1); fmpz_poly_init(q1); fmpz_poly_pseudo_divrem_recursive(q1, r1, &s1, p1, B); _fmpz_poly_stack_clear(p1); /* Compute t = (lead(B)^s1) * a2 + r1*x^shift which ends up being at most length A->length - B->length since r1 is at most length B->length-1 Here a2 is what remains of A after the first R->length coefficients are removed. 
*/ _fmpz_poly_stack_init(t, A->length - B->length, FLINT_MAX(A->limbs+(bits_B_lead*s1)/FLINT_BITS+1, r1->limbs)+1); _fmpz_poly_attach_truncate(temp, A, A->length - B->length); fmpz_t pow = (fmpz_t) flint_stack_alloc((bits_B_lead*s1)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, s1); _fmpz_poly_scalar_mul_fmpz(t, temp, pow); flint_stack_release(); fmpz_poly_fit_length(r1, r1->length+shift); _fmpz_poly_left_shift(r1, r1, shift); _fmpz_poly_add(t, t, r1); fmpz_poly_clear(r1); /* Compute q2 = t div B It is a smaller division than the original since t->length <= A->length - B->length */ fmpz_poly_init(q2); fmpz_poly_pseudo_div_recursive(q2, &s2, t, B); _fmpz_poly_stack_clear(t); /* Write out Q = lead(B)^s2*q1*x^shift + q2 Q has length at most B->length+shift Note q2 has length at most shift since at most it is an A->length-B->length by B->length division q1 cannot have length zero since we are doing pseudo division */ fmpz_poly_fit_length(Q, q1->length+shift); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs + (s2*bits_B_lead)/FLINT_BITS+1, q2->limbs)); pow = (fmpz_t) flint_stack_alloc((bits_B_lead*s2)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, s2); _fmpz_poly_scalar_mul_fmpz(Q, q1, pow); flint_stack_release(); fmpz_poly_clear(q1); _fmpz_poly_left_shift(Q, Q, shift); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); /* Set d to the power of lead(B) that everything must be multiplied by */ *d = s1 + s2; return; } /* We let A = a1*x^(n1+2*n2-1) + a2*x^(n1+n2-1) + a3 where a1 is at most length n1 and a2 is length n2 and a3 is length n1+n2-1 We set p1 = a1*x^(n1-1), so it has length at most 2*n1-1. 
We note A is at least length n1+2*n2-1 */ _fmpz_poly_stack_init(p1, A->length-2*n2, A->limbs); _fmpz_poly_right_shift(p1, A, 2*n2); _fmpz_poly_zero_coeffs(p1, n1-1); /* Set q1 to p1 div d1 This is at most a 2*n1-1 by n1 division so q1 ends up being at most length length n1 r1 is length n1-1 */ fmpz_poly_init(r1); fmpz_poly_init(q1); fmpz_poly_pseudo_divrem_recursive(q1, r1, &s1, p1, d1); _fmpz_poly_stack_clear(p1); /* Compute d2q1 = d2*q1 which ends up being length n1+n2-1 Note q1->length is at least 1 since we are doing pseudo division */ _fmpz_poly_stack_init(d2q1, d2->length+q1->length-1, d2->limbs+q1->limbs+1); _fmpz_poly_mul(d2q1, d2, q1); /* Compute t = (lead(B)^s1) * (a2*x^(n1+n2-1)+a3) + r1*x^(2*n2) - d2q1*x^n2 which ends up being at most length n2+B->length-1 since r1 is at most length n1-1 and d2q1 is at most length n1+n2-1 */ _fmpz_poly_stack_init(t, n2+B->length-1, FLINT_MAX(FLINT_MAX(A->limbs+(bits_B_lead*s1)/FLINT_BITS+1, r1->limbs), d2q1->limbs)+1); _fmpz_poly_attach_truncate(temp, A, n2+B->length-1); fmpz_t pow = (fmpz_t) flint_stack_alloc((bits_B_lead*s1)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, s1); _fmpz_poly_scalar_mul_fmpz(t, temp, pow); flint_stack_release(); fmpz_poly_fit_length(r1, FLINT_MAX(r1->length+2*n2, d2q1->length+n2)); _fmpz_poly_left_shift(r1, r1, n2); _fmpz_poly_sub(r1, r1, d2q1); _fmpz_poly_left_shift(r1, r1, n2); _fmpz_poly_add(t, t, r1); fmpz_poly_clear(r1); /* Compute q2 = t div B and set R to the remainder It is at most a n2+B->length-1 by n1+n2 division, so the length of q2 will be at most n2 . 
R will have length at most n1+n2-1 since we are doing pseudo division */ fmpz_poly_init(q2); fmpz_poly_pseudo_div_recursive(q2, &s2, t, B); _fmpz_poly_stack_clear(t); _fmpz_poly_stack_clear(d2q1); /* Write out Q = lead(B)^s2 * q1*x^n2 + q2 Q has length n1+n2 Note q1->length is not zero since we are doing pseudo division */ fmpz_poly_fit_length(Q, q1->length+n2); fmpz_poly_fit_limbs(Q, FLINT_MAX(q1->limbs + (s2*bits_B_lead)/FLINT_BITS+1, q2->limbs)); pow = (fmpz_t) flint_stack_alloc((bits_B_lead*s2)/FLINT_BITS+2); fmpz_pow_ui(pow, B_lead, s2); _fmpz_poly_scalar_mul_fmpz(Q, q1, pow); fmpz_poly_clear(q1); flint_stack_release(); _fmpz_poly_left_shift(Q, Q, n2); _fmpz_poly_add(Q, Q, q2); fmpz_poly_clear(q2); /* Set d to the power of lead(B) which everything has been raised to */ *d = s1+s2; } /**************************************************************************** Powering ****************************************************************************/ void fmpz_poly_power(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long exp) { if (exp == 0) { fmpz_poly_fit_limbs(output, 1); fmpz_poly_fit_length(output, 1); fmpz_poly_set_coeff_ui(output, 0, 1); output->length = 1; return; } if ((poly->length == 1) && (poly->coeffs[0] == 1) && (poly->coeffs[1] == 1)) { fmpz_poly_fit_limbs(output, 1); fmpz_poly_fit_length(output, 1); fmpz_poly_set_coeff_ui(output, 0, 1); output->length = 1; return; } if (poly->length == 1) { fmpz_poly_fit_length(output, 1); fmpz_poly_fit_limbs(output, fmpz_size(poly->coeffs)*exp); fmpz_pow_ui(output->coeffs, poly->coeffs, exp); output->length = 1; return; } if (poly->length == 0) { fmpz_poly_fit_limbs(output, 1); fmpz_poly_fit_length(output, 1); output->length = 0; return; } //================================================================================== if (poly->length == 2) // Compute using binomial expansion { fmpz_t coeff1, coeff2; if (poly == output) { coeff1 = fmpz_init(poly->limbs); fmpz_set(coeff1, poly->coeffs); coeff2 = 
fmpz_init(poly->limbs); fmpz_set(coeff2, poly->coeffs + poly->limbs + 1); } else { coeff1 = poly->coeffs; coeff2 = poly->coeffs + poly->limbs + 1; } fmpz_poly_fit_length(output, exp + 1); unsigned long bits2 = fmpz_bits(coeff2); if (coeff1[0] == 0) { fmpz_poly_fit_limbs(output, (bits2*exp-1)/FLINT_BITS + 1); fmpz_t coeff_out = output->coeffs; unsigned long size_out = output->limbs + 1; for (unsigned long i = 0; i < exp; i++) { coeff_out[0] = 0; coeff_out += size_out; } fmpz_pow_ui(coeff_out, coeff2, exp); output->length = exp + 1; if (poly == output) { fmpz_clear(coeff1); fmpz_clear(coeff2); } return; } // A rough estimate of the max number of limbs needed for a coefficient unsigned long bits1 = fmpz_bits(coeff1); unsigned long bits = FLINT_MAX(bits1, bits2); fmpz_t pow; if (!(fmpz_is_one(coeff1) && fmpz_is_one(coeff2))) { bits = exp*(bits+1); } else { bits = exp; } fmpz_poly_fit_limbs(output, (bits-1)/FLINT_BITS+2); long i; unsigned long cbits; fmpz_t coeff_out = output->coeffs; fmpz_t last_coeff; if (fmpz_is_one(coeff2)) { if (fmpz_is_one(coeff1)) { coeff_out = output->coeffs; coeff_out[0] = 1; coeff_out[1] = 1; for (i = 1; i <= exp; i++) { last_coeff = coeff_out; coeff_out += (output->limbs+1); __fmpz_binomial_next(coeff_out, last_coeff, exp, i); } } else { fmpz_poly_set_coeff_ui(output, exp, 1); coeff_out = output->coeffs + exp*(output->limbs+1); for (i = exp-1; i >= 0; i--) { coeff_out = output->coeffs + i*(output->limbs+1); last_coeff = coeff_out + output->limbs+1; __fmpz_binomial_next(coeff_out, last_coeff, exp, exp - i); fmpz_mul(coeff_out, coeff_out, coeff1); } } } else { if (fmpz_is_one(coeff1)) { coeff_out = output->coeffs; coeff_out[0] = 1; coeff_out[1] = 1; for (i = 1; i <= exp; i++) { output->length++; coeff_out = output->coeffs + i*(output->limbs+1); last_coeff = coeff_out - output->limbs - 1; __fmpz_binomial_next(coeff_out, last_coeff, exp, i); fmpz_mul(coeff_out, coeff_out, coeff2); } } else { coeff_out = output->coeffs; fmpz_pow_ui(coeff_out, 
coeff1, exp); for (i = 1; i <= exp; i++) { output->length++; coeff_out = output->coeffs + i*(output->limbs+1); last_coeff = coeff_out - output->limbs - 1; fmpz_tdiv(coeff_out, last_coeff, coeff1); __fmpz_binomial_next(coeff_out, coeff_out, exp, i); fmpz_mul(coeff_out, coeff_out, coeff2); } } } output->length = exp + 1; if (poly == output) { fmpz_clear(coeff1); fmpz_clear(coeff2); } return; } //=================================================================================== fmpz_poly_t temp; fmpz_poly_init(temp); fmpz_poly_fit_length(output, poly->length); fmpz_poly_fit_limbs(output, poly->limbs); _fmpz_poly_set(output, poly); unsigned long bits = FLINT_BIT_COUNT(exp); fmpz_poly_t polycopy; if (poly == output) { fmpz_poly_init(polycopy); fmpz_poly_set(polycopy, poly); } else _fmpz_poly_attach(polycopy, poly); while (bits > 1) { fmpz_poly_mul(output, output, output); if ((1L<<(bits-2)) & exp) { fmpz_poly_mul(output, output, polycopy); } bits--; } if (poly == output) fmpz_poly_clear(polycopy); } void fmpz_poly_power_trunc_n(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long exponent, const unsigned long n) { unsigned long exp = exponent; fmpz_poly_t power, temp; fmpz_poly_init(power); fmpz_poly_init(temp); if ((poly->length == 0) || (n == 0)) { fmpz_poly_fit_limbs(output, 1); fmpz_poly_fit_length(output, 1); output->length = 0; return; } if (exp == 0) { fmpz_poly_fit_limbs(output, 1); fmpz_poly_fit_length(output, 1); fmpz_poly_set_coeff_ui(output, 0, 1); output->length = 1; return; } if ((poly->length == 1) && (poly->coeffs[0] == 1) && (poly->coeffs[1] == 1)) { fmpz_poly_fit_limbs(output, 1); fmpz_poly_fit_length(output, 1); fmpz_poly_set_coeff_ui(output, 0, 1); output->length = 1; return; } fmpz_poly_fit_length(output, n); // Set output to poly fmpz_poly_fit_limbs(output, poly->limbs); if (poly->length <= n) _fmpz_poly_set(output, poly); else { if (poly == output) { _fmpz_poly_truncate(output, n); } else { fmpz_poly_t temp2; 
_fmpz_poly_attach_truncate(temp2, poly, n); _fmpz_poly_set(output, temp2); } _fmpz_poly_normalise(output); } while (!(exp & 1L)) // Square until we get to the first binary 1 in the exponent { fmpz_poly_mul_trunc_n(output, output, output, n); exp >>= 1; } exp >>= 1; if (exp) // Exponent is not just a power of 2, so keep multiplying by higher powers { fmpz_poly_fit_length(power, n); fmpz_poly_fit_limbs(power, output->limbs); _fmpz_poly_set(power, output); while (exp) { fmpz_poly_mul_trunc_n(power, power, power, n); if (exp & 1) { fmpz_poly_mul_trunc_n(output, output, power, n); } exp >>= 1; } } } /**************************************************************************** Content ****************************************************************************/ void fmpz_poly_content(fmpz_t c, fmpz_poly_t poly) { unsigned long length = poly->length; if (length == 0) { fmpz_set_ui(c, 0L); return; } if (length == 1) { fmpz_set(c, poly->coeffs); if ((long) c[0] < 0L) c[0] = -c[0]; return; } fmpz_t coeff = fmpz_poly_get_coeff_ptr(poly, length - 1); fmpz_set(c, coeff); for (long i = length - 2; (i >= 0L) && !fmpz_is_one(c); i--) { coeff = fmpz_poly_get_coeff_ptr(poly, i); fmpz_gcd(c, c, coeff); } } /**************************************************************************** GCD ****************************************************************************/ void fmpz_poly_gcd_subresultant(fmpz_poly_t D, const fmpz_poly_t poly1, const fmpz_poly_t poly2) { fmpz_poly_t Ain, Bin; //printf("Ain = "); fmpz_poly_print_pretty(poly1, "x"); printf("\n"); //printf("Bin = "); fmpz_poly_print_pretty(poly2, "x"); printf("\n"); if (poly2->length > poly1->length) { _fmpz_poly_attach(Ain, poly2); _fmpz_poly_attach(Bin, poly1); } else { _fmpz_poly_attach(Ain, poly1); _fmpz_poly_attach(Bin, poly2); } if (Bin->length == 0) { fmpz_poly_set(D, Ain); return; } fmpz_t a, b, d; a = fmpz_init(Ain->limbs); b = fmpz_init(Bin->limbs); fmpz_poly_content(a, Ain); fmpz_poly_content(b, Bin); //printf("a = "); 
fmpz_print(a); printf("\n"); //printf("b = "); fmpz_print(b); printf("\n"); d = fmpz_init(FLINT_MIN(fmpz_size(a), fmpz_size(b))); fmpz_gcd(d, a, b); fmpz_poly_t A, B, Q, R; fmpz_poly_init(A); fmpz_poly_init(B); fmpz_poly_init(Q); fmpz_poly_init(R); unsigned long s; fmpz_poly_scalar_div_fmpz(A, Ain, a); fmpz_poly_scalar_div_fmpz(B, Bin, b); fmpz_clear(b); //release b fmpz_clear(a); //release a int done = 0; fmpz_t g; fmpz_t h = fmpz_init(1); fmpz_t one = fmpz_init(1); fmpz_set_ui(h, 1UL); fmpz_set_ui(one, 1UL); g = one; unsigned long olddelta = 1; while (!done) { //printf("A = "); fmpz_poly_print_pretty(A, "x"); printf("\n"); //printf("B = "); fmpz_poly_print_pretty(B, "x"); printf("\n"); unsigned long delta = A->length - B->length; fmpz_poly_pseudo_divrem(Q, R, &s, A, B); //printf("R = "); fmpz_poly_print_pretty(R, "x"); printf("\n"); if (R->length > 1) { //printf("delta = %ld\n", delta); fmpz_poly_swap(A, B); fmpz_t r; if (olddelta == 1) { r = fmpz_init((delta+1)*fmpz_size(g)+1); fmpz_pow_ui(r, g, delta+1); } else { r = fmpz_init(fmpz_size(g) + delta*fmpz_size(h)+1); fmpz_pow_ui(r, h, delta); fmpz_mul(r, r, g); } g = fmpz_poly_get_coeff_ptr(A, A->length - 1); fmpz_t temp = fmpz_init((delta-s+1)*fmpz_size(g)+1); fmpz_pow_ui(temp, g, delta-s+1); fmpz_poly_scalar_mul_fmpz(R, R, temp); fmpz_clear(temp); // release temp fmpz_poly_scalar_div_fmpz(B, R, r); fmpz_clear(r); // release r olddelta = delta; if (delta == 0) { fmpz_clear(h); h = fmpz_init(delta*fmpz_size(g)+1); fmpz_pow_ui(h, g, delta); } else if (delta == 1) { olddelta = 1; fmpz_clear(h); h = fmpz_init(fmpz_size(g)); fmpz_set(h, g); } else { temp = fmpz_init((delta-1)*fmpz_size(h)+1); fmpz_pow_ui(temp, h, delta - 1); fmpz_clear(h); h = fmpz_init(delta*fmpz_size(g)+1); fmpz_t temp2 = fmpz_init(delta*fmpz_size(g)+1); fmpz_pow_ui(temp2, g, delta); fmpz_fdiv(h, temp2, temp); fmpz_clear(temp2); // release temp2 fmpz_clear(temp); // release temp } } else { if (R->length == 1) { fmpz_poly_zero(B); 
fmpz_poly_set_coeff_ui(B, 0, 1UL); } done = 1; } } //printf("B = "); fmpz_poly_print_pretty(B, "x"); printf("\n"); b = fmpz_init(B->limbs+1); fmpz_poly_content(b, B); //printf("b = "); fmpz_print(b); printf("\n"); //printf("d = "); fmpz_print(d); printf("\n"); fmpz_poly_scalar_div_fmpz(D, B, b); fmpz_poly_scalar_mul_fmpz(D, D, d); fmpz_clear(b); // release b if ((long) (_fmpz_poly_lead(D)[0]) < 0L) fmpz_poly_neg(D, D); fmpz_clear(h); fmpz_clear(one); fmpz_poly_clear(A); fmpz_poly_clear(B); fmpz_poly_clear(Q); fmpz_poly_clear(R); fmpz_clear(d); //release d } unsigned long fmpz_poly_max_norm_bits(fmpz_poly_t H) { unsigned long bits = FLINT_ABS(fmpz_poly_max_bits(H)); unsigned long bits_lc = fmpz_bits(_fmpz_poly_lead(H)); return bits - bits_lc + 1; } void fmpz_poly_gcd_modular(fmpz_poly_t H, const fmpz_poly_t poly1, const fmpz_poly_t poly2) { fmpz_poly_t Ain, Bin; //printf("Ain = "); fmpz_poly_print_pretty(poly1, "x"); printf("\n"); //printf("Bin = "); fmpz_poly_print_pretty(poly2, "x"); printf("\n"); if (poly2->length > poly1->length) { _fmpz_poly_attach(Ain, poly2); _fmpz_poly_attach(Bin, poly1); } else { _fmpz_poly_attach(Ain, poly1); _fmpz_poly_attach(Bin, poly2); } if (Bin->length == 0) { fmpz_poly_set(H, Ain); return; } fmpz_t ac, bc, d; ac = fmpz_init(Ain->limbs); bc = fmpz_init(Bin->limbs); fmpz_poly_content(ac, Ain); fmpz_poly_content(bc, Bin); //printf("a = "); fmpz_print(a); printf("\n"); //printf("b = "); fmpz_print(b); printf("\n"); d = fmpz_init(FLINT_MIN(fmpz_size(ac), fmpz_size(bc))); fmpz_gcd(d, ac, bc); if (Bin->length == 1) { fmpz_poly_set_coeff_fmpz(H, 0, d); H->length = 1; fmpz_clear(d); fmpz_clear(ac); // release ac fmpz_clear(bc); // release bc return; } fmpz_poly_t A, B; fmpz_poly_init(A); fmpz_poly_init(B); fmpz_poly_scalar_div_fmpz(A, Ain, ac); fmpz_poly_scalar_div_fmpz(B, Bin, bc); fmpz_clear(bc); //release bc fmpz_clear(ac); //release ac unsigned long bits1 = FLINT_ABS(fmpz_poly_max_bits(A)); unsigned long bits2 = 
FLINT_ABS(fmpz_poly_max_bits(B)); unsigned long bits = FLINT_MAX(bits1, bits2); unsigned long nb1 = fmpz_poly_max_norm_bits(A); unsigned long nb2 = fmpz_poly_max_norm_bits(B); unsigned long bound; fmpz_t lead_A = _fmpz_poly_lead(A); fmpz_t lead_B = _fmpz_poly_lead(B); fmpz_t g = fmpz_init(FLINT_MIN(FLINT_ABS(lead_A[0]), FLINT_ABS(lead_B[0]))); fmpz_gcd(g, lead_A, lead_B); unsigned long gbits = fmpz_bits(g); int g_pm1 = 0; if ((FLINT_ABS(g[0]) == 1L) && (g[1] == 1L)) g_pm1 = 1; unsigned long p; unsigned long pbits; if (bits <= FLINT_D_BITS - 2) { pbits = bits+2; p = (1L<<(bits+1)); } else { pbits = FLINT_BITS - 1; p = (1L<<(FLINT_BITS-2)); } zmod_poly_t a, b, h; int first = 1; unsigned long n = B->length; unsigned long modsize = FLINT_MAX(A->limbs, B->limbs); fmpz_t modulus = fmpz_init(modsize); modulus[0] = 0L; fmpz_poly_t Q; fmpz_poly_init(Q); for (;;) { if (!first) { zmod_poly_clear(a); zmod_poly_clear(b); zmod_poly_clear(h); } else first = 0; do { p = z_nextprime(p); } while (!fmpz_mod_ui(g, p)); zmod_poly_init(a, p); zmod_poly_init(b, p); zmod_poly_init(h, p); if (bits1 + 1 < pbits) fmpz_poly_to_zmod_poly_no_red(a, A); else fmpz_poly_to_zmod_poly(a, A); if (bits2 + 1 < pbits) fmpz_poly_to_zmod_poly_no_red(b, B); else fmpz_poly_to_zmod_poly(b, B); zmod_poly_gcd(h, a, b); if (h->length == 1) // gcd is 1 { fmpz_poly_set_coeff_ui(H, 0, 1L); break; } if (h->length - 1 > n) // discard continue; if (g_pm1) zmod_poly_make_monic(h, h); else { unsigned long h_inv = z_invert(h->coeffs[h->length-1], h->p); unsigned long g_mod = fmpz_mod_ui(g, h->p); h_inv = z_mulmod2_precomp(h_inv, g_mod, h->p, h->p_inv); zmod_poly_scalar_mul(h, h, h_inv); } if (h->length - 1 < n) { zmod_poly_to_fmpz_poly(H, h); if (g_pm1) { if (fmpz_poly_divides(Q, A, H) && fmpz_poly_divides(Q, B, H)) break; } else { bound = h->length + FLINT_MIN(nb1, nb2) + gbits + 1; if (pbits > bound) { fmpz_t hc = fmpz_init(H->limbs); fmpz_poly_content(hc, H); fmpz_poly_scalar_div_fmpz(H, H, hc); fmpz_clear(hc); // 
release hc break; } } fmpz_set_ui(modulus, p); n = h->length - 1; continue; } fmpz_t newmod = fmpz_init(modulus[0] + 1); if (g_pm1) { if ((fmpz_poly_CRT(H, H, h, newmod, modulus)) || (fmpz_bits(newmod) > FLINT_MIN(bits1, bits2))) if (fmpz_poly_divides(Q, A, H) && fmpz_poly_divides(Q, B, H)) { fmpz_clear(newmod); // release newmod break; } } else { if (fmpz_poly_CRT(H, H, h, newmod, modulus) || (fmpz_bits(newmod) > bound)) { fmpz_t hc = fmpz_init(H->limbs); fmpz_poly_content(hc, H); fmpz_poly_scalar_div_fmpz(H, H, hc); fmpz_clear(hc); // release hc if ((fmpz_bits(newmod) > bound) || (fmpz_poly_divides(Q, A, H) && fmpz_poly_divides(Q, B, H))) { fmpz_clear(newmod); // release newmod break; } } } if (newmod[0] >= modsize) { modulus = fmpz_realloc(modulus, (modsize+8)); modsize += 8; } fmpz_set(modulus, newmod); fmpz_clear(newmod); // release newmod } fmpz_clear(g); // release g zmod_poly_clear(a); zmod_poly_clear(b); zmod_poly_clear(h); fmpz_poly_scalar_mul_fmpz(H, H, d); if ((long) (_fmpz_poly_lead(H)[0]) < 0L) fmpz_poly_neg(H, H); fmpz_poly_clear(A); fmpz_poly_clear(B); fmpz_poly_clear(Q); fmpz_clear(modulus); fmpz_clear(d); //release d } void fmpz_poly_gcd(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2) { if (poly1 == poly2) { if (res != poly1) fmpz_poly_set(res, poly1); return; } if ((poly1->length == 1) || (poly2->length == 1)) { fmpz_poly_gcd_modular(res, poly1, poly2); return; } if ((poly1->length <= 6) && (poly2->length <= 6)) { fmpz_poly_gcd_subresultant(res, poly1, poly2); return; } unsigned long max_length = (poly1->length > poly2->length) ? 
poly1->length : poly2->length; if (max_length > 1) { fmpz_poly_gcd_modular(res, poly1, poly2); return; } unsigned long bits1 = FLINT_ABS(fmpz_poly_max_bits(poly1)); unsigned long bits2 = FLINT_ABS(fmpz_poly_max_bits(poly2)); unsigned long max_bits = FLINT_MAX(bits1, bits2); if (max_bits < FLINT_BITS) { fmpz_poly_gcd_modular(res, poly1, poly2); return; } if (max_length*max_length*max_length*max_length*max_bits > 2000000UL) { fmpz_poly_gcd_modular(res, poly1, poly2); return; } fmpz_poly_gcd_subresultant(res, poly1, poly2); } /* Invert poly1 modulo poly2 with denominator (not guaranteed to be the resultant) i.e. H*poly1 = d modulo poly2 Assumes poly1 is reduced modulo poly2, which is monic and irreducible Assumes d has enough space to store fmpz_poly_resultant_bound(poly1, poly2)/FLINT_BITS + 2 limbs */ void fmpz_poly_invmod_modular(fmpz_t d, fmpz_poly_t H, fmpz_poly_t poly1, fmpz_poly_t poly2) { FLINT_ASSERT(poly2->length > poly1->length); if ((poly1->length == 1) && (poly2->length > 1)) { fmpz_set(d, poly1->coeffs); fmpz_poly_set_coeff_ui(H, 0, 1L); H->length = 1; return; } if ((poly1->length == 0) || (poly2->length == 0)) { printf("Error: divide by zero!\n"); abort(); } fmpz_poly_t A, B; fmpz_poly_init(A); fmpz_poly_init(B); fmpz_poly_set(A, poly1); fmpz_poly_set(B, poly2); fmpz_poly_t prod, quot, rem; fmpz_poly_init(prod); fmpz_poly_init(quot); fmpz_poly_init(rem); unsigned long p = (1L<<(FLINT_BITS-2)); zmod_poly_t a, b, h; int first = 1; unsigned long n = B->length; unsigned long modsize = FLINT_MAX(A->limbs, B->limbs); fmpz_t modulus = fmpz_init(modsize); modulus[0] = 0L; fmpz_poly_t Q; fmpz_poly_init(Q); for (;;) { if (!first) { zmod_poly_clear(a); zmod_poly_clear(b); zmod_poly_clear(h); } p = z_nextprime(p); zmod_poly_init(a, p); zmod_poly_init(b, p); zmod_poly_init(h, p); fmpz_poly_to_zmod_poly(a, A); fmpz_poly_to_zmod_poly(b, B); unsigned long r = zmod_poly_resultant(a, b); if ((fmpz_mod_ui(_fmpz_poly_lead(A), p) == 0L) || (r == 0L)) { continue; } unsigned 
long coprime = zmod_poly_gcd_invert(h, a, b); if (!coprime) { continue; } zmod_poly_scalar_mul(h, h, r); if (first) { zmod_poly_to_fmpz_poly(H, h); fmpz_set_ui(modulus, p); first = 0; continue; } fmpz_t newmod = fmpz_init(modulus[0] + 1); if (fmpz_poly_CRT(H, H, h, newmod, modulus)) { fmpz_t hc = fmpz_init(H->limbs); fmpz_poly_content(hc, H); fmpz_poly_scalar_div_fmpz(H, H, hc); fmpz_clear(hc); // release hc fmpz_clear(newmod); // release newmod fmpz_poly_mul(prod, H, poly1); fmpz_poly_divrem(quot, rem, prod, poly2); if (rem->length == 1) { fmpz_set(d, rem->coeffs); break; } } if (newmod[0] >= modsize) { modulus = fmpz_realloc(modulus, (modsize+8)); modsize += 8; } fmpz_set(modulus, newmod); fmpz_clear(newmod); // release newmod } zmod_poly_clear(a); zmod_poly_clear(b); zmod_poly_clear(h); fmpz_poly_clear(quot); fmpz_poly_clear(rem); fmpz_poly_clear(prod); fmpz_poly_clear(A); fmpz_poly_clear(B); fmpz_poly_clear(Q); fmpz_clear(modulus); } void fmpz_poly_xgcd_modular(fmpz_t r, fmpz_poly_t s, fmpz_poly_t t, fmpz_poly_t a, fmpz_poly_t b) { fmpz_poly_resultant(r, a, b); if (r[0] == 0) { return; } int stabilised = 0; fmpz_t prod = fmpz_init(a->limbs + 1); unsigned long modsize = a->limbs + 1; fmpz_set_ui(prod, 1L); fmpz_poly_zero(s); fmpz_poly_zero(t); unsigned long p = (1L<<(FLINT_BITS-2)); int first = 1; for (;;) { p = z_nextprime(p); unsigned long R = fmpz_mod_ui(r, p); if ((fmpz_mod_ui(_fmpz_poly_lead(a), p) == 0L) || (fmpz_mod_ui(_fmpz_poly_lead(b), p) == 0L) || (R == 0)) continue; zmod_poly_t D, S, T, A, B; zmod_poly_init(D, p); zmod_poly_init(S, p); zmod_poly_init(T, p); zmod_poly_init(A, p); zmod_poly_init(B, p); fmpz_poly_to_zmod_poly(A, a); fmpz_poly_to_zmod_poly(B, b); if (stabilised) { fmpz_poly_to_zmod_poly(S, s); fmpz_poly_to_zmod_poly(T, t); zmod_poly_t t1, t2; zmod_poly_init(t1, p); zmod_poly_init(t2, p); zmod_poly_mul(t1, A, S); zmod_poly_mul(t2, B, T); zmod_poly_add(t1, t1, t2); if ((t1->length == 1) && (t1->coeffs[0] == R)) fmpz_mul_ui(prod, prod, p); 
else stabilised = 0; if (prod[0] >= modsize - 1) { prod = fmpz_realloc(prod, modsize+8); modsize += 8; } } if (!stabilised) { zmod_poly_xgcd(D, S, T, A, B); zmod_poly_scalar_mul(S, S, R); zmod_poly_scalar_mul(T, T, R); if (first) { zmod_poly_to_fmpz_poly(s, S); zmod_poly_to_fmpz_poly(t, T); fmpz_set_ui(prod, p); stabilised = 1; first = 0; } else { if (prod[0] >= modsize - 2) { modsize += 8; } fmpz_t tmp = fmpz_init(modsize); int S_stabilised = fmpz_poly_CRT(s, s, S, tmp, prod); int T_stabilised = fmpz_poly_CRT(t, t, T, tmp, prod); fmpz_clear(prod); prod = tmp; stabilised = S_stabilised && T_stabilised; } } if (stabilised) { unsigned long bound1 = FLINT_BIT_COUNT(FLINT_MIN(a->length, s->length)) + FLINT_ABS(_fmpz_poly_max_bits(a)) + FLINT_ABS(_fmpz_poly_max_bits(s)); unsigned long bound2 = FLINT_BIT_COUNT(FLINT_MIN(b->length, t->length)) + FLINT_ABS(_fmpz_poly_max_bits(b)) + FLINT_ABS(_fmpz_poly_max_bits(t)); unsigned long bound = 4 + FLINT_MAX(fmpz_bits(r), FLINT_MAX(bound1, bound2)); if (modsize < bound/FLINT_BITS + 2) { prod = fmpz_realloc(prod, bound/FLINT_BITS + 2); modsize = bound/FLINT_BITS + 2; } if (fmpz_bits(prod) > bound) break; } } } /**************************************************************************** Resultant ****************************************************************************/ void fmpz_poly_2norm(fmpz_t norm, fmpz_poly_t pol) { if (pol->length == 0) { norm[0] = 0L; return; } fmpz_t sqr = fmpz_init(2*pol->limbs); fmpz_t sum = fmpz_init(2*pol->limbs + 1); fmpz_t temp = fmpz_init(2*pol->limbs + 1); unsigned long size = pol->limbs+1; fmpz_t coeff = pol->coeffs; fmpz_set_ui(sum, 0L); for (unsigned long i = 0; i < pol->length; i++) { fmpz_mul(sqr, coeff, coeff); fmpz_add(sum, sum, sqr); coeff += size; } fmpz_sqrtrem(norm, temp, sum); if (temp[0]) fmpz_add_ui(norm, norm, 1L); fmpz_clear(temp); // release temp fmpz_clear(sum); // release sum fmpz_clear(sqr); // release sqr } unsigned long fmpz_poly_resultant_bound(fmpz_poly_t a, fmpz_poly_t 
/*
   Sets res to the resultant of a and b, computed modulo successive word
   sized primes and combined by Chinese remaindering until the product of
   the primes has more bits than fmpz_poly_resultant_bound(a, b) + 2.
   The combined value is finally moved into the symmetric range about 0.

   If either polynomial has length 0, res is set to 0.
*/
void fmpz_poly_resultant(fmpz_t res, fmpz_poly_t a, fmpz_poly_t b)
{
   if ((a->length == 0) || (b->length == 0))
   {
      res[0] = 0L;
      return;
   }

   unsigned long bound = fmpz_poly_resultant_bound(a, b)+2;

   // modulus is the product of the primes used so far
   fmpz_t modulus = fmpz_init(bound/FLINT_BITS + 2);
   fmpz_set_ui(res, 0L);
   fmpz_set_ui(modulus, 1L);

   unsigned long p = (1L<<(FLINT_BITS-2));
   unsigned long image;
   int have_image = 0;

   while (fmpz_bits(modulus) <= bound)
   {
      p = z_nextprime(p);

      // skip primes dividing either leading coefficient
      if (fmpz_mod_ui(_fmpz_poly_lead(a), p) == 0L) continue;
      if (fmpz_mod_ui(_fmpz_poly_lead(b), p) == 0L) continue;

      zmod_poly_t a_mod, b_mod;
      zmod_poly_init(a_mod, p);
      zmod_poly_init(b_mod, p);

      fmpz_poly_to_zmod_poly(a_mod, a);
      fmpz_poly_to_zmod_poly(b_mod, b);

      image = zmod_poly_resultant(a_mod, b_mod);

      if (!have_image)
      {
         // first usable prime: take the image as it is
         fmpz_set_ui(modulus, p);
         fmpz_set_ui(res, image);
         have_image = 1;
      } else
      {
         // fold the new image into res, then extend the modulus
         unsigned long inverse = z_invert(fmpz_mod_ui(modulus, p), p);
         fmpz_CRT_ui2_precomp(res, res, modulus, image, p, inverse, a_mod->p_inv);
         fmpz_mul_ui(modulus, modulus, p);
      }

      zmod_poly_clear(a_mod);
      zmod_poly_clear(b_mod);
   }

   // values above half the modulus represent negative resultants
   fmpz_t half = fmpz_init(modulus[0]);
   fmpz_div_2exp(half, modulus, 1);
   if (fmpz_cmpabs(res, half) > 0L)
   {
      fmpz_sub(res, res, modulus);
   }

   fmpz_clear(half);
   fmpz_clear(modulus);
}
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** ZmodF-test.c: test module for ZmodF module Copyright (C) 2007, David Harvey TODO: establish and test overflow bit guarantees ******************************************************************************/ #include #include "ZmodF.h" #include "ZmodF_mul.h" #include "test-support.h" mpz_t global_mpz; // to avoid frequent mpz_init calls // ============================================================================ // Test case support code /* Prints the ZmodF_t to stdout in hex, each limb in a separate block, most significant limb (i.e. the overflow limb) first. */ void ZmodF_print(ZmodF_t x, unsigned long n) { for (long i = n; i >= 0; i--) #if FLINT_BITS == 64 printf("%016lx ", x[i]); #else printf("%08lx ", x[i]); #endif } /* Generates a random ZmodF_t with at most overflow_bits used in the overflow limb. More precisely, the top (FLINT_BITS - overflow_bits) bits will all be equal to the sign bit. It uses mpz_rrandomb to get long strings of 0's and 1's. 
*/ void ZmodF_random(ZmodF_t x, unsigned long n, unsigned long overflow_bits) { ZmodF_zero(x, n); mpz_rrandomb(global_mpz, randstate, (n+1)*FLINT_BITS); mpz_export(x, NULL, -1, sizeof(mp_limb_t), 0, 0, global_mpz); // GMP has a "bug" where the top bit of the output of mpz_rrandomb // is always set. So we flip everything with probability 1/2. if (random_ulong(2)) for (unsigned long i = 0; i <= n; i++) x[i] = ~x[i]; // Now copy the sign bit downwards so that only overflow_bits bits are used. if ((mp_limb_signed_t) x[n] >= 0) x[n] &= (1UL << overflow_bits) - 1; else x[n] |= ~((1UL << overflow_bits) - 1); } #if FLINT_BITS == 64 #define SENTRY_LIMB 0x0123456789abcdefUL #else #define SENTRY_LIMB 0x01234567UL #endif #define MAX_COEFFS 5 #define MAX_N 30 mp_limb_t global_buf[MAX_COEFFS * (MAX_N + 3)]; ZmodF_t coeffs[MAX_COEFFS]; mpz_t coeffs_mpz_in[MAX_COEFFS]; mpz_t coeffs_mpz_out[MAX_COEFFS]; unsigned long global_n = 0; mpz_t global_p; /* Converts given ZmodF_t into mpz_t format, reduced into [0, p) range. Assumes global_n and global_p are set correctly. */ void ZmodF_convert_out(mpz_t output, ZmodF_t input) { int negative = ((mp_limb_signed_t) input[global_n] < 0); if (negative) for (int i = 0; i <= global_n; i++) input[i] = ~input[i]; mpz_import(output, global_n+1, -1, sizeof(mp_limb_t), 0, 0, input); if (negative) { mpz_add_ui(output, output, 1); mpz_neg(output, output); for (int i = 0; i <= global_n; i++) input[i] = ~input[i]; } mpz_mod(output, output, global_p); } /* y := x * 2^(s/2) mod p (using a very naive algorithm) y may alias x Assumes global_n and global_p are set correctly. 
*/ void naive_mul_sqrt2exp(mpz_t y, mpz_t x, unsigned long s) { static mpz_t temp; static int init = 0; if (!init) { mpz_init(temp); init = 1; } if (s & 1) { mpz_mul_2exp(y, x, s/2 + global_n*FLINT_BITS/4); mpz_mul_2exp(temp, y, global_n*FLINT_BITS/2); mpz_sub(y, temp, y); mpz_mod(y, y, global_p); } else { mpz_mul_2exp(y, x, s/2); mpz_mod(y, y, global_p); } } /* y := x * 2^(-s/2) mod p (using a very naive algorithm) y may alias x Assumes global_n and global_p are set correctly. */ void naive_div_sqrt2exp(mpz_t y, mpz_t x, unsigned long s) { naive_mul_sqrt2exp(y, x, 4*global_n*FLINT_BITS - s); } /* Sets up "count" ZmodF_t's in the global array with random data. Makes coeffs[0], ..., coeffs[count-1] point to those buffers. Adds sentry limbs at the ends of each buffer. Converts each coefficient to mpz_t form in coeffs_mpz_in. Sets global_n := n and global_p := B^n + 1. */ void setup_coeffs(unsigned long count, unsigned long n, unsigned long overflow_bits) { assert(n <= MAX_N); assert(count <= MAX_COEFFS); // update global_p only if n has changed since last time if (n != global_n) { global_n = n; mpz_set_ui(global_p, 1); mpz_mul_2exp(global_p, global_p, n*FLINT_BITS); mpz_add_ui(global_p, global_p, 1); } // make pointers point to the right place coeffs[0] = global_buf + 1; for (int i = 1; i < count; i++) coeffs[i] = coeffs[i-1] + (n+3); // add sentry limbs for (int i = 0; i < count; i++) coeffs[i][-1] = coeffs[i][n+1] = SENTRY_LIMB; // generate random coefficients for (int i = 0; i < count; i++) ZmodF_random(coeffs[i], n, overflow_bits); // convert coefficients to coeffs_mpz_in for (int i = 0; i < count; i++) ZmodF_convert_out(coeffs_mpz_in[i], coeffs[i]); } /* Checks that the sentries have not been overwritten, and that the first "count" pointers in "coeffs" points to correct distinct buffers. Converts each coefficient to mpz_t form in coeffs_mpz_out. Returns 1 on success. 
*/ int check_coeffs(unsigned long count, unsigned long n) { // check sentry limbs for (int i = 0; i < count; i++) { if (coeffs[i][-1] != SENTRY_LIMB) return 0; if (coeffs[i][n+1] != SENTRY_LIMB) return 0; } // check pointers point to valid buffers for (int i = 0; i < count; i++) { unsigned long offset = coeffs[i] - global_buf; if (offset % (n+3) != 1) return 0; if ((offset - 1) / (n+3) >= count) return 0; } // check pointers point to distinct buffers for (int i = 0; i < count; i++) for (int j = i+1; j < count; j++) if (coeffs[i] == coeffs[j]) return 0; // convert coefficients to coeffs_mpz_out for (int i = 0; i < count; i++) ZmodF_convert_out(coeffs_mpz_out[i], coeffs[i]); return 1; } // ============================================================================ // Actual test cases for ZmodF functions int test_ZmodF_normalise() { for (unsigned long n = 1; n <= 5; n++) for (unsigned long trial = 0; trial < 100000; trial++) { setup_coeffs(1, n, random_ulong(FLINT_BITS - 2)); ZmodF_normalise(coeffs[0], n); if (!check_coeffs(1, n)) return 0; // check normalised value is in [0, p) if (coeffs[0][n]) { if (coeffs[0][n] != 1) return 0; for (unsigned long i = 0; i < n; i++) if (coeffs[0][i]) return 0; } // check output actually equals input mod p if (mpz_cmp(coeffs_mpz_in[0], coeffs_mpz_out[0])) return 0; } return 1; } int test_ZmodF_fast_reduce() { for (unsigned long n = 1; n <= 5; n++) for (unsigned long trial = 0; trial < 100000; trial++) { setup_coeffs(1, n, random_ulong(FLINT_BITS - 2)); ZmodF_fast_reduce(coeffs[0], n); if (!check_coeffs(1, n)) return 0; // check high limb of normalised value is in [0, 2] if (coeffs[0][n] > 2) return 0; // check output actually equals input mod p if (mpz_cmp(coeffs_mpz_in[0], coeffs_mpz_out[0])) return 0; } return 1; } int test_ZmodF_neg() { for (unsigned long n = 1; n <= 5; n++) for (unsigned long trial = 0; trial < 50000; trial++) for (int inplace = 0; inplace <= 1; inplace++) { setup_coeffs(2, n, random_ulong(FLINT_BITS - 2)); 
ZmodF_neg(coeffs[inplace], coeffs[0], n); if (!check_coeffs(2, n)) return 0; mpz_neg(global_mpz, coeffs_mpz_out[inplace]); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_in[0], global_mpz)) return 0; } return 1; } int test_ZmodF_short_div_2exp() { for (unsigned long n = 1; n <= 3; n++) for (unsigned long trial = 0; trial < 2000; trial++) for (unsigned long s = 1; s < FLINT_BITS; s++) for (int inplace = 0; inplace <= 1; inplace++) { setup_coeffs(2, n, random_ulong(FLINT_BITS - 2)); ZmodF_short_div_2exp(coeffs[inplace], coeffs[0], s, n); if (!check_coeffs(2, n)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[0], 2*s); if (mpz_cmp(coeffs_mpz_out[inplace], global_mpz)) return 0; } return 1; } int test_ZmodF_mul_Bexp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 20000; trial++) for (unsigned long s = 1; s < n; s++) { setup_coeffs(2, n, random_ulong(FLINT_BITS - 2)); ZmodF_mul_Bexp(coeffs[1], coeffs[0], s, n); if (!check_coeffs(2, n)) return 0; naive_mul_sqrt2exp(global_mpz, coeffs_mpz_in[0], 2*FLINT_BITS*s); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; } return 1; } int test_ZmodF_div_Bexp_sub() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 4000; trial++) for (unsigned long s = 1; s < n; s++) for (int inbuf1 = 0; inbuf1 <= 2; inbuf1++) for (int inbuf2 = 1; inbuf2 <= 2; inbuf2++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_div_Bexp_sub(coeffs[0], coeffs[inbuf1], coeffs[inbuf2], s, n); if (!check_coeffs(3, n)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[inbuf2], 2*FLINT_BITS*s); mpz_sub(global_mpz, coeffs_mpz_in[inbuf1], global_mpz); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; } return 1; } int test_ZmodF_div_Bexp_add() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 4000; trial++) for (unsigned long s = 1; s < n; s++) for (int inbuf1 = 0; inbuf1 <= 2; inbuf1++) 
for (int inbuf2 = 1; inbuf2 <= 2; inbuf2++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_div_Bexp_add(coeffs[0], coeffs[inbuf1], coeffs[inbuf2], s, n); if (!check_coeffs(3, n)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[inbuf2], 2*FLINT_BITS*s); mpz_add(global_mpz, coeffs_mpz_in[inbuf1], global_mpz); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; } return 1; } int test_ZmodF_sub_mul_Bexp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 4000; trial++) for (unsigned long s = 1; s < n; s++) for (int inbuf1 = 1; inbuf1 <= 2; inbuf1++) for (int inbuf2 = 1; inbuf2 <= 2; inbuf2++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_sub_mul_Bexp(coeffs[0], coeffs[inbuf1], coeffs[inbuf2], s, n); if (!check_coeffs(3, n)) return 0; mpz_sub(global_mpz, coeffs_mpz_in[inbuf1], coeffs_mpz_in[inbuf2]); naive_mul_sqrt2exp(global_mpz, global_mpz, 2*FLINT_BITS*s); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; } return 1; } int test_ZmodF_mul_pseudosqrt2_n_odd() { for (unsigned long n = 1; n <= 9; n += 2) for (unsigned long trial = 0; trial < 8000; trial++) for (unsigned long s = 0; s < 2*n; s++) { setup_coeffs(2, n, random_ulong(FLINT_BITS - 4)); ZmodF_mul_pseudosqrt2_n_odd(coeffs[1], coeffs[0], s, n); if (!check_coeffs(2, n)) return 0; mpz_mul_2exp(global_mpz, coeffs_mpz_in[0], n*FLINT_BITS/2); mpz_sub(global_mpz, coeffs_mpz_in[0], global_mpz); mpz_mul_2exp(global_mpz, global_mpz, s*FLINT_BITS); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; } return 1; } int test_ZmodF_mul_pseudosqrt2_n_even() { for (unsigned long n = 2; n <= 10; n += 2) for (unsigned long trial = 0; trial < 8000; trial++) for (unsigned long s = 0; s < 2*n; s++) { setup_coeffs(2, n, random_ulong(FLINT_BITS - 2)); ZmodF_mul_pseudosqrt2_n_even(coeffs[1], coeffs[0], s, n); if (!check_coeffs(2, n)) return 0; mpz_mul_2exp(global_mpz, coeffs_mpz_in[0], 
n*FLINT_BITS/2); mpz_sub(global_mpz, coeffs_mpz_in[0], global_mpz); mpz_mul_2exp(global_mpz, global_mpz, s*FLINT_BITS); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; } return 1; } int test_ZmodF_mul_2exp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 500; trial++) for (unsigned long s = 0; s < n*FLINT_BITS; s++) { setup_coeffs(2, n, random_ulong(FLINT_BITS - 3)); ZmodF_mul_2exp(coeffs[1], coeffs[0], s, n); if (!check_coeffs(2, n)) return 0; naive_mul_sqrt2exp(global_mpz, coeffs_mpz_in[0], 2*s); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; } return 1; } int test_ZmodF_mul_sqrt2exp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 500; trial++) for (unsigned long s = 0; s < 2*n*FLINT_BITS; s++) { setup_coeffs(2, n, random_ulong(FLINT_BITS - 6)); ZmodF_mul_sqrt2exp(coeffs[1], coeffs[0], s, n); if (!check_coeffs(2, n)) return 0; naive_mul_sqrt2exp(global_mpz, coeffs_mpz_in[0], s); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; } return 1; } int test_ZmodF_sub_mul_2exp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 100; trial++) for (unsigned long s = 0; s < n*FLINT_BITS; s++) for (int inbuf1 = 1; inbuf1 <= 2; inbuf1++) for (int inbuf2 = 1; inbuf2 <= 2; inbuf2++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_sub_mul_2exp(coeffs[0], coeffs[inbuf1], coeffs[inbuf2], s, n); if (!check_coeffs(3, n)) return 0; mpz_sub(global_mpz, coeffs_mpz_in[inbuf1], coeffs_mpz_in[inbuf2]); naive_mul_sqrt2exp(global_mpz, global_mpz, 2*s); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; } return 1; } int test_ZmodF_forward_butterfly_Bexp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 25000; trial++) for (unsigned long s = 1; s < n; s++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_forward_butterfly_Bexp(&coeffs[0], &coeffs[1], &coeffs[2], s, n); if 
(!check_coeffs(3, n)) return 0; mpz_sub(global_mpz, coeffs_mpz_in[0], coeffs_mpz_in[1]); naive_mul_sqrt2exp(global_mpz, global_mpz, 2*FLINT_BITS*s); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; mpz_add(global_mpz, coeffs_mpz_in[0], coeffs_mpz_in[1]); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; } return 1; } int test_ZmodF_forward_butterfly_2exp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 400; trial++) for (unsigned long s = 0; s < n*FLINT_BITS; s++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_forward_butterfly_2exp(&coeffs[0], &coeffs[1], &coeffs[2], s, n); if (!check_coeffs(3, n)) return 0; mpz_sub(global_mpz, coeffs_mpz_in[0], coeffs_mpz_in[1]); naive_mul_sqrt2exp(global_mpz, global_mpz, 2*s); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; mpz_add(global_mpz, coeffs_mpz_in[0], coeffs_mpz_in[1]); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; } return 1; } int test_ZmodF_forward_butterfly_sqrt2exp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 400; trial++) for (unsigned long s = 0; s < 2*n*FLINT_BITS; s++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_forward_butterfly_sqrt2exp(&coeffs[0], &coeffs[1], &coeffs[2], s, n); if (!check_coeffs(3, n)) return 0; mpz_sub(global_mpz, coeffs_mpz_in[0], coeffs_mpz_in[1]); naive_mul_sqrt2exp(global_mpz, global_mpz, s); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; mpz_add(global_mpz, coeffs_mpz_in[0], coeffs_mpz_in[1]); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; } return 1; } int test_ZmodF_inverse_butterfly_Bexp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 25000; trial++) for (unsigned long s = 1; s < n; s++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_inverse_butterfly_Bexp(&coeffs[0], &coeffs[1], 
&coeffs[2], s, n); if (!check_coeffs(3, n)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[1], 2*FLINT_BITS*s); mpz_add(global_mpz, coeffs_mpz_in[0], global_mpz); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[1], 2*FLINT_BITS*s); mpz_sub(global_mpz, coeffs_mpz_in[0], global_mpz); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; } return 1; } int test_ZmodF_inverse_butterfly_2exp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 400; trial++) for (unsigned long s = 0; s < n*FLINT_BITS; s++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_inverse_butterfly_2exp(&coeffs[0], &coeffs[1], &coeffs[2], s, n); if (!check_coeffs(3, n)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[1], 2*s); mpz_add(global_mpz, coeffs_mpz_in[0], global_mpz); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[1], 2*s); mpz_sub(global_mpz, coeffs_mpz_in[0], global_mpz); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; } return 1; } int test_ZmodF_inverse_butterfly_sqrt2exp() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 400; trial++) for (unsigned long s = 0; s < 2*n*FLINT_BITS; s++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_inverse_butterfly_sqrt2exp(&coeffs[0], &coeffs[1], &coeffs[2], s, n); if (!check_coeffs(3, n)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[1], s); mpz_add(global_mpz, coeffs_mpz_in[0], global_mpz); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; naive_div_sqrt2exp(global_mpz, coeffs_mpz_in[1], s); mpz_sub(global_mpz, coeffs_mpz_in[0], global_mpz); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) 
return 0; } return 1; } int test_ZmodF_simple_butterfly() { for (unsigned long n = 1; n <= 6; n++) for (unsigned long trial = 0; trial < 4000; trial++) { setup_coeffs(3, n, random_ulong(FLINT_BITS - 2)); ZmodF_simple_butterfly(&coeffs[0], &coeffs[1], &coeffs[2], n); if (!check_coeffs(3, n)) return 0; mpz_sub(global_mpz, coeffs_mpz_in[0], coeffs_mpz_in[1]); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[1], global_mpz)) return 0; mpz_add(global_mpz, coeffs_mpz_in[0], coeffs_mpz_in[1]); mpz_mod(global_mpz, global_mpz, global_p); if (mpz_cmp(coeffs_mpz_out[0], global_mpz)) return 0; } return 1; } #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? "ok\n" : "FAIL!\n"); void ZmodF_test_all() { int success, all_success = 1; RUN_TEST(ZmodF_normalise); RUN_TEST(ZmodF_fast_reduce); RUN_TEST(ZmodF_neg); RUN_TEST(ZmodF_short_div_2exp); RUN_TEST(ZmodF_mul_Bexp); RUN_TEST(ZmodF_div_Bexp_sub); RUN_TEST(ZmodF_div_Bexp_add); RUN_TEST(ZmodF_sub_mul_Bexp); RUN_TEST(ZmodF_mul_pseudosqrt2_n_odd); RUN_TEST(ZmodF_mul_pseudosqrt2_n_even); RUN_TEST(ZmodF_mul_2exp); RUN_TEST(ZmodF_mul_sqrt2exp); RUN_TEST(ZmodF_sub_mul_2exp); RUN_TEST(ZmodF_forward_butterfly_Bexp); RUN_TEST(ZmodF_forward_butterfly_2exp); RUN_TEST(ZmodF_forward_butterfly_sqrt2exp); RUN_TEST(ZmodF_inverse_butterfly_Bexp); RUN_TEST(ZmodF_inverse_butterfly_2exp); RUN_TEST(ZmodF_inverse_butterfly_sqrt2exp); RUN_TEST(ZmodF_simple_butterfly); printf(all_success ? 
"\nAll tests passed\n" : "\nAt least one test FAILED!\n"); } int main() { test_support_init(); mpz_init(global_mpz); mpz_init(global_p); for (int i = 0; i < MAX_COEFFS; i++) { mpz_init(coeffs_mpz_in[i]); mpz_init(coeffs_mpz_out[i]); } ZmodF_test_all(); test_support_cleanup(); flint_stack_cleanup(); return 0; } // end of file **************************************************************** flint-1.011/profiler-main.c0000644017361200017500000001635611025357254015446 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** Command-line profiling utility (C) 2007 William Hart and David Harvey ******************************************************************************/ #include "flint.h" #include "profiler-main.h" #include #include #include #include #include #include prof_Driver_t prof_active_Driver = NULL; prof_DriverString_t prof_active_DriverString = NULL; prof_DriverDefaultParams_t prof_active_DriverDefaultParams = NULL; prof1d_Sampler_t prof1d_active_Sampler = NULL; prof2d_Sampler_t prof2d_active_Sampler = NULL; #define MACHINE_NAME_MAXLEN 1000 char machine_name[MACHINE_NAME_MAXLEN + 1]; #define PROFILE_PARAMS_MAXLEN 1000 char profile_params[PROFILE_PARAMS_MAXLEN + 1]; gmp_randstate_t profiler_main_randstate; void profiler_random_limbs(unsigned long* output, unsigned long count) { for (unsigned long i = 0; i < count; i++) output[i] = gmp_urandomb_ui(profiler_main_randstate, FLINT_BITS); } void prof2d_set_sampler(prof2d_Sampler_t sampler) { prof2d_active_Sampler = sampler; } void prof1d_set_sampler(prof1d_Sampler_t sampler) { prof1d_active_Sampler = sampler; } void prof_start() { start_clock(0); } void prof_stop() { stop_clock(0); } typedef struct { unsigned long x; void* arg; prof1d_Sampler_t sampler; } prof1d_info_t; typedef struct { unsigned long x, y; void* arg; prof2d_Sampler_t sampler; } prof2d_info_t; void prof1d_info_exec(void* info, unsigned long count) { prof1d_info_t* z = (prof1d_info_t*) info; z->sampler(z->x, z->arg, count); } void prof2d_info_exec(void* info, unsigned long count) { prof2d_info_t* z = (prof2d_info_t*) info; z->sampler(z->x, z->y, z->arg, count); } void prof1d_sample(unsigned long x, void* arg) { double 
min_time, max_time; prof1d_info_t info; info.x = x; info.arg = arg; info.sampler = prof1d_active_Sampler; prof_repeat(&min_time, &max_time, prof1d_info_exec, &info); printf("%d\t%.3le\t%.3le\n", x, min_time, max_time); fflush(stdout); } void prof2d_sample(unsigned long x, unsigned long y, void* arg) { double min_time, max_time; prof2d_info_t info; info.x = x; info.y = y; info.arg = arg; info.sampler = prof2d_active_Sampler; prof_repeat(&min_time, &max_time, prof2d_info_exec, &info); printf("%d\t%d\t%.3le\t%.3le\n", x, y, min_time, max_time); fflush(stdout); } void do_target(int index, char* params) { printf("FLINT profile output\n\n"); time_t now; time(&now); printf("TIMESTAMP: %s", ctime(&now)); printf("MACHINE: %s\n\n", machine_name); printf("MODULE: %s\n", prof_module_name); printf("TARGET: %s\n", prof_target_name[index]); printf("PARAMETERS: %s\n", params); printf("\n"); if (prof_DriverString_list[index]) printf("DESCRIPTION:\n%s\n", prof_DriverString_list[index](params)); printf("\n"); printf("============================================== begin data \n"); prof_active_Driver = prof_Driver_list[index]; if (prof_active_Driver != NULL) prof_active_Driver(params); } // returns -1 if target name not found // name can be either the string, or the integer index of the target int lookup_target_name(char* name) { int id = atoi(name); if (id > 0) { // looked like an integer if (id <= prof_target_count) return id - 1; // integer out of range return -1; } // not an integer; look up the name in the list for (int i = 0; i < prof_target_count; i++) { if (!strcmp(prof_target_name[i], name)) return i; } return -1; } void error(char* message) { printf("Error: %s\n", message); exit(0); } void help() { printf("FLINT profiling utility for module \"%s\"\n", prof_module_name); printf("\n"); printf("options:\n"); printf(" -h Show this help screen\n"); printf(" -t Target to run (either string or integer index).\n"); printf(" Overrides environment variable FLINT_PROFILE_TARGET.\n"); 
printf(" -p Parameters to pass to target's Driver function (a string).\n"); printf(" Overrides environment variable FLINT_PROFILE_PARAMS.\n"); printf("\n"); printf("Targets in this profiling module are:\n"); for (int i = 0; i < prof_target_count; i++) printf(" %2ld. %s\n", i+1, prof_target_name[i]); } int main(int argc, char* argv[]) { gmp_randinit_default(profiler_main_randstate); // get name of current machine from environment variable char* machine_name_env = getenv("FLINT_MACHINE_NAME"); if (machine_name_env) strncpy(machine_name, machine_name_env, MACHINE_NAME_MAXLEN); else strcpy(machine_name, "unknown"); int selected_target = -1; char* profile_target_env = getenv("FLINT_PROFILE_TARGET"); if (profile_target_env) selected_target = lookup_target_name(profile_target_env); char* profile_params_env = getenv("FLINT_PROFILE_PARAMS"); if (profile_params_env) strncpy(profile_params, profile_params_env, PROFILE_PARAMS_MAXLEN); int num_args = 0; char** args = NULL; // scan command line options for (int n = 1; n < argc; n++) { if (!strcmp(argv[n], "-h")) { help(); return 0; } else if (!strcmp(argv[n], "-t")) { // grab target name if (++n == argc) error("missing target name."); selected_target = lookup_target_name(argv[n]); if (selected_target == -1) error("unknown target name."); } else if (!strcmp(argv[n], "-p")) { if (++n == argc) error("missing target parameters."); strncpy(profile_params, argv[n], PROFILE_PARAMS_MAXLEN); } else error("unrecognised option."); } if (selected_target == -1) { help(); return 0; } // get default parameters from appropriate function if no // parameters supplied on command line if (!strlen(profile_params)) { prof_DriverDefaultParams_t f = prof_DriverDefaultParams_list[selected_target]; if (f) strcpy(profile_params, f()); } do_target(selected_target, profile_params); return 0; } // end of file **************************************************************** flint-1.011/ZmodF_poly-profile.c0000644017361200017500000001475011025357254016416 
0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** ZmodF_poly-profile.c Profiling for ZmodF_poly Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include "profiler-main.h" #include "ZmodF_poly.h" #include "flint.h" #include #include // ============================================================================ void sample_ZmodF_poly_FFT(unsigned long length, unsigned long n, void* arg, unsigned long count) { unsigned long m = ceil_log2(2*length); ZmodF_poly_t poly; ZmodF_poly_init(poly, m, n, 1); // todo: need to generate random data here prof_start(); for (unsigned long i = 0; i < count; i++) { poly->length = length; ZmodF_poly_FFT(poly, 2*length); } prof_stop(); ZmodF_poly_clear(poly); } char* profDriverString_ZmodF_poly_FFT(char* params) { return "ZmodF_poly_FFT over various truncation lengths and coefficient sizes.\n" "Parameters are: min truncation length; max truncation length; ratio between\n" "consecutive truncation lengths; number of coefficient lengths to try."; } char* 
profDriverDefaultParams_ZmodF_poly_FFT() { return "100 200 1.1 6"; } void profDriver_ZmodF_poly_FFT(char* params) { unsigned long length_min, length_max, n_count; double length_ratio; sscanf(params, "%ld %ld %lf %ld", &length_min, &length_max, &length_ratio, &n_count); prof2d_set_sampler(sample_ZmodF_poly_FFT); for (unsigned long length = length_min; length < length_max; length = (int)(ceil(length_ratio * length))) { unsigned long m = ceil_log2(2*length); // restrict coefficient lengths so that appropriate roots of unity // are available unsigned long n_skip = (1 << m) / (4*FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n <= n_count * n_skip; n += n_skip) prof2d_sample(length, n, NULL); } } // ============================================================================ void sample_ZmodF_poly_IFFT(unsigned long length, unsigned long n, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); ZmodF_poly_t poly; ZmodF_poly_init(poly, m, n, 1); poly->length = length; // todo: need to generate random data here prof_start(); for (unsigned long i = 0; i < count; i++) ZmodF_poly_IFFT(poly); prof_stop(); ZmodF_poly_clear(poly); } char* profDriverString_ZmodF_poly_IFFT(char* params) { return "ZmodF_poly_IFFT over various truncation lengths and coefficient sizes.\n" "Parameters are: min truncation length; max truncation length; ratio between\n" "consecutive truncation lengths; number of coefficient lengths to try."; } char* profDriverDefaultParams_ZmodF_poly_IFFT() { return "100 200 1.1 6"; } void profDriver_ZmodF_poly_IFFT(char* params) { unsigned long length_min, length_max, n_count; double length_ratio; sscanf(params, "%ld %ld %lf %ld", &length_min, &length_max, &length_ratio, &n_count); prof2d_set_sampler(sample_ZmodF_poly_IFFT); for (unsigned long length = length_min; length < length_max; length = (int)(ceil(length_ratio * length))) { unsigned long m = ceil_log2(length); // restrict coefficient lengths so that appropriate roots of 
unity // are available unsigned long n_skip = (1 << m) / (4*FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n <= n_count * n_skip; n += n_skip) prof2d_sample(length, n, NULL); } } // ============================================================================ void sample_ZmodF_poly_negacyclic_convolution( unsigned long depth, unsigned long n, void* arg, unsigned long count) { ZmodF_poly_t poly1, poly2, poly3; ZmodF_poly_init(poly1, depth, n, 1); ZmodF_poly_init(poly2, depth, n, 1); ZmodF_poly_init(poly3, depth, n, 1); unsigned long size = 1 << depth; for (unsigned long i = 0; i < size; i++) { profiler_random_limbs(poly1->coeffs[i], n+1); profiler_random_limbs(poly2->coeffs[i], n+1); } unsigned long twist = (2*n*FLINT_BITS) >> depth; prof_start(); for (unsigned long i = 0; i < count; i++) ZmodF_poly_negacyclic_convolution(poly3, poly1, poly2); prof_stop(); ZmodF_poly_clear(poly3); ZmodF_poly_clear(poly2); ZmodF_poly_clear(poly1); } char* profDriverString_ZmodF_poly_negacyclic_convolution(char* params) { return "ZmodF_poly_negacyclic_convolution over various depths and coefficient sizes.\n" "Parameters are: min depth; max depth; min coeff length; max coeff length."; } char* profDriverDefaultParams_ZmodF_poly_negacyclic_convolution() { return "3 8 1 8"; } void profDriver_ZmodF_poly_negacyclic_convolution(char* params) { unsigned long depth_min, depth_max, n_min, n_max; sscanf(params, "%ld %ld %ld %ld", &depth_min, &depth_max, &n_min, &n_max); prof2d_set_sampler(sample_ZmodF_poly_negacyclic_convolution); for (unsigned long depth = depth_min; depth <= depth_max; depth++) { for (unsigned long n = n_min; n <= n_max; n++) { // restrict coefficient lengths so that appropriate roots of unity // are available if ((2*n*FLINT_BITS) % (1 << depth)) continue; prof2d_sample(depth, n, NULL); } } } // end of file **************************************************************** flint-1.011/ZmodF_poly-test.c0000644017361200017500000010277311025357254015740 
/*
   Returns a pseudo-random value in [0, randsup).

   The generator is a linear congruential recurrence
        state = (state * 1025416097 + 286824428) mod 4294967291
   kept in a function-local static, so the sequence is the same on every
   run of the program.  randsup must be non-zero.
*/
unsigned long randint(unsigned long randsup)
{
   static unsigned long state = 4035456057U;

   const unsigned long multiplier = 1025416097U;
   const unsigned long increment  = 286824428U;
   const unsigned long modulus    = 4294967291U;

   state = (state * multiplier + increment) % modulus;

   return state % randsup;
}
bits = randint(maxbits+1); #else bits = maxbits; #endif if (bits == 0) mpz_set_ui(temp,0); else { mpz_rrandomb(temp, randstate, bits); #if SIGNS if (randint(2)) mpz_neg(temp,temp); #endif } mpz_poly_set_coeff(pol, i, temp); } mpz_clear(temp); } void randpoly_unsigned(mpz_poly_t pol, unsigned long length, unsigned long maxbits) { unsigned long bits; mpz_t temp; mpz_init(temp); mpz_poly_zero(pol); for (unsigned long i = 0; i < length; i++) { #if VARY_BITS bits = randint(maxbits+1); #else bits = maxbits; #endif if (bits == 0) mpz_set_ui(temp,0); else { mpz_rrandomb(temp, randstate, bits); } mpz_poly_set_coeff(pol, i, temp); } mpz_clear(temp); } /**************************************************************************** Test code for Fourier Transform Routines ****************************************************************************/ /* Prints the ZmodF_t, each limb in a separate block, most significant limb (i.e. the overflow limb) first. */ void ZmodF_print(ZmodF_t x, unsigned long n) { for (long i = n; i >= 0; i--) #if FLINT_BITS == 64 printf("%016lx ", x[i]); #else printf("%08lx ", x[i]); #endif } /* Prints each coefficient of the polynomial on a separate line. */ void ZmodF_poly_print(ZmodF_poly_t x) { for (unsigned long k = 0; k < (1UL << x->depth); k++) { ZmodF_print(x->coeffs[k], x->n); printf("\n"); } } /* Generates a random ZmodF_poly_t with at most overflow_bits used in the overflow limb for each coefficient. The ZmodF_poly_t should already be initialised. This function ignores the "length" attribute. */ void ZmodF_poly_random(ZmodF_poly_t x, unsigned long overflow_bits) { unsigned long n = x->n; mpz_t temp; mpz_init(temp); for (unsigned long k = 0; k < (1UL << x->depth); k++) { ZmodF_t y = x->coeffs[k]; ZmodF_zero(y, n); mpz_rrandomb(temp, randstate, (n+1)*FLINT_BITS); mpz_export(y, NULL, -1, sizeof(mp_limb_t), 0, 0, temp); // GMP has a "bug" where the top bit of the output of mpz_rrandomb // is always set. So we flip everything with probability 1/2. 
if (random_ulong(2)) for (unsigned long i = 0; i <= n; i++) y[i] = ~y[i]; // Copy the sign bit downwards so that only overflow_bits bits are used. if ((mp_limb_signed_t) y[n] >= 0) y[n] &= (1UL << overflow_bits) - 1; else y[n] |= ~((1UL << overflow_bits) - 1); } mpz_clear(temp); } mpz_t global_p; unsigned long global_n = 0; // Sets: // global_n := n, // global_p = 2^(FLINT_BITS*n) + 1 void set_global_n(unsigned long n) { if (n != global_n) { global_n = n; mpz_set_ui(global_p, 1); mpz_mul_2exp(global_p, global_p, n*FLINT_BITS); mpz_add_ui(global_p, global_p, 1); } } /* Converts given ZmodF_t into mpz_t format, reduced into [0, p) range. Assumes global_n and global_p are set correctly. */ void ZmodF_convert_out(mpz_t output, ZmodF_t input) { int negative = ((mp_limb_signed_t) input[global_n] < 0); if (negative) for (int i = 0; i <= global_n; i++) input[i] = ~input[i]; mpz_import(output, global_n+1, -1, sizeof(mp_limb_t), 0, 0, input); if (negative) { mpz_add_ui(output, output, 1); mpz_neg(output, output); for (int i = 0; i <= global_n; i++) input[i] = ~input[i]; } mpz_mod(output, output, global_p); } /* Converts input polynomial to mpz_poly format. Each output coefficient is normalised into [0, p). All 2^depth coefficients are converted. Assumes that output is already initialised. */ void ZmodF_poly_convert_out(mpz_poly_t output, ZmodF_poly_t input) { unsigned long size = 1UL << input->depth; unsigned long n = input->n; mpz_poly_ensure_alloc(output, size); set_global_n(n); for (unsigned long k = 0; k < size; k++) ZmodF_convert_out(output->coeffs[k], input->coeffs[k]); output->length = size; } /* y := x * 2^(s/2) mod p (using a very naive algorithm) y may alias x Assumes global_n and global_p are set correctly. 
*/ void naive_mul_sqrt2exp(mpz_t y, mpz_t x, unsigned long s) { static mpz_t temp; static int init = 0; if (!init) { mpz_init(temp); init = 1; } if (s & 1) { mpz_mul_2exp(y, x, s/2 + global_n*FLINT_BITS/4); mpz_mul_2exp(temp, y, global_n*FLINT_BITS/2); mpz_sub(y, temp, y); mpz_mod(y, y, global_p); } else { mpz_mul_2exp(y, x, s/2); mpz_mod(y, y, global_p); } } // root and twist are powers of sqrt2 void naive_FFT(mpz_poly_t x, unsigned long depth, unsigned long root, unsigned long twist, unsigned long n) { static mpz_t temp; static int init = 0; if (!init) { mpz_init(temp); init = 1; } unsigned long size = 1UL << depth; for (unsigned long d = 0; d < depth; d++) { unsigned long half = 1UL << (depth - d - 1); for (unsigned long start = 0; start < size; start += 2*half) { for (unsigned long i = 0; i < half; i++) { mpz_t* a = &x->coeffs[start + i]; mpz_t* b = &x->coeffs[start + half + i]; mpz_add(temp, *a, *b); mpz_sub(*b, *a, *b); naive_mul_sqrt2exp(*b, *b, twist + i*root); mpz_mod(*a, temp, global_p); } } root <<= 1; twist <<= 1; } } int test__ZmodF_poly_FFT_iterative_case( unsigned long depth, unsigned long nonzero, unsigned long length, unsigned long twist, unsigned long n) { mpz_poly_t poly1, poly2; ZmodF_poly_t f; unsigned long size = 1UL << depth; unsigned long root = 4*n*FLINT_BITS / size; mpz_poly_init(poly1); mpz_poly_init(poly2); ZmodF_poly_init(f, depth, n, 1); int success = 1; set_global_n(n); ZmodF_poly_random(f, 4); ZmodF_poly_convert_out(poly1, f); for (unsigned long i = nonzero; i < size; i++) mpz_set_ui(poly1->coeffs[i], 0); naive_FFT(poly1, depth, root, twist, n); _ZmodF_poly_FFT_iterative(f->coeffs, depth, 1, nonzero, length, twist, n, f->scratch); ZmodF_poly_convert_out(poly2, f); for (unsigned long i = 0; i < length; i++) if (mpz_cmp(poly1->coeffs[i], poly2->coeffs[i])) success = 0; ZmodF_poly_clear(f); mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; } int test__ZmodF_poly_FFT_iterative() { int success = 1; for (unsigned long depth = 
1; depth <= 11 && success; depth++) { unsigned long size = 1UL << depth; // need 4*n*FLINT_BITS divisible by 2^depth unsigned long n_skip = size / (4*FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n < 6*n_skip && success; n += n_skip) { #if DEBUG printf("depth = %d, n = %d\n", depth, n); #endif unsigned long num_trials = 40000 / (1 << depth); for (unsigned long trial = 0; trial < num_trials && success; trial++) { unsigned long nonzero, length, twist, root; if (depth == 0) nonzero = length = 1; else { nonzero = random_ulong(size-1) + 1; length = random_ulong(size-1) + 1; } twist = random_ulong(4*n*FLINT_BITS / size); success = success && test__ZmodF_poly_FFT_iterative_case( depth, nonzero, length, twist, n); } } } return success; } int test__ZmodF_poly_FFT_factor_case( unsigned long rows_depth, unsigned long cols_depth, unsigned long nonzero, unsigned long length, unsigned long twist, unsigned long n) { mpz_poly_t poly1, poly2; ZmodF_poly_t f; unsigned long depth = rows_depth + cols_depth; unsigned long size = 1UL << depth; unsigned long root = 4*n*FLINT_BITS / size; mpz_poly_init(poly1); mpz_poly_init(poly2); ZmodF_poly_init(f, depth, n, 1); int success = 1; set_global_n(n); ZmodF_poly_random(f, 4); ZmodF_poly_convert_out(poly1, f); for (unsigned long i = nonzero; i < size; i++) mpz_set_ui(poly1->coeffs[i], 0); naive_FFT(poly1, depth, root, twist, n); _ZmodF_poly_FFT_factor(f->coeffs, rows_depth, cols_depth, 1, nonzero, length, twist, n, f->scratch); ZmodF_poly_convert_out(poly2, f); for (unsigned long i = 0; i < length; i++) if (mpz_cmp(poly1->coeffs[i], poly2->coeffs[i])) success = 0; ZmodF_poly_clear(f); mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; } int test__ZmodF_poly_FFT_factor() { int success = 1; for (unsigned long depth = 2; depth <= 6 && success; depth++) for (unsigned long depth1 = 1; depth1 < depth && success; depth1++) { unsigned long depth2 = depth - depth1; unsigned long size = 1UL << depth; // need 
4*n*FLINT_BITS divisible by 2^depth unsigned long n = size / (4*FLINT_BITS); if (n == 0) n = 1; #if DEBUG printf("depth1 = %d, depth2 = %d, n = %d\n", depth1, depth2, n); #endif for (unsigned long length = 1; length <= size; length++) for (unsigned long nonzero = 1; nonzero <= size; nonzero++) { unsigned long num_trials = 1000000 / (1 << (3*depth)); if (num_trials == 0) num_trials = 1; for (unsigned long trial = 0; trial < num_trials; trial++) { unsigned long twist = random_ulong( 4*n*FLINT_BITS / size); success = success && test__ZmodF_poly_FFT_factor_case( depth1, depth2, nonzero, length, twist, n); } } } return success; } // root and twist are powers of sqrt2 void naive_IFFT(mpz_poly_t x, unsigned long depth, unsigned long root, unsigned long twist, unsigned long n) { static mpz_t temp; static int init = 0; if (!init) { mpz_init(temp); init = 1; } unsigned long size = 1UL << depth; root <<= (depth - 1); twist <<= (depth - 1); for (unsigned long d = 0; d < depth; d++) { unsigned long half = 1UL << d; for (unsigned long start = 0; start < size; start += 2*half) { for (unsigned long i = 0; i < half; i++) { mpz_t* a = &x->coeffs[start + i]; mpz_t* b = &x->coeffs[start + half + i]; naive_mul_sqrt2exp(*b, *b, 4*n*FLINT_BITS - (twist + i*root)); mpz_add(temp, *a, *b); mpz_sub(*b, *a, *b); mpz_mod(*a, temp, global_p); mpz_mod(*b, *b, global_p); } } root >>= 1; twist >>= 1; } } int test__ZmodF_poly_IFFT_recursive_case( unsigned long depth, unsigned long nonzero, unsigned long length, int extra, unsigned long twist, unsigned long n) { mpz_poly_t poly1, poly2; ZmodF_poly_t f; mpz_t extra_coeff; mpz_init(extra_coeff); unsigned long size = 1UL << depth; unsigned long root = 4*n*FLINT_BITS / size; mpz_poly_init(poly1); mpz_poly_init(poly2); ZmodF_poly_init(f, depth, n, 1); int success = 1; set_global_n(n); // run truncated inverse transform on random data ZmodF_poly_random(f, 4); ZmodF_poly_convert_out(poly1, f); _ZmodF_poly_IFFT_recursive(f->coeffs, depth, 1, nonzero, 
length, extra, twist, n, f->scratch); // reassemble the untransformed coefficients ZmodF_poly_convert_out(poly2, f); if (extra) // save extra coefficient if necessary mpz_set(extra_coeff, poly2->coeffs[length]); for (unsigned long i = length; i < nonzero; i++) mpz_set(poly2->coeffs[i], poly1->coeffs[i]); for (unsigned long i = nonzero; i < size; i++) mpz_set_ui(poly2->coeffs[i], 0); // run forward transform on proposed untransformed coefficients naive_FFT(poly2, depth, root, twist, n); // rescale for (unsigned long i = 0; i < size; i++) naive_mul_sqrt2exp(poly2->coeffs[i], poly2->coeffs[i], 2*(2*n*FLINT_BITS - depth)); // check the first few agree with input for (unsigned long i = 0; i < length; i++) if (mpz_cmp(poly2->coeffs[i], poly1->coeffs[i])) success = 0; // check the extra coefficient is correct too if (extra) if (mpz_cmp(poly2->coeffs[length], extra_coeff)) success = 0; ZmodF_poly_clear(f); mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; } int test__ZmodF_poly_IFFT_recursive() { int success = 1; for (unsigned long depth = 1; depth <= 6 && success; depth++) { unsigned long size = 1UL << depth; // need 4*n*FLINT_BITS divisible by 2^depth unsigned long n_skip = size / (4*FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n < 6*n_skip && success; n += n_skip) { #if DEBUG printf("depth = %d, n = %d\n", depth, n); #endif for (unsigned long nonzero = 1; nonzero <= size; nonzero++) for (int extra = 0; extra < 2; extra++) for (unsigned long length = 1-extra; length <= nonzero; length++) { if (extra && length == size) continue; unsigned long num_trials = 100000 / (1 << (3*depth)); if (num_trials == 0) num_trials = 1; for (unsigned long trial = 0; trial < num_trials; trial++) { unsigned long twist = random_ulong( 4*n*FLINT_BITS / size); success = success && test__ZmodF_poly_IFFT_recursive_case( depth, nonzero, length, extra, twist, n); } } } } return success; } int test__ZmodF_poly_IFFT_iterative_case(unsigned long depth, unsigned 
long twist, unsigned long n) { mpz_poly_t poly1, poly2; ZmodF_poly_t f; mpz_t extra_coeff; mpz_init(extra_coeff); unsigned long size = 1UL << depth; unsigned long root = 4*n*FLINT_BITS / size; mpz_poly_init(poly1); mpz_poly_init(poly2); ZmodF_poly_init(f, depth, n, 1); int success = 1; set_global_n(n); ZmodF_poly_random(f, 4); ZmodF_poly_convert_out(poly1, f); naive_IFFT(poly1, depth, root, twist, n); _ZmodF_poly_IFFT_iterative(f->coeffs, depth, 1, twist, n, f->scratch); ZmodF_poly_convert_out(poly2, f); for (unsigned long i = 0; i < size; i++) if (mpz_cmp(poly1->coeffs[i], poly2->coeffs[i])) success = 0; ZmodF_poly_clear(f); mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; } int test__ZmodF_poly_IFFT_iterative() { int success = 1; for (unsigned long depth = 1; depth <= 9 && success; depth++) { unsigned long size = 1UL << depth; // need 4*n*FLINT_BITS divisible by 2^depth unsigned long n_skip = size / (4*FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n < 6*n_skip && success; n += n_skip) { #if DEBUG printf("depth = %d, n = %d\n", depth, n); #endif unsigned long num_trials = 100000 / (1 << (depth)); if (num_trials == 0) num_trials = 1; for (unsigned long trial = 0; trial < num_trials; trial++) { unsigned long twist = random_ulong( 4*n*FLINT_BITS / size); success = success && test__ZmodF_poly_IFFT_iterative_case( depth, twist, n); } } } return success; } int test__ZmodF_poly_IFFT() { mpz_poly_t poly1, poly2; mpz_poly_init(poly1); mpz_poly_init(poly2); mpz_t extra_coeff; mpz_init(extra_coeff); int success = 1; for (unsigned long depth = 1; depth <= 11 && success; depth++) { unsigned long size = 1UL << depth; // need 4*n*FLINT_BITS divisible by 2^depth unsigned long n_skip = size / (4*FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n < 6*n_skip && success; n += n_skip) { ZmodF_poly_t f; ZmodF_poly_init(f, depth, n, 1); #if DEBUG printf("depth = %d, n = %d\n", depth, n); #endif set_global_n(n); unsigned 
long num_trials = 40000 / (1 << depth); for (unsigned long trial = 0; trial < num_trials; trial++) { unsigned long nonzero, length, twist, root; int extra = random_ulong(2); if (depth == 0) { nonzero = 1; length = 1 - extra; } else { nonzero = random_ulong(size-1) + 1; length = random_ulong(nonzero) + 1 - extra; } root = 4*n*FLINT_BITS / size; twist = random_ulong(root); // run truncated inverse transform on random data ZmodF_poly_random(f, 4); ZmodF_poly_convert_out(poly1, f); _ZmodF_poly_IFFT(f->coeffs, depth, 1, nonzero, length, extra, twist, n, f->scratch); // reassemble the untransformed coefficients ZmodF_poly_convert_out(poly2, f); if (extra) // save extra coefficient if necessary mpz_set(extra_coeff, poly2->coeffs[length]); for (unsigned long i = length; i < nonzero; i++) mpz_set(poly2->coeffs[i], poly1->coeffs[i]); for (unsigned long i = nonzero; i < size; i++) mpz_set_ui(poly2->coeffs[i], 0); // run forward transform on proposed untransformed coefficients naive_FFT(poly2, depth, root, twist, n); // rescale for (unsigned long i = 0; i < size; i++) naive_mul_sqrt2exp(poly2->coeffs[i], poly2->coeffs[i], 2*(2*n*FLINT_BITS - depth)); // check the first few agree with input for (unsigned long i = 0; i < length; i++) if (mpz_cmp(poly2->coeffs[i], poly1->coeffs[i])) success = 0; // check the extra coefficient is correct too if (extra) if (mpz_cmp(poly2->coeffs[length], extra_coeff)) success = 0; } ZmodF_poly_clear(f); } } mpz_clear(extra_coeff); mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; } // x and y should both have length 2^depth // this version just multiplies out the convolution void really_naive_convolution(mpz_poly_t res, mpz_poly_t x, mpz_poly_t y, unsigned long depth) { unsigned long size = 1UL << depth; mpz_poly_ensure_alloc(res, size); res->length = size; for (unsigned long i = 0; i < size; i++) mpz_set_ui(res->coeffs[i], 0); for (unsigned long i = 0; i < size; i++) for (unsigned long j = 0; j < size; j++) mpz_addmul(res->coeffs[(i+j) 
% size], x->coeffs[i], y->coeffs[j]); for (unsigned long i = 0; i < size; i++) mpz_mod(res->coeffs[i], res->coeffs[i], global_p); } // x and y should both have length 2^depth // this version uses naive_FFT and naive_IFFT mod p void naive_convolution(mpz_poly_t res, mpz_poly_t x, mpz_poly_t y, unsigned long depth, unsigned long n) { unsigned long size = 1UL << depth; mpz_poly_t xt, yt; mpz_poly_init(xt); mpz_poly_init(yt); mpz_poly_set(xt, x); mpz_poly_set(yt, y); naive_FFT(xt, depth, (4*n*FLINT_BITS) >> depth, 0, n); naive_FFT(yt, depth, (4*n*FLINT_BITS) >> depth, 0, n); mpz_poly_ensure_alloc(res, size); for (unsigned long i = 0; i < (1 << depth); i++) { mpz_mul(res->coeffs[i], xt->coeffs[i], yt->coeffs[i]); mpz_mul_2exp(res->coeffs[i], res->coeffs[i], 2*n*FLINT_BITS - depth); mpz_mod(res->coeffs[i], res->coeffs[i], global_p); } res->length = size; naive_IFFT(res, depth, (4*n*FLINT_BITS) >> depth, 0, n); mpz_poly_clear(xt); mpz_poly_clear(yt); } int test_ZmodF_poly_convolution() { mpz_poly_t poly1, poly2, poly3, poly4; mpz_poly_init(poly1); mpz_poly_init(poly2); mpz_poly_init(poly3); mpz_poly_init(poly4); int success = 1; for (unsigned long depth = 0; depth <= 11 && success; depth++) { unsigned long size = 1UL << depth; // need 4*n*FLINT_BITS divisible by 2^depth unsigned long n_skip = size / (4*FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n < 6*n_skip && success; n += n_skip) { ZmodF_poly_t f1, f2, f3; ZmodF_poly_init(f1, depth, n, 1); ZmodF_poly_init(f2, depth, n, 1); ZmodF_poly_init(f3, depth, n, 1); #if DEBUG printf("depth = %d, n = %d\n", depth, n); #endif set_global_n(n); // switch to FFT-based convolution even for the test code, otherwise // tests get too slow int use_really_naive = (depth <= 5); unsigned long num_trials = (use_really_naive ? 
50000 : 20000) / ((1 << depth) * n); if (num_trials == 0) num_trials = 1; for (unsigned long trial = 0; trial < num_trials && success; trial++) { unsigned long len1 = random_ulong(size+1); unsigned long len2 = random_ulong(size+1); ZmodF_poly_random(f1, 4); ZmodF_poly_random(f2, 4); f1->length = len1; f2->length = len2; ZmodF_poly_convert_out(poly1, f1); for (unsigned long i = len1; i < size; i++) mpz_set_ui(poly1->coeffs[i], 0); ZmodF_poly_convert_out(poly2, f2); for (unsigned long i = len2; i < size; i++) mpz_set_ui(poly2->coeffs[i], 0); ZmodF_poly_convolution(f3, f1, f2); ZmodF_poly_convert_out(poly3, f3); if (use_really_naive) really_naive_convolution(poly4, poly1, poly2, depth); else naive_convolution(poly4, poly1, poly2, depth, n); unsigned long out_len = len1 + len2 - 1; if (out_len > size) out_len = size; for (unsigned long i = 0; i < out_len; i++) if (mpz_cmp(poly3->coeffs[i], poly4->coeffs[i])) success = 0; } ZmodF_poly_clear(f3); ZmodF_poly_clear(f2); ZmodF_poly_clear(f1); } } mpz_poly_clear(poly4); mpz_poly_clear(poly3); mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; } int test_ZmodF_poly_convolution_range() { mpz_poly_t poly1, poly2, poly3, poly4; mpz_poly_init(poly1); mpz_poly_init(poly2); mpz_poly_init(poly3); mpz_poly_init(poly4); int success = 1; for (unsigned long depth = 0; depth <= 11 && success; depth++) { unsigned long size = 1UL << depth; // need 4*n*FLINT_BITS divisible by 2^depth unsigned long n_skip = size / (4*FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n < 6*n_skip && success; n += n_skip) { ZmodF_poly_t f1, f2, f3; ZmodF_poly_init(f1, depth, n, 1); ZmodF_poly_init(f2, depth, n, 1); ZmodF_poly_init(f3, depth, n, 1); #if DEBUG printf("depth = %d, n = %d\n", depth, n); #endif set_global_n(n); // switch to FFT-based convolution even for the test code, otherwise // tests get too slow int use_really_naive = (depth <= 5); unsigned long num_trials = (use_really_naive ? 
50000 : 20000) / ((1 << depth) * n); if (num_trials == 0) num_trials = 1; for (unsigned long trial = 0; trial < num_trials && success; trial++) { unsigned long len1 = random_ulong(size+1); unsigned long len2 = random_ulong(size+1); unsigned long out_len = len1 + len2 - 1; if (out_len > size) out_len = size; unsigned long trunc; if (out_len) trunc = random_ulong(out_len)+1; else trunc = 0; #if DEBUG printf("len1 = %ld, len2 = %ld, trunc = %ld\n", len1, len2, trunc); #endif ZmodF_poly_random(f1, 4); ZmodF_poly_random(f2, 4); f1->length = len1; f2->length = len2; ZmodF_poly_convert_out(poly1, f1); for (unsigned long i = len1; i < size; i++) mpz_set_ui(poly1->coeffs[i], 0); ZmodF_poly_convert_out(poly2, f2); for (unsigned long i = len2; i < size; i++) mpz_set_ui(poly2->coeffs[i], 0); ZmodF_poly_convolution_range(f3, f1, f2, 0, trunc); ZmodF_poly_convert_out(poly3, f3); if (use_really_naive) really_naive_convolution(poly4, poly1, poly2, depth); else naive_convolution(poly4, poly1, poly2, depth, n); for (unsigned long i = 0; i < trunc; i++) if (mpz_cmp(poly3->coeffs[i], poly4->coeffs[i])) success = 0; } ZmodF_poly_clear(f3); ZmodF_poly_clear(f2); ZmodF_poly_clear(f1); } } mpz_poly_clear(poly4); mpz_poly_clear(poly3); mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; } // x and y should both have length 2^depth // this version just multiplies out the convolution void really_naive_negacyclic_convolution(mpz_poly_t res, mpz_poly_t x, mpz_poly_t y, unsigned long depth) { unsigned long size = 1UL << depth; mpz_poly_ensure_alloc(res, size); res->length = size; for (unsigned long i = 0; i < size; i++) mpz_set_ui(res->coeffs[i], 0); for (unsigned long i = 0; i < size; i++) for (unsigned long j = 0; j < size; j++) { unsigned long k = i + j; if (k < size) mpz_addmul(res->coeffs[k], x->coeffs[i], y->coeffs[j]); else mpz_submul(res->coeffs[k-size], x->coeffs[i], y->coeffs[j]); } for (unsigned long i = 0; i < size; i++) mpz_mod(res->coeffs[i], res->coeffs[i], global_p); } 
// x and y should both have length 2^depth // this version uses naive_FFT and naive_IFFT mod p void naive_negacyclic_convolution(mpz_poly_t res, mpz_poly_t x, mpz_poly_t y, unsigned long depth, unsigned long n) { unsigned long size = 1UL << depth; mpz_poly_t xt, yt; mpz_poly_init(xt); mpz_poly_init(yt); mpz_poly_set(xt, x); mpz_poly_set(yt, y); for (unsigned long i = 0; i < (1 << depth); i++) { naive_mul_sqrt2exp(xt->coeffs[i], xt->coeffs[i], (2*i*n*FLINT_BITS) >> depth); naive_mul_sqrt2exp(yt->coeffs[i], yt->coeffs[i], (2*i*n*FLINT_BITS) >> depth); } naive_FFT(xt, depth, (4*n*FLINT_BITS) >> depth, 0, n); naive_FFT(yt, depth, (4*n*FLINT_BITS) >> depth, 0, n); mpz_poly_ensure_alloc(res, size); for (unsigned long i = 0; i < (1 << depth); i++) { mpz_mul(res->coeffs[i], xt->coeffs[i], yt->coeffs[i]); mpz_mul_2exp(res->coeffs[i], res->coeffs[i], 2*n*FLINT_BITS - depth); mpz_mod(res->coeffs[i], res->coeffs[i], global_p); } res->length = size; naive_IFFT(res, depth, (4*n*FLINT_BITS) >> depth, 0, n); for (unsigned long i = 0; i < (1 << depth); i++) { naive_mul_sqrt2exp(res->coeffs[i], res->coeffs[i], (4*n*FLINT_BITS) - ((2*i*n*FLINT_BITS) >> depth)); } mpz_poly_clear(xt); mpz_poly_clear(yt); } int test_ZmodF_poly_negacyclic_convolution() { mpz_poly_t poly1, poly2, poly3, poly4; mpz_poly_init(poly1); mpz_poly_init(poly2); mpz_poly_init(poly3); mpz_poly_init(poly4); int success = 1; for (unsigned long depth = 0; depth <= 10 && success; depth++) { unsigned long size = 1UL << depth; // need n*FLINT_BITS divisible by 2^depth unsigned long n_skip = size / (FLINT_BITS); if (n_skip == 0) n_skip = 1; for (unsigned long n = n_skip; n < 6*n_skip && success; n += n_skip) { ZmodF_poly_t f1, f2, f3; ZmodF_poly_init(f1, depth, n, 1); ZmodF_poly_init(f2, depth, n, 1); ZmodF_poly_init(f3, depth, n, 1); #if DEBUG printf("depth = %d, n = %d\n", depth, n); #endif set_global_n(n); // switch to FFT-based convolution even for the test code, otherwise // tests get too slow int use_really_naive = 
(depth <= 5); unsigned long num_trials = (use_really_naive ? 50000 : 20000) / ((1 << depth) * n); if (num_trials == 0) num_trials = 1; for (unsigned long trial = 0; trial < num_trials && success; trial++) { ZmodF_poly_random(f1, 4); ZmodF_poly_random(f2, 4); f1->length = size; f2->length = size; ZmodF_poly_convert_out(poly1, f1); ZmodF_poly_convert_out(poly2, f2); ZmodF_poly_negacyclic_convolution(f3, f1, f2); ZmodF_poly_convert_out(poly3, f3); if (use_really_naive) really_naive_negacyclic_convolution(poly4, poly1, poly2, depth); else naive_negacyclic_convolution(poly4, poly1, poly2, depth, n); for (unsigned long i = 0; i < size; i++) if (mpz_cmp(poly3->coeffs[i], poly4->coeffs[i])) success = 0; } ZmodF_poly_clear(f3); ZmodF_poly_clear(f2); ZmodF_poly_clear(f1); } } mpz_poly_clear(poly4); mpz_poly_clear(poly3); mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; } /**************************************************************************** Main test functions ****************************************************************************/ #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? "ok\n" : "FAIL!\n"); void ZmodF_poly_test_all() { int success, all_success = 1; RUN_TEST(_ZmodF_poly_FFT_iterative); RUN_TEST(_ZmodF_poly_FFT_factor); RUN_TEST(_ZmodF_poly_IFFT_recursive); RUN_TEST(_ZmodF_poly_IFFT_iterative); RUN_TEST(_ZmodF_poly_IFFT); RUN_TEST(ZmodF_poly_convolution); RUN_TEST(ZmodF_poly_convolution_range); RUN_TEST(ZmodF_poly_negacyclic_convolution); printf(all_success ? 
"\nAll tests passed\n" : "\nAt least one test FAILED!\n"); } int main() { test_support_init(); mpz_init(global_p); ZmodF_poly_test_all(); test_support_cleanup(); flint_stack_cleanup(); return 0; } // end of file **************************************************************** flint-1.011/profiler.h0000644017361200017500000001053611025357254014523 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** Timing/profiling (C) 2007 William Hart and David Harvey ******************************************************************************/ #include #include #ifndef FLINT_PROFILER_H #define FLINT_PROFILER_H #ifdef __cplusplus extern "C" { #endif // number of independent global clocks #define FLINT_NUM_CLOCKS 20 // If this flag is set, profiling will use a cycle counter *if one is // available* (otherwise this flag is ignored) #define FLINT_USE_CYCLE_COUNTER 1 // cycles/second #define FLINT_CLOCKSPEED 1804121000.0 extern double clock_last[FLINT_NUM_CLOCKS]; extern double clock_accum[FLINT_NUM_CLOCKS]; #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) // Relative timings on X86 machines 
running gcc #define FLINT_HAVE_CYCLE_COUNTER 1 static inline double get_cycle_counter() { // dirty: do we need to ensure these are 32-bit types? unsigned hi; unsigned lo; __asm("rdtsc; movl %%edx,%0; movl %%eax,%1" : "=r" (hi), "=r" (lo) : : "%edx", "%eax"); return (double) hi * (1 << 30) * 4 + lo; } #else #define FLINT_HAVE_CYCLE_COUNTER 0 #endif /* Here we define FLINT_CLOCK_SCALE_FACTOR, which converts the output of get_current_time() into microseconds */ #if FLINT_HAVE_CYCLE_COUNTER && FLINT_USE_CYCLE_COUNTER // microseconds per cycle #define FLINT_CLOCK_SCALE_FACTOR (1000000.0 / FLINT_CLOCKSPEED) #else // we'll use getrusage, which is already in microseconds #define FLINT_CLOCK_SCALE_FACTOR 1.0 #endif static inline double get_current_time() { #if FLINT_HAVE_CYCLE_COUNTER && FLINT_USE_CYCLE_COUNTER return get_cycle_counter(); #else // user time in microseconds struct rusage x; getrusage(RUSAGE_SELF, &x); return x.ru_utime.tv_sec * 1000000.0 + x.ru_utime.tv_usec; #endif } static inline void init_clock(unsigned long n) { clock_accum[n] = 0.0; } static inline void init_all_clocks() { for (unsigned long i = 0; i < FLINT_NUM_CLOCKS; i++) clock_accum[i] = 0.0; } static inline double get_clock(unsigned long n) { return clock_accum[n] * FLINT_CLOCK_SCALE_FACTOR; } static inline void start_clock(unsigned long n) { clock_last[n] = get_current_time(); } static inline void stop_clock(unsigned long n) { double now = get_current_time(); clock_accum[n] += (now - clock_last[n]); } /****************************************************************************** Framework for repeatedly sampling a single target ******************************************************************************/ // A profiling target (a function called with one argument and an // iteration count.) typedef void (*profile_target_t)(void* arg, unsigned long count); /* Calls target(arg) repeatedly, adjusting the iteration count based on the observed running times. 
The target function should use clock #0 (i.e. with start_clock() and stop_clock()) to mark which code should be timed. Stores minimum/maximum time per iteration (in microseconds) in min and max (either may be NULL, in which case the value is not stored). */ void prof_repeat(double* min, double* max, profile_target_t target, void* arg); // Timing runs need to last at least this many microseconds to be counted: #define DURATION_THRESHOLD 200000 // Microseconds per timing run that the prof_repeat function aims for: #define DURATION_TARGET 300000 #ifdef __cplusplus } #endif #endif // #ifndef FLINT_PROFILER_H flint-1.011/F_mpz_mul-timing.c0000644017361200017500000001020211025357254016077 0ustar tabbotttabbott/*============================================================================ Copyright (C) 2007, William Hart This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ #define TRIALS 1 #define TEST //#define TIMING #include #include #include #include #include "flint.h" #include "mpz_extras.h" #include "profiler.h" /* Runs F_mpz_mul through random data. num_trials = number of trials to perform. coeff_bits = number of bits to use in each coefficient. 
*/ unsigned long run_F_mpz_mul(unsigned long num_trials, unsigned long coeff_bits, int fast, unsigned long tweak) { unsigned long i; // alloc some space mpz_t data1; mpz_t data2; mpz_t data3; mpz_t data4; mpz_init(data1); mpz_init(data2); mpz_init(data3); mpz_init(data4); gmp_randstate_t state; gmp_randinit_default(state); for (i = 0; i < num_trials; i++) { // make up random polys if (i%20==0) { #ifdef TEST mpz_rrandomb(data1, state, coeff_bits); mpz_rrandomb(data2, state, coeff_bits); #else mpz_urandomb(data1, state, coeff_bits); mpz_urandomb(data2, state, coeff_bits); #endif /*if (gmp_urandomb_ui(state, 1)) mpz_neg(data1, data1); if (gmp_urandomb_ui(state, 1)) mpz_neg(data2, data2);*/ } #ifdef TEST // compute product using F_mpz_mul F_mpz_mul(data3, data1, data1); // compute product using GMP mpz_mul(data4, data1, data1); if (mpz_size(data3) != mpz_size(data4)) printf("Sizes don't match! %ld, %ld\n", mpz_size(data3), mpz_size(data4)); if (mpz_cmp(data3,data4)!=0) printf("Failure!!\n"); #endif #ifdef TIMING start_clock(0); if (fast) { __F_mpz_mul(data3, data1, data2, tweak); } else { mpz_mul(data4, data1, data2); } stop_clock(0); #endif } // clean up gmp_randclear(state); mpz_clear(data1); mpz_clear(data2); mpz_clear(data3); mpz_clear(data4); } int main (int argc, const char * argv[]) { double time1, time2; unsigned long besti, imax; double best; unsigned long tweak; unsigned long bits; for (unsigned long words = 1000UL; words < 500000000; words=floor(words*pow(2.0,1.0/32.0))+1) { bits = 64*words; #ifdef TIMING printf("%ld words\n",words); besti = 0; best = 1000.0; imax = 8; tweak = 8; for (unsigned long i = 0; i < imax; i++) { init_clock(0); run_F_mpz_mul(TRIALS, bits, 1, tweak); time1 = get_clock(0) / FLINT_CLOCK_SCALE_FACTOR / 1800000000.0; if (time1 < best) { best = time1; besti = tweak; } tweak++; } printf("FLINT = %lf ", besti, best); init_clock(0); run_F_mpz_mul(TRIALS, bits, 0, 0); time2 = get_clock(0) / FLINT_CLOCK_SCALE_FACTOR / 1800000000.0; printf("GMP 
= %lf ",time2); printf("ratio = %lf, best = %ld\n",time2/best,besti); run_F_mpz_mul(TRIALS, bits, 1, besti); #else printf("%ld\n",bits); run_F_mpz_mul(TRIALS, bits, 1, 1); #endif } return 0; } flint-1.011/zmod_poly.c0000644017361200017500000032510711025357254014713 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /***************************************************************************** zmod_poly.c: Polynomials over (unsigned) long mod p, for p prime. Copyright (C) 2007, David Howden. 
*****************************************************************************/ #include "zmod_poly.h" #include "long_extras.h" #include "flint.h" #define PRINT_LIMB(a) print_limb(#a, a); #define PRINT_VAR(a) print_var(#a, a); /**************************************************************************** Initialisation and memory management ****************************************************************************/ void zmod_poly_init(zmod_poly_t poly, unsigned long p) { zmod_poly_init_precomp(poly, p, z_precompute_inverse(p)); } void zmod_poly_init_precomp(zmod_poly_t poly, unsigned long p, double p_inv) { poly->coeffs = (unsigned long*) flint_heap_alloc(1); poly->p = p; poly->p_inv = p_inv; #if PREINV32 if (FLINT_BIT_COUNT(p) <= 32) poly->p32_inv = z_precompute_inverse32(p); #endif poly->alloc = 1; poly->length = 0; } void zmod_poly_init2(zmod_poly_t poly, unsigned long p, unsigned long alloc) { zmod_poly_init2_precomp(poly, p, z_precompute_inverse(p), alloc); } void zmod_poly_init2_precomp(zmod_poly_t poly, unsigned long p, double p_inv, unsigned long alloc) { FLINT_ASSERT(alloc >= 1); poly->coeffs = (unsigned long*) flint_heap_alloc(alloc); poly->p = p; poly->p_inv = p_inv; #if PREINV32 poly->p32_inv = z_precompute_inverse32(p); #endif poly->alloc = alloc; poly->length = 0; } void zmod_poly_clear(zmod_poly_t poly) { flint_heap_free(poly->coeffs); } void zmod_poly_realloc(zmod_poly_t poly, unsigned long alloc) { FLINT_ASSERT(alloc >= 1); // clear any mpz_t's beyond the new array length // for (unsigned long i = alloc; i < poly->alloc; i++) // mpz_clear(poly->coeffs[i]); poly->coeffs = (unsigned long*) flint_heap_realloc(poly->coeffs, alloc); // init any new mpz_t's required // for (unsigned long i = poly->alloc; i < alloc; i++) // mpz_init(poly->coeffs[i]); poly->alloc = alloc; // truncate poly if necessary if (poly->length > alloc) { poly->length = alloc; __zmod_poly_normalise(poly); } } void __zmod_poly_fit_length(zmod_poly_t poly, unsigned long alloc) { 
FLINT_ASSERT(alloc > poly->alloc); if (alloc < 2*poly->alloc) alloc = 2*poly->alloc; zmod_poly_realloc(poly, alloc); } /**************************************************************************** Setting/retrieving coefficients ****************************************************************************/ void zmod_poly_set_coeff_ui(zmod_poly_t poly, unsigned long n, unsigned long c) { c = z_mod_precomp(c, poly->p, poly->p_inv); zmod_poly_fit_length(poly, n+1); if (n+1 < poly->length) // set interior coefficient poly->coeffs[n] = c; else if (n+1 == poly->length) { // set leading coefficient if (c) poly->coeffs[n] = c; else { // set leading coefficient to zero poly->length--; __zmod_poly_normalise(poly); } } else { // extend polynomial if (!c) return; for (unsigned long i = poly->length; i < n; i++) poly->coeffs[i] = 0; poly->coeffs[n] = c; poly->length = n+1; } } /**************************************************************************** String conversions and I/O ****************************************************************************/ /* Create a zmod_poly_t object from a string. Format: */ int zmod_poly_from_string(zmod_poly_t poly, char* s) { const char* whitespace = " \t\n\r"; unsigned long p, length; if (!sscanf(s, "%lx %lx", &length, &p)) return 0; poly->p = p; poly->p_inv = z_precompute_inverse(p); #if PREINV32 poly->p32_inv = z_precompute_inverse32(p); #endif // jump to next whitespace s += strcspn(s, whitespace); poly->length = 0; zmod_poly_fit_length(poly, length); for (unsigned long i = 0; i < length; i++) { // skip whitespace s += strspn(s, whitespace); if (!sscanf(s, "%ld", &poly->coeffs[i])) return 0; poly->length++; // jump to next whitespace s += strcspn(s, whitespace); } __zmod_poly_normalise(poly); return 1; } /* Convert a zmod_poly into a string. Format: */ char* zmod_poly_to_string(zmod_poly_t poly) { // estimate the size of the string // 20 = enough room for null terminator and length info // and another 20 for p value... 
unsigned long size = 20*(2+poly->length); for (unsigned long i = 0; i < poly->length; i++) { // +2 is for the sign and a space if (poly->coeffs[i]) size += (unsigned long)ceil(log10(poly->coeffs[i])) + 2; else size += 3; } // write the string char* buf = (char*) malloc(size); char* ptr = buf + sprintf(buf, "%ld %ld ", poly->length, poly->p); for (unsigned long i = 0; i < poly->length; i++) { ptr += sprintf(ptr, "%ld ", poly->coeffs[i]); } ptr--; *ptr = 0; return buf; } /* Convert a zmod_poly to a string and write it to the file f. */ void zmod_poly_fprint(zmod_poly_t poly, FILE* f) { char* s = zmod_poly_to_string(poly); fputs(s, f); free(s); } /* Output the string representation of zmod_poly to stdout */ void zmod_poly_print(zmod_poly_t poly) { zmod_poly_fprint(poly, stdout); } /* Create a zmod_poly from a string representation in file f */ int zmod_poly_fread(zmod_poly_t poly, FILE* f) { // read poly length and mod unsigned long length, p; if (!fscanf(f, "%ld %ld", &length, &p)) return 0; poly->length = 0; poly->p = p; poly->p_inv = z_precompute_inverse(p); #if PREINV32 poly->p32_inv = z_precompute_inverse32(p); #endif zmod_poly_fit_length(poly, length); // read coefficients for (unsigned long i = 0; i < length; i++) { if (!fscanf(f, "%ld", &poly->coeffs[i])) return 0; poly->length++; } __zmod_poly_normalise(poly); return 1; } /* Create a zmod_poly from stdin */ int zmod_poly_read(zmod_poly_t poly) { return zmod_poly_fread(poly, stdin); } /**************************************************************************** Length and degree ****************************************************************************/ void __zmod_poly_normalise(zmod_poly_t poly) { while (poly->length && (poly->coeffs[poly->length-1] == 0L)) poly->length--; } int __zmod_poly_normalised(zmod_poly_t poly) { return (poly->length == 0) || (poly->coeffs[poly->length-1] != 0L); } void zmod_poly_truncate(zmod_poly_t poly, unsigned long length) { // inplace truncation if (length < poly->length) 
poly->length = length; __zmod_poly_normalise(poly); } /**************************************************************************** Assignment ****************************************************************************/ void _zmod_poly_set(zmod_poly_t res, zmod_poly_t poly) { if (res == poly) return; for (unsigned long i = 0; i < poly->length; i++) res->coeffs[i] = poly->coeffs[i]; res->length = poly->length; res->p = poly->p; res->p_inv = poly->p_inv; #if PREINV32 res->p32_inv = poly->p32_inv; #endif } void zmod_poly_set(zmod_poly_t res, zmod_poly_t poly) { if (res == poly) return; zmod_poly_fit_length(res, poly->length); _zmod_poly_set(res, poly); } /**************************************************************************** Comparison ****************************************************************************/ int zmod_poly_equal(zmod_poly_t poly1, zmod_poly_t poly2) { if (poly1->p != poly2->p) return 0; if (poly1->length != poly2->length) return 0; for (unsigned long i = 0; i < poly1->length; i++) if (poly1->coeffs[i] != poly2->coeffs[i]) return 0; return 1; } /**************************************************************************** Reversal ****************************************************************************/ /* Sets output to the reverse of input (i.e. reverse the order of the coefficients) assuming input to be a polynomial with _length_ coefficients (it may have a length that is less than _length_). 
*/
void _zmod_poly_reverse(zmod_poly_t output, zmod_poly_t input, unsigned long length)
{
   long i;

   if (input != output)
   {
      // copy the existing coefficients into mirrored positions
      for (i = 0; i < FLINT_MIN(length, input->length); i++)
      {
         output->coeffs[length - i - 1] = input->coeffs[i];
      }
      // positions mirroring coefficients beyond input->length become zero
      for ( ; i < length; i++)
      {
         output->coeffs[length - i - 1] = 0L;
      }
      output->length = length;
      __zmod_poly_normalise(output);
   } else
   {
      // in place: swap position i with position length - i - 1, treating
      // anything at or beyond input->length as zero
      unsigned long temp;

      for (i = 0; i < length/2; i++)
      {
         if (i < input->length)
         {
            temp = input->coeffs[i];
         } else
         {
            temp = 0L;
         }
         if (length - i - 1 < input->length)
         {
            input->coeffs[i] = input->coeffs[length - i - 1];
         } else
         {
            input->coeffs[i] = 0L;
         }
         input->coeffs[length - i - 1] = temp;
      }
      // odd length: the middle slot is zeroed if it lies beyond the input
      if ((length & 1) && (i >= input->length)) input->coeffs[i] = 0L;
      output->length = length;
      __zmod_poly_normalise(output);
   }
}

/* As _zmod_poly_reverse(), after making room for length coefficients in output. */
void zmod_poly_reverse(zmod_poly_t output, zmod_poly_t input, unsigned long length)
{
   zmod_poly_fit_length(output, length);

   _zmod_poly_reverse(output, input, length);
}

/****************************************************************************

   Monic polys

****************************************************************************/

/* Set output to pol scaled by the inverse (via z_invert) of its leading
   coefficient. A zero pol gives a zero output. */
void zmod_poly_make_monic(zmod_poly_t output, zmod_poly_t pol)
{
   if (!pol->length)
   {
      output->length = 0;
      return;
   }

   unsigned long lead_inv = pol->coeffs[pol->length-1];

   // already monic: a plain copy is enough
   if (lead_inv == 1L)
   {
      zmod_poly_set(output, pol);
      return;
   }

   lead_inv = z_invert(lead_inv, pol->p);

   zmod_poly_scalar_mul(output, pol, lead_inv);
}

/****************************************************************************

   Addition/subtraction

****************************************************************************/

void zmod_poly_add(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2)
{
   // rearrange parameters to make poly1 no longer than poly2
   if (poly1->length > poly2->length)
      SWAP_ZMOD_POLY_PTRS(poly1, poly2);

   zmod_poly_fit_length(res, poly2->length);

   unsigned long i, neg1;

   /* The following standard technique was found in David Harvey's zn_poly */

   for (i = 0; i < poly1->length; i++)
   {
      res->coeffs[i]
= z_addmod(poly1->coeffs[i], poly2->coeffs[i], poly1->p); } for (; i < poly2->length; i++) res->coeffs[i] = poly2->coeffs[i]; res->length = poly2->length; __zmod_poly_normalise(res); } void _zmod_poly_add_without_mod(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) { // rearrange parameters to make poly1 no longer than poly2 if (poly1->length > poly2->length) SWAP_ZMOD_POLY_PTRS(poly1, poly2); unsigned long i, neg1; for (i = 0; i < poly1->length; i++) { res->coeffs[i] = poly1->coeffs[i] + poly2->coeffs[i]; } for (; i < poly2->length; i++) res->coeffs[i] = poly2->coeffs[i]; res->length = poly2->length; __zmod_poly_normalise(res); } void _zmod_poly_sub(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) { if (poly1 == poly2) { // equal operands res->length = 0; return; } // rearrange parameters to make poly1 no longer than poly2 int swapped = 0; if (poly1->length > poly2->length) { swapped = 1; SWAP_ZMOD_POLY_PTRS(poly1, poly2); } unsigned long i; if (swapped) { for (i = 0; i < poly1->length; i++) { res->coeffs[i] = z_submod(poly2->coeffs[i], poly1->coeffs[i], poly2->p); } for (; i < poly2->length; i++) res->coeffs[i] = poly2->coeffs[i]; } else { for (i = 0; i < poly1->length; i++) { res->coeffs[i] = z_submod(poly1->coeffs[i], poly2->coeffs[i], poly2->p); } for (; i < poly2->length; i++) { res->coeffs[i] = poly2->p - poly2->coeffs[i]; if (res->coeffs[i] == poly2->p) res->coeffs[i] = 0; } } res->length = poly2->length; __zmod_poly_normalise(res); } void zmod_poly_sub(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) { if (poly1 == poly2) { // equal operands res->length = 0; return; } // rearrange parameters to make poly1 no longer than poly2 if (poly1->length > poly2->length) { zmod_poly_fit_length(res, poly1->length); } else zmod_poly_fit_length(res, poly2->length); _zmod_poly_sub(res, poly1, poly2); } void zmod_poly_neg(zmod_poly_t res, zmod_poly_t poly) { zmod_poly_fit_length(res, poly->length); for (unsigned long i = 0; i < poly->length; i++) { if 
(poly->coeffs[i]) res->coeffs[i] = poly->p - poly->coeffs[i]; else res->coeffs[i] = 0L; } res->length = poly->length; } /**************************************************************************** Shifting ****************************************************************************/ void zmod_poly_left_shift(zmod_poly_t res, zmod_poly_t poly, unsigned long k) { zmod_poly_fit_length(res, poly->length + k); unsigned long temp; if (poly == res) { // inplace; just shift the coeffs over for (long i = poly->length - 1; i >= 0; i--) { poly->coeffs[i+k] = poly->coeffs[i]; } for (unsigned long i = 0; i < k; i++) poly->coeffs[i] = 0L; } else { // not inplace; need to copy data for (unsigned long i = 0; i < k; i++) res->coeffs[i] = 0L; for (unsigned long i = 0; i < poly->length; i++) res->coeffs[i + k] = poly->coeffs[i]; res->p = poly->p; res->p_inv = poly->p_inv; #if PREINV32 res->p32_inv = poly->p32_inv; #endif } res->length = poly->length + k; } void zmod_poly_right_shift(zmod_poly_t res, zmod_poly_t poly, unsigned long k) { if (k >= poly->length) { // shift all coefficients off the end res->length = 0; res->p = poly->p; res->p_inv = poly->p_inv; #if PREINV32 res->p32_inv = poly->p32_inv; #endif return; } if (poly == res) { // inplace; just shift the mpz_t's over for (unsigned long i = k; i < poly->length; i++) poly->coeffs[i - k] = poly->coeffs[i]; } else { // not inplace; need to copy data zmod_poly_fit_length(res, poly->length - k); for (unsigned long i = k; i < poly->length; i++) res->coeffs[i - k] = poly->coeffs[i]; res->p = poly->p; res->p_inv = poly->p_inv; #if PREINV32 res->p32_inv = poly->p32_inv; #endif } res->length = poly->length - k; } /******************************************************************************* Polynomial multiplication ********************************************************************************/ void zmod_poly_mul(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) { if (poly1 == poly2) { zmod_poly_sqr(res, poly1); return; } if 
(poly1->length + poly2->length <= 6)
   {
      // tiny products: classical multiplication
      zmod_poly_mul_classical(res, poly1, poly2);
      return;
   }

   unsigned long bits = FLINT_BIT_COUNT(poly1->p);

   // slightly larger products also go classical when p has at most 32 bits
   if ((bits <= 32) && (poly1->length + poly2->length <= 8))
   {
      zmod_poly_mul_classical(res, poly1, poly2);
      return;
   }

   zmod_poly_mul_KS(res, poly1, poly2, 0);
}

/* Set res to poly squared, choosing between classical squaring and
   zmod_poly_mul_KS() from the length of poly and the bit size of poly->p. */
void zmod_poly_sqr(zmod_poly_t res, zmod_poly_t poly)
{
   if (poly->length <= 4)
   {
      zmod_poly_sqr_classical(res, poly);
      return;
   }

   unsigned long bits = FLINT_BIT_COUNT(poly->p);

   if ((bits >= 32) && (bits <= 50) && (poly->length <= 10))
   {
      zmod_poly_sqr_classical(res, poly);
      return;
   }

   zmod_poly_mul_KS(res, poly, poly, 0);
}

/*
   This is just like zmod_poly_mul_classical(), with the following restrictions:

    * assumes res does not alias poly1 and poly2
    * res->alloc >= poly1->length + poly2->length - 1
      (i.e. output has enough room for product)
*/
void _zmod_poly_mul_classical(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2)
{
   FLINT_ASSERT(res != poly1);
   FLINT_ASSERT(res != poly2);

   if (!poly1->length || !poly2->length)
   {
      // one of the polys is zero
      res->length = 0;
      return;
   }

   res->length = poly1->length + poly2->length - 1;
   res->p = poly1->p;
   res->p_inv = poly1->p_inv;
#if PREINV32
   res->p32_inv = poly1->p32_inv;
#endif

   // length = length of the shorter operand
   unsigned long length;

   if (poly1->length <= poly2->length)
   {
      length = poly1->length;
   }
   else
   {
      length = poly2->length;
   }

   unsigned log_length = 0;
   // NOTE(review): the next statement is damaged in this copy of the file;
   // the text between "1<" and "p)<<1)" (which must have included the
   // declaration of bits used below) is missing. Restore from upstream.
   while ((1<p)<<1) + log_length;

   FLINT_ASSERT(res->alloc >= res->length);

   for (unsigned long i = 0; i < res->length; i++)
      res->coeffs[i] = 0;

   if(bits < FLINT_BITS)
   {
      // the numbers of bits in the output of each coeff will be less than FLINT_BITS
      // so don't need to mod to stay in the single limb, hence can leave this for the
      // end...
__zmod_poly_mul_classical_mod_last(res, poly1, poly2, bits); } else { bits = zmod_poly_bits(poly1) + zmod_poly_bits(poly2) + log_length; if (bits < FLINT_BITS) { __zmod_poly_mul_classical_mod_last(res, poly1, poly2, bits); } else { __zmod_poly_mul_classical_mod_throughout(res, poly1, poly2, bits); } } __zmod_poly_normalise(res); } /* Actually computes the classical multiplication, only applying mod at the end of the computations. */ void __zmod_poly_mul_classical_mod_last(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits) { for (unsigned long i = 0; i < poly1->length; i++) for (unsigned long j = 0; j < poly2->length; j++) res->coeffs[i+j] = res->coeffs[i+j] + poly1->coeffs[i] * poly2->coeffs[j]; #if FLINT_BITS == 64 if (bits <= FLINT_D_BITS) { for (unsigned long i = 0; i < res->length; i++) res->coeffs[i] = z_mod_precomp(res->coeffs[i], res->p, res->p_inv); } else { #endif for (unsigned long i = 0; i < res->length; i++) res->coeffs[i] = z_mod2_precomp(res->coeffs[i], res->p, res->p_inv); #if FLINT_BITS == 64 } #endif } /* Computes the classical multiplication, applying mods at each step. 
*/ void __zmod_poly_mul_classical_mod_throughout(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits) { #if FLINT_BITS == 64 if (bits <= FLINT_D_BITS) { for (unsigned long i = 0; i < poly1->length; i++) for (unsigned long j = 0; j < poly2->length; j++) res->coeffs[i+j] = z_addmod(res->coeffs[i+j], z_mulmod_precomp(poly1->coeffs[i], poly2->coeffs[j], poly1->p, poly1->p_inv), poly1->p); } else { #endif for (unsigned long i = 0; i < poly1->length; i++) for (unsigned long j = 0; j < poly2->length; j++) res->coeffs[i+j] = z_addmod(res->coeffs[i+j], z_mulmod2_precomp(poly1->coeffs[i], poly2->coeffs[j], poly1->p, poly1->p_inv), poly1->p); #if FLINT_BITS == 64 } #endif } void zmod_poly_mul_classical(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) { if (!poly1->length || !poly2->length) { // one of the polys is zero res->length = 0; return; } if (poly1 == poly2) { // polys are identical, so call specialised squaring routine zmod_poly_sqr_classical(res, poly1); return; } unsigned long length = poly1->length + poly2->length - 1; if (res == poly1 || res == poly2) { // output is inplace, so need a temporary zmod_poly_t temp; zmod_poly_init2(temp, poly1->p, length); _zmod_poly_mul_classical(temp, poly1, poly2); zmod_poly_swap(temp, res); zmod_poly_clear(temp); } else { // output not inplace zmod_poly_fit_length(res, length); _zmod_poly_mul_classical(res, poly1, poly2); } } /* This is just like zmod_poly_sqr_classical(), with the following restrictions: * assumes res does not alias poly * res->alloc >= 2*poly->length - 1 (i.e. 
output has enough room for product)

   The square is built in three passes: products of distinct coefficients
   (each pair once), a doubling of those sums, then the squares of the
   individual coefficients.
*/
void _zmod_poly_sqr_classical(zmod_poly_t res, zmod_poly_t poly)
{
   FLINT_ASSERT(res != poly);
   FLINT_ASSERT(poly->length);

   if (!poly->length)
   {
      // input is zero
      res->length = 0;
      return;
   }

   res->length = 2*poly->length - 1;
   res->p = poly->p;
   res->p_inv = poly->p_inv;
#if PREINV32
   res->p32_inv = poly->p32_inv;
#endif

   FLINT_ASSERT(res->alloc >= res->length);

   unsigned long bits = FLINT_BIT_COUNT(poly->p);

   for (unsigned long i = 0; i < res->length; i++)
      res->coeffs[i] = 0;

   // off-diagonal products
#if FLINT_BITS == 64
   if (bits <= FLINT_D_BITS)
   {
      for (unsigned long i = 1; i < poly->length; i++)
         for (unsigned long j = 0; j < i; j++)
            res->coeffs[i+j] = z_addmod(res->coeffs[i+j], z_mulmod_precomp(poly->coeffs[i], poly->coeffs[j], poly->p, poly->p_inv), poly->p);
   } else
   {
#endif
      for (unsigned long i = 1; i < poly->length; i++)
         for (unsigned long j = 0; j < i; j++)
            res->coeffs[i+j] = z_addmod(res->coeffs[i+j], z_mulmod2_precomp(poly->coeffs[i], poly->coeffs[j], poly->p, poly->p_inv), poly->p);
#if FLINT_BITS == 64
   }
#endif

   // double the off-diagonal products
   // (the first and last coefficients receive no off-diagonal terms)
   for (unsigned long i = 1; i < res->length - 1; i++)
      res->coeffs[i] = z_addmod(res->coeffs[i], res->coeffs[i], poly->p);

   // add in diagonal products
#if FLINT_BITS == 64
   if (bits <= FLINT_D_BITS)
   {
      for (unsigned long i = 0; i < poly->length; i++)
         res->coeffs[2*i] = z_addmod(res->coeffs[2*i], z_mulmod_precomp(poly->coeffs[i], poly->coeffs[i], poly->p, poly->p_inv), poly->p);
   } else
   {
#endif
      for (unsigned long i = 0; i < poly->length; i++)
         res->coeffs[2*i] = z_addmod(res->coeffs[2*i], z_mulmod2_precomp(poly->coeffs[i], poly->coeffs[i], poly->p, poly->p_inv), poly->p);
#if FLINT_BITS == 64
   }
#endif

   __zmod_poly_normalise(res);
}

/* Set res to poly squared using classical multiplication; res may alias poly. */
void zmod_poly_sqr_classical(zmod_poly_t res, zmod_poly_t poly)
{
   if (!poly->length)
   {
      // input is zero
      res->length = 0;
      return;
   }

   unsigned long length = 2*poly->length - 1;

   if (res == poly)
   {
      // output is inplace, so need a temporary
      zmod_poly_t temp;
zmod_poly_init2(temp, poly->p, length);
      _zmod_poly_sqr_classical(temp, poly);
      zmod_poly_swap(temp, res);
      zmod_poly_clear(temp);
   }
   else
   {
      // output not inplace

      // allocate more coefficients if necessary
      zmod_poly_fit_length(res, length);
      _zmod_poly_sqr_classical(res, poly);
   }
}

//=======================================================================

/*
   This is just like zmod_poly_mul_classical_trunc(), with the following restrictions:

    * assumes res does not alias poly1 and poly2
    * res->alloc >= MIN(trunc, poly1->length + poly2->length - 1)
      (i.e. output has enough room for truncated product)
*/
void _zmod_poly_mul_classical_trunc(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc)
{
   FLINT_ASSERT(res != poly1);
   FLINT_ASSERT(res != poly2);

   if (!poly1->length || !poly2->length || !trunc)
   {
      // one of the polys is zero
      res->length = 0;
      return;
   }

   if (trunc >= poly1->length + poly2->length - 1)
   {
      // there's no truncating to be done
      _zmod_poly_mul_classical(res, poly1, poly2);
      return;
   }

   res->length = trunc;
   res->p = poly1->p;
   res->p_inv = poly1->p_inv;
#if PREINV32
   res->p32_inv = poly1->p32_inv;
#endif

   // length = length of the shorter operand
   unsigned long length;

   if (poly1->length <= poly2->length)
   {
      length = poly1->length;
   }
   else
   {
      length = poly2->length;
   }

   unsigned log_length = 0;
   // NOTE(review): the next statement is damaged in this copy of the file;
   // the text between "1<" and "p)<<1)" (which must have included the
   // declaration of bits used below) is missing. Restore from upstream.
   while ((1<p)<<1) + log_length;

   FLINT_ASSERT(res->alloc >= res->length);

   for (unsigned long i = 0; i < res->length; i++)
      res->coeffs[i] = 0;

   if(bits < FLINT_BITS)
   {
      // the numbers of bits in the output of each coeff will be less than FLINT_BITS
      // so don't need to mod to stay in the single limb, hence can leave this for the
      // end...
__zmod_poly_mul_classical_trunc_mod_last(res, poly1, poly2, bits, trunc); } else { bits = zmod_poly_bits(poly1) + zmod_poly_bits(poly2) + log_length; if (bits < FLINT_BITS) { __zmod_poly_mul_classical_trunc_mod_last(res, poly1, poly2, bits, trunc); } else { __zmod_poly_mul_classical_trunc_mod_throughout(res, poly1, poly2, bits, trunc); } } __zmod_poly_normalise(res); } /* Actually computes the truncated classical multiplication, only applying mod at the end of the computations. Assumes neither poly length is zero and trunc is not zero Assumes res does not alias poly1 or poly2 Assumes trunc < poly1->length + poly2->length - 1 */ void __zmod_poly_mul_classical_trunc_mod_last(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits, unsigned long trunc) { for (unsigned long i = 0; i < poly1->length; i++) for (unsigned long j = 0; j < poly2->length; j++) if (i + j < trunc) res->coeffs[i+j] = res->coeffs[i+j] + poly1->coeffs[i] * poly2->coeffs[j]; #if FLINT_BITS == 64 if (bits <= FLINT_D_BITS) { for (unsigned long i = 0; i < trunc; i++) res->coeffs[i] = z_mod_precomp(res->coeffs[i], res->p, res->p_inv); } else { #endif for (unsigned long i = 0; i < trunc; i++) res->coeffs[i] = z_mod2_precomp(res->coeffs[i], res->p, res->p_inv); #if FLINT_BITS == 64 } #endif } /* Computes the classical multiplication, applying mods at each step. 
Only the coefficients with index below trunc are formed.

   Assumes neither poly length is zero and trunc is not zero
   Assumes res does not alias poly1 or poly2
   Assumes trunc < poly1->length + poly2->length - 1
*/
void __zmod_poly_mul_classical_trunc_mod_throughout(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits, unsigned long trunc)
{
#if FLINT_BITS == 64
   // cheaper mulmod when bits is at most FLINT_D_BITS
   if (bits <= FLINT_D_BITS)
   {
      for (unsigned long i = 0; i < poly1->length; i++)
         for (unsigned long j = 0; j < poly2->length; j++)
            if (i + j < trunc)
               res->coeffs[i+j] = z_addmod(res->coeffs[i+j], z_mulmod_precomp(poly1->coeffs[i], poly2->coeffs[j], poly1->p, poly1->p_inv), poly1->p);
   } else
   {
#endif
      for (unsigned long i = 0; i < poly1->length; i++)
         for (unsigned long j = 0; j < poly2->length; j++)
            if (i + j < trunc)
               res->coeffs[i+j] = z_addmod(res->coeffs[i+j], z_mulmod2_precomp(poly1->coeffs[i], poly2->coeffs[j], poly1->p, poly1->p_inv), poly1->p);
#if FLINT_BITS == 64
   }
#endif
}

/* Set res to the first trunc coefficients of poly1 * poly2 using classical
   multiplication; res may alias either operand. */
void zmod_poly_mul_classical_trunc(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc)
{
   if (!poly1->length || !poly2->length || !trunc)
   {
      // one of the polys is zero
      res->length = 0;
      return;
   }

   /*if (poly1 == poly2)
   {
      // polys are identical, so call specialised truncated squaring routine
      zmod_poly_sqr_classical_trunc(res, poly1.
trunc); return; }*/ unsigned long length = poly1->length + poly2->length - 1; if (res == poly1 || res == poly2) { // output is inplace, so need a temporary zmod_poly_t temp; zmod_poly_init2(temp, poly1->p, FLINT_MIN(length, trunc)); _zmod_poly_mul_classical_trunc(temp, poly1, poly2, trunc); zmod_poly_swap(temp, res); zmod_poly_clear(temp); } else { // output not inplace zmod_poly_fit_length(res, FLINT_MIN(length, trunc)); _zmod_poly_mul_classical_trunc(res, poly1, poly2, trunc); } } //=================================================================================== void _zmod_poly_mul_classical_trunc_left(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc) { FLINT_ASSERT(res != poly1); FLINT_ASSERT(res != poly2); if (!poly1->length || !poly2->length || (trunc >= poly1->length + poly2->length - 1)) { // one of the polys is zero res->length = 0; return; } if (trunc == 0) { _zmod_poly_mul_classical(res, poly1, poly2); } res->length = poly1->length + poly2->length - 1; res->p = poly1->p; res->p_inv = poly1->p_inv; #if PREINV32 res->p32_inv = poly1->p32_inv; #endif unsigned long length; if (poly1->length <= poly2->length) { length = poly1->length; } else { length = poly2->length; } unsigned log_length = 0; while ((1<p)<<1) + log_length; FLINT_ASSERT(res->alloc >= res->length); for (unsigned long i = 0; i < res->length; i++) res->coeffs[i] = 0; if(bits < FLINT_BITS) { // the numbers of bits in the output of each coeff will be less than FLINT_BITS // so don't need to mod to stay in the single limb, hence can leave this for the // end... 
__zmod_poly_mul_classical_trunc_left_mod_last(res, poly1, poly2, bits, trunc); } else { bits = zmod_poly_bits(poly1) + zmod_poly_bits(poly2) + log_length; if (bits < FLINT_BITS) { __zmod_poly_mul_classical_trunc_left_mod_last(res, poly1, poly2, bits, trunc); } else { __zmod_poly_mul_classical_trunc_left_mod_throughout(res, poly1, poly2, bits, trunc); } } __zmod_poly_normalise(res); } /* Actually computes the classical multiplication, only applying mod at the end of the computations. */ void __zmod_poly_mul_classical_trunc_left_mod_last(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits, unsigned long trunc) { for (unsigned long i = 0; i < poly1->length; i++) for (unsigned long j = 0; j < poly2->length; j++) if (i + j >= trunc) res->coeffs[i+j] = res->coeffs[i+j] + poly1->coeffs[i] * poly2->coeffs[j]; #if FLINT_BITS == 64 if (bits <= FLINT_D_BITS) { for (unsigned long i = trunc; i < res->length; i++) res->coeffs[i] = z_mod_precomp(res->coeffs[i], res->p, res->p_inv); } else { #endif for (unsigned long i = trunc; i < res->length; i++) res->coeffs[i] = z_mod2_precomp(res->coeffs[i], res->p, res->p_inv); #if FLINT_BITS == 64 } #endif } /* Computes the classical multiplication, applying mods at each step. 
*/ void __zmod_poly_mul_classical_trunc_left_mod_throughout(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits, unsigned long trunc) { #if FLINT_BITS == 64 if (bits <= FLINT_D_BITS) { for (unsigned long i = 0; i < poly1->length; i++) for (unsigned long j = 0; j < poly2->length; j++) if (i + j >= trunc) res->coeffs[i+j] = z_addmod(res->coeffs[i+j], z_mulmod_precomp(poly1->coeffs[i], poly2->coeffs[j], poly1->p, poly1->p_inv), poly1->p); } else { #endif for (unsigned long i = 0; i < poly1->length; i++) for (unsigned long j = 0; j < poly2->length; j++) if (i + j >= trunc) res->coeffs[i+j] = z_addmod(res->coeffs[i+j], z_mulmod2_precomp(poly1->coeffs[i], poly2->coeffs[j], poly1->p, poly1->p_inv), poly1->p); #if FLINT_BITS == 64 } #endif } void zmod_poly_mul_classical_trunc_left(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc) { if (!poly1->length || !poly2->length) { // one of the polys is zero res->length = 0; return; } /*if (poly1 == poly2) { // polys are identical, so call specialised squaring routine zmod_poly_sqr_classical_trunc_left(res, poly1, trunc); return; }*/ unsigned long length = poly1->length + poly2->length - 1; if (res == poly1 || res == poly2) { // output is inplace, so need a temporary zmod_poly_t temp; zmod_poly_init2(temp, poly1->p, length); _zmod_poly_mul_classical_trunc_left(temp, poly1, poly2, trunc); zmod_poly_swap(temp, res); zmod_poly_clear(temp); } else { // output not inplace zmod_poly_fit_length(res, length); _zmod_poly_mul_classical_trunc_left(res, poly1, poly2, trunc); } } //============================================================================================ /* Debugging function */ void print_var(char *name, unsigned long value) { printf("%s = %d\n", name, value); } void zmod_poly_mul_KS(zmod_poly_t output, zmod_poly_p input1, zmod_poly_p input2, unsigned long bits_input) { unsigned long length1 = input1->length; unsigned long length2 = input2->length; if ((length1 == 0) || (length2 
== 0))
   {
      // a zero operand gives a zero product
      zmod_poly_zero(output);
      return;
   }

   unsigned long length = length1 + length2 - 1;

   zmod_poly_fit_length(output, length);

   if (output == input1 || output == input2)
   {
      // output is inplace, so need a temporary
      zmod_poly_t temp;
      zmod_poly_init2(temp, input1->p, length);
      _zmod_poly_mul_KS(temp, input1, input2, bits_input);
      zmod_poly_swap(temp, output);
      zmod_poly_clear(temp);
   }
   else
   {
      // output not inplace
      zmod_poly_fit_length(output, length);
      _zmod_poly_mul_KS(output, input1, input2, bits_input);
   }
}

/* Worker behind zmod_poly_mul_KS(): that wrapper routes aliased outputs
   through a temporary and makes room in output before calling here. */
void _zmod_poly_mul_KS(zmod_poly_t output, zmod_poly_p input1, zmod_poly_p input2, unsigned long bits_input)
{
   unsigned long length1 = input1->length;
   unsigned long length2 = input2->length;

   if ((length1 == 0) || (length2 == 0))
   {
      zmod_poly_zero(output);
      return;
   }

   unsigned long final_length = length1 + length2 - 1;

   // arrange for input1 to be the longer operand
   if (length2 > length1)
   {
      unsigned long temp = length1;
      length1 = length2;
      length2 = temp;
      SWAP_ZMOD_POLY_PTRS(input1, input2);
   }

   unsigned long bits1, bits2;

   bits1 = zmod_poly_bits(input1);
   // squaring: both operands share one bit bound
   bits2 = (input1 == input2) ?
bits1 : zmod_poly_bits(input2);

   unsigned long length = length2;
   unsigned log_length = 0;
   // NOTE(review): the next statement is damaged in this copy of the file;
   // everything between "1<" and "length = final_length" (the remainder of
   // this function's computation) is missing. Restore from upstream.
   while ((1<length = final_length;

   /* The modulus may not be prime, so normalisation may be necessary */
   __zmod_poly_normalise(output);
}

//==========================================================================

/* Set output to the first trunc coefficients of input1 * input2 via
   _zmod_poly_mul_KS_trunc(); output may alias either input. bits_input is
   passed through unchanged. */
void zmod_poly_mul_KS_trunc(zmod_poly_t output, zmod_poly_p input1, zmod_poly_p input2, unsigned long bits_input, unsigned long trunc)
{
   unsigned long length1 = input1->length;
   unsigned long length2 = input2->length;

   if ((length1 == 0) || (length2 == 0) || (trunc == 0))
   {
      zmod_poly_zero(output);
      return;
   }

   unsigned long length = length1 + length2 - 1;

   if (output == input1 || output == input2)
   {
      // output is inplace, so need a temporary
      zmod_poly_t temp;
      zmod_poly_init2(temp, input1->p, FLINT_MIN(length, trunc));
      _zmod_poly_mul_KS_trunc(temp, input1, input2, bits_input, trunc);
      zmod_poly_swap(temp, output);
      zmod_poly_clear(temp);
   }
   else
   {
      // output not inplace
      zmod_poly_fit_length(output, FLINT_MIN(length, trunc));
      _zmod_poly_mul_KS_trunc(output, input1, input2, bits_input, trunc);
   }
}

void _zmod_poly_mul_KS_trunc(zmod_poly_t output, zmod_poly_p input1, zmod_poly_p input2, unsigned long bits_input, unsigned long trunc)
{
   if ((input1->length == 0) || (input2->length == 0) || (trunc == 0))
   {
      output->length = 0;
      return;
   }

   // only the first trunc coefficients of each input are used
   unsigned long length1 = FLINT_MIN(input1->length, trunc);
   unsigned long length2 = FLINT_MIN(input2->length, trunc);

   // drop trailing zeros exposed by cutting the inputs at trunc
   while ((length1) && (input1->coeffs[length1-1] == 0)) length1--;
   while ((length2) && (input2->coeffs[length2-1] == 0)) length2--;

   if ((length1 == 0) || (length2 == 0))
   {
      zmod_poly_zero(output);
      return;
   }

   unsigned long length = length1 + length2 - 1;

   if (trunc > length) trunc = length;

   // arrange for input1 to be the longer operand
   if (length2 > length1)
   {
      unsigned long temp = length1;
      length1 = length2;
      length2 = temp;
      SWAP_ZMOD_POLY_PTRS(input1, input2);
   }

   unsigned long bits1, bits2;

   bits1 = zmod_poly_bits(input1);
   bits2 = (input1 == input2) ?
bits1 : zmod_poly_bits(input2);

   unsigned long length_short = length2;
   unsigned log_length = 0;
   // NOTE(review): the next statement is damaged in this copy of the file;
   // everything between "1L<" and "length = output_length" (the remainder
   // of this function's computation) is missing. Restore from upstream.
   while ((1L<length = output_length;

   /* The modulus may not be prime, so normalisation may be necessary */
   __zmod_poly_normalise(output);
}

/* Set output to the first trunc coefficients of input1 times the polynomial
   cached in pre, via _zmod_poly_mul_KS_trunc_precomp() with bits_input 0. */
void zmod_poly_mul_trunc_n_precomp(zmod_poly_t output, zmod_poly_p input1, zmod_poly_precomp_t pre, unsigned long trunc)
{
   if ((input1->length == 0) || (pre->length2 == 0) || (trunc == 0))
   {
      output->length = 0;
      return;
   }

   unsigned long length = input1->length + pre->length2 - 1;

   zmod_poly_fit_length(output, FLINT_MIN(length, trunc));
   _zmod_poly_mul_KS_trunc_precomp(output, input1, pre, 0, trunc);
}

/*
   Prepare for caching of FFT for a truncated multiplication of input2, with the given number of
   bits per output coefficient (0 if this is to be computed automatically) where the
   output will be truncated to the given length
*/
void zmod_poly_mul_trunc_n_precomp_init(zmod_poly_precomp_t pre, zmod_poly_p input2, unsigned long bits_input, unsigned long trunc)
{
   unsigned long length2 = FLINT_MIN(input2->length, trunc);

   while ((length2) && (input2->coeffs[length2-1] == 0)) length2--;

   pre->length2 = length2;

   if ((length2 == 0) || (trunc == 0))
   {
      // nothing to cache: initialise an empty precomputation
      F_mpn_mul_precomp_init(pre->precomp, NULL, 0, 0);
      return;
   }

   unsigned log_length = 0;
   // NOTE(review): the next statement is damaged in this copy of the file;
   // the text between "1L<" and "p) + log_length" (which must have included
   // the declaration of bits used below) is missing. Restore from upstream.
   while ((1L<p) + log_length;

   if (bits_input)
   {
      // caller supplied bits per coefficient overrides the computed value
      bits = bits_input;
   }

   unsigned long limbs1 = FLINT_MAX((long)((trunc * bits-1) / FLINT_BITS + 1), 0L);
   unsigned long limbs2 = FLINT_MAX((long)((length2 * bits-1) / FLINT_BITS + 1), 0L);

   pre->limbs2 = limbs2;

   mp_limb_t* mpn2 = (mp_limb_t*) flint_stack_alloc(limbs2);

   _zmod_poly_bit_pack_mpn(mpn2, input2, bits, length2);

   F_mpn_mul_precomp_init(pre->precomp, mpn2, limbs2, limbs1);

   flint_stack_release(); // release mpn2
}

void zmod_poly_mul_precomp_init(zmod_poly_precomp_t pre, zmod_poly_t input2, unsigned long bits_input, unsigned long length1)
{
   unsigned long length2 = input2->length;

   while ((length2) && (input2->coeffs[length2-1] == 0)) length2--;

   pre->length2 = length2;

   if (length2
== 0)
   {
      // nothing to cache: initialise an empty precomputation
      F_mpn_mul_precomp_init(pre->precomp, NULL, 0, 0);
      return;
   }

   unsigned log_length = 0;
   // NOTE(review): the next statement is damaged in this copy of the file;
   // the text between "1L<" and "p) + log_length" (which must have included
   // the declaration of bits used below) is missing. Restore from upstream.
   while ((1L<p) + log_length;

   if (bits_input)
   {
      bits = bits_input;
   }

   unsigned long limbs1 = FLINT_MAX((long)((length1 * bits-1) / FLINT_BITS + 1), 0L);
   unsigned long limbs2 = FLINT_MAX((long)((length2 * bits-1) / FLINT_BITS + 1), 0L);

   pre->limbs2 = limbs2;

   mp_limb_t* mpn2 = (mp_limb_t*) flint_stack_alloc(limbs2);

   _zmod_poly_bit_pack_mpn(mpn2, input2, bits, length2);

   F_mpn_mul_precomp_init(pre->precomp, mpn2, limbs2, limbs1);

   flint_stack_release(); // release mpn2
}

/* Release the precomputed data held in pre. */
void zmod_poly_precomp_clear(zmod_poly_precomp_t pre)
{
   F_mpn_mul_precomp_clear(pre->precomp);
}

/* Truncated product of input1 with the polynomial cached in pre. No space
   is allocated here; zmod_poly_mul_trunc_n_precomp() makes room in output
   before calling. */
void _zmod_poly_mul_KS_trunc_precomp(zmod_poly_t output, zmod_poly_t input1, zmod_poly_precomp_t pre, unsigned long bits_input, unsigned long trunc)
{
   unsigned long length1 = FLINT_MIN(input1->length, trunc);

   while ((length1) && (input1->coeffs[length1-1] == 0)) length1--;

   unsigned long length2 = pre->length2;

   // NOTE(review): length wraps around when both lengths are 0; the zero
   // check below returns before it is used in that case
   unsigned long length = length1 + length2 - 1;

   if (trunc > length) trunc = length;

   if ((length1 == 0) || (length2 == 0) || (trunc == 0))
   {
      output->length = 0;
      return;
   }

   unsigned log_length = 0;
   // NOTE(review): damaged statement, see the note above
   while ((1L<p) + log_length;

   if (bits_input)
   {
      bits = bits_input;
   }

   mp_limb_t *mpn1, *res;

   unsigned long limbs1, limbs2;

   limbs1 = FLINT_MAX((long)((length1 * bits-1) / FLINT_BITS + 1), 0L);
   limbs2 = pre->limbs2;

   mpn1 = (mp_limb_t*) flint_stack_alloc(limbs1);

   _zmod_poly_bit_pack_mpn(mpn1, input1, bits, length1);

   res = (mp_limb_t*) flint_stack_alloc(limbs1+limbs2);
   F_mpn_clear(res, limbs1+limbs2);

   unsigned long output_length = FLINT_MIN(trunc, length);

   F_mpn_mul_precomp_trunc(res, mpn1, limbs1, pre->precomp, (output_length*bits-1)/FLINT_BITS+1);

   _zmod_poly_bit_unpack_mpn(output, res, output_length, bits);

   flint_stack_release(); //release res
   flint_stack_release(); //release mpn1

   output->length = output_length;

   /* The modulus may not be prime, so normalisation may be necessary */
   __zmod_poly_normalise(output);
}

void
_zmod_poly_mul_KS_precomp(zmod_poly_t output, zmod_poly_t input1, zmod_poly_precomp_t pre, unsigned long bits_input)
{
   // full (untruncated) product of input1 with the polynomial cached in pre
   unsigned long length1 = input1->length;

   while ((length1) && (input1->coeffs[length1-1] == 0)) length1--;

   unsigned long length2 = pre->length2;

   unsigned long length = length1 + length2 - 1;

   if ((length1 == 0) || (length2 == 0))
   {
      output->length = 0;
      return;
   }

   unsigned log_length = 0;
   // NOTE(review): the next statement is damaged in this copy of the file;
   // the text between "1L<" and "p) + log_length" (which must have included
   // the declaration of bits used below) is missing. Restore from upstream.
   while ((1L<p) + log_length;

   if (bits_input)
   {
      bits = bits_input;
   }

   mp_limb_t *mpn1, *res;

   unsigned long limbs1, limbs2;

   limbs1 = FLINT_MAX((long)((length1 * bits-1) / FLINT_BITS + 1), 0L);
   limbs2 = pre->limbs2;

   mpn1 = (mp_limb_t*) flint_stack_alloc(limbs1);

   _zmod_poly_bit_pack_mpn(mpn1, input1, bits, length1);

   res = (mp_limb_t*) flint_stack_alloc(limbs1+limbs2);
   res[limbs1+limbs2-1] = 0L;

   F_mpn_mul_precomp(res, mpn1, limbs1, pre->precomp);

   zmod_poly_fit_length(output, length);
   _zmod_poly_bit_unpack_mpn(output, res, length, bits);

   flint_stack_release(); //release res
   flint_stack_release(); //release mpn1

   output->length = length;

   /* The modulus may not be prime, so normalisation may be necessary */
   __zmod_poly_normalise(output);
}

#if USE_MIDDLE_PRODUCT
void _zmod_poly_mul_KS_middle_precomp(zmod_poly_t output, zmod_poly_p input1, zmod_poly_precomp_t pre, unsigned long bits_input, unsigned long trunc)
{
   unsigned long length1 = FLINT_MIN(input1->length, trunc);

   while ((length1) && (input1->coeffs[length1-1] == 0)) length1--;

   unsigned long length2 = pre->length2;

   unsigned long length = length1 + length2 - 1;

   if (trunc > length) trunc = length;

   unsigned log_length = 0;
   // NOTE(review): damaged statement, see the note above
   while ((1L<p) + log_length;

   if (bits_input)
   {
      bits = bits_input;
   }

   mp_limb_t *mpn1, *res;

   unsigned long limbs1, limbs2;

   limbs1 = FLINT_MAX((long)((length1 * bits-1) / FLINT_BITS + 1), 0L);
   limbs2 = pre->limbs2;

   mpn1 = (mp_limb_t*) flint_stack_alloc(limbs1);

   _zmod_poly_bit_pack_mpn(mpn1, input1, bits, length1);

   res = (mp_limb_t*) flint_stack_alloc(limbs1+limbs2);
   res[limbs1+limbs2-1] = 0L;

   unsigned long
output_length = FLINT_MIN(length1 + length2 - 1, trunc); unsigned long start = (trunc-1)/2; __F_mpn_mul_middle_precomp(res, mpn1, limbs1, pre->precomp, (start*bits)/FLINT_BITS, (output_length*bits-1)/FLINT_BITS+1); _zmod_poly_bit_unpack_mpn(output, res, output_length, bits); flint_stack_release(); //release res flint_stack_release(); //release mpn1 output->length = output_length; /* The modulus may not be prime, so normalisation may be necessary */ __zmod_poly_normalise(output); } void _zmod_poly_mul_KS_middle(zmod_poly_t output, zmod_poly_p input1, zmod_poly_p input2, unsigned long bits_input, unsigned long trunc) { unsigned long length1 = FLINT_MIN(input1->length, trunc); unsigned long length2 = FLINT_MIN(input2->length, trunc); while ((length1) && (input1->coeffs[length1-1] == 0)) length1--; while ((length2) && (input2->coeffs[length2-1] == 0)) length2--; if ((length1 == 0) || (length2 == 0)) { zmod_poly_zero(output); return; } unsigned long length = length1 + length2 - 1; if (trunc > length) trunc = length; if (length2 > length1) { unsigned long temp = length1; length1 = length2; length2 = temp; SWAP_ZMOD_POLY_PTRS(input1, input2); } unsigned long bits1, bits2; bits1 = zmod_poly_bits(input1); bits2 = zmod_poly_bits(input2); unsigned long length_short = length2; unsigned log_length = 0; while ((1L<coeffs[i] = 0L; output->length = output_length; /* The modulus may not be prime, so normalisation may be necessary */ __zmod_poly_normalise(output); } void zmod_poly_mul_KS_middle(zmod_poly_t output, zmod_poly_p input1, zmod_poly_p input2, unsigned long bits_input, unsigned long trunc) { unsigned long length1 = input1->length; unsigned long length2 = input2->length; if ((length1 == 0) || (length2 == 0) || (trunc == 0)) { zmod_poly_zero(output); return; } unsigned long length = length1 + length2 - 1; if (output == input1 || output == input2) { // output is inplace, so need a temporary zmod_poly_t temp; zmod_poly_init2(temp, input1->p, FLINT_MIN(length, trunc)); 
_zmod_poly_mul_KS_middle(temp, input1, input2, bits_input, trunc); zmod_poly_swap(temp, output); zmod_poly_clear(temp); } else { // output not inplace zmod_poly_fit_length(output, FLINT_MIN(length, trunc)); _zmod_poly_mul_KS_middle(output, input1, input2, bits_input, trunc); } } #endif /******************************************************************************* Bitpacking functions ********************************************************************************/ /* Determines the maximum number of bits used in the coefficients of poly */ unsigned long zmod_poly_bits(zmod_poly_t poly) { unsigned long bits = 0; unsigned long mask = -1L; for(unsigned long i = 0; i < poly->length; i++) { if(poly->coeffs[i]) { if(poly->coeffs[i] & mask) { bits = FLINT_BIT_COUNT(poly->coeffs[i]); if(bits == FLINT_BITS) break; else mask = -1L - ((1L<>FLINT_LG_BITS_PER_LIMB) + 1), 0L); res[0] = 0L; if (bits < FLINT_BITS) { unsigned long boundary_limit_bit = FLINT_BITS - bits; //printf("Packing polynomial ****************************************\n"); //print_limb("res[0]", res[0]); for(unsigned long i = 0; i < length; i++) { if (current_bit > boundary_limit_bit) { // the coefficient will be added accross a limb boundary, // so need the lower and upper parts (lower for limb with // lower index). 
//printf("coeff won't fit in limb...\n"); //print_limb("poly->coeffs[i] ", poly->coeffs[i]); // the part of the coeff that will be in the current limb temp_lower = (poly->coeffs[i] << current_bit); //print_limb("temp_lower ", temp_lower); // the part of the coeff that will be in the next limb temp_upper = (poly->coeffs[i] >> (FLINT_BITS - current_bit)); //print_limb("temp_upper ", temp_upper); //print_limb("res[current_limb] ", res[current_limb]); res[current_limb] |= temp_lower; //print_limb("res[current_limb] |= temp_lower", res[current_limb]); current_limb++; res[current_limb] = temp_upper; //print_limb("res[current_limb+1] ", res[current_limb]); current_bit = bits + current_bit - FLINT_BITS; } else { // the coefficient will fit in the current limb //printf("coeff will fit in limb...\n"); temp_lower = poly->coeffs[i] << current_bit; //print_limb("poly->coeffs[i] ", poly->coeffs[i]); //print_limb("temp_lower ", temp_lower); //print_limb("res[current_limb] ", res[current_limb]); res[current_limb] |= temp_lower; //print_limb("res[current_limb] |= temp_lower", res[current_limb]); current_bit += bits; } if (current_bit >= FLINT_BITS) { current_limb++; if (current_limb < total_limbs) res[current_limb] = 0L; current_bit -= FLINT_BITS; } } } else if (bits == FLINT_BITS) { for (unsigned long i = 0; i < length; i++) { res[i] = poly->coeffs[i]; } } else if (bits == 2*FLINT_BITS) { for (unsigned long i = 0; i < length; i++) { res[current_limb] = poly->coeffs[i]; current_limb++; res[current_limb] = 0L; current_limb++; } } else if (bits < 2*FLINT_BITS) { //printf("Packing Coeffs in Poly ============================================="); for(unsigned long i = 0; i < length; i++) { //PRINT_VAR(current_bit); // the coefficient will be added accross a limb boundary, // so need the lower and upper parts (lower for limb with // lower index). //printf("coeff won't fit in limb... 
HERE\n"); //print_limb("poly->coeffs[i] ", poly->coeffs[i]); // the part of the coeff that will be in the current limb temp_lower = poly->coeffs[i] << current_bit; //print_limb("temp_lower ", temp_lower); // the part of the coeff that will be in the next limb if (current_bit) { //print_var("current_bit", current_bit); temp_upper = poly->coeffs[i] >> (FLINT_BITS - current_bit); } else { temp_upper = 0L; } //print_limb("temp_upper ", temp_upper); //print_limb("res[current_limb] ", res[current_limb]); res[current_limb] |= temp_lower; //print_limb("res[current_limb] |= temp_lower", res[current_limb]); current_limb++; res[current_limb] = temp_upper; //print_limb("res[current_limb+1] ", res[current_limb]); current_bit += bits - FLINT_BITS; //PRINT_VAR(current_bit); if (current_bit >= FLINT_BITS) { //printf("GOT HERE ****************\n"); current_bit -= FLINT_BITS; current_limb++; if (current_limb < total_limbs) res[current_limb] = 0L; } } } else // 2*FLINT_BITS < bits < 3*FLINT_BITS { for(unsigned long i = 0; i < length; i++) { // the part of the coeff that will be in the current limb temp_lower = poly->coeffs[i] << current_bit; // the part of the coeff that will be in the next limb if (current_bit) { temp_upper = poly->coeffs[i] >> (FLINT_BITS - current_bit); } else { temp_upper = 0L; } res[current_limb] |= temp_lower; current_limb++; res[current_limb] = temp_upper; current_limb++; if (current_limb < total_limbs) res[current_limb] = 0L; current_bit += bits - 2*FLINT_BITS; if (current_bit >= FLINT_BITS) { current_bit -= FLINT_BITS; current_limb++; if (current_limb < total_limbs) res[current_limb] = 0L; } } } } /* Unpacks a zmod_poly of length `length` from an mpn `mpn` with coeffs packed in `bits` bits. 
*/ void _zmod_poly_bit_unpack_mpn(zmod_poly_t res, mp_limb_t * mpn, unsigned long length, unsigned long bits) { unsigned long i; //PRINT_VAR(bits); #if FLINT_BITS == 64 && PREINV32 if (bits < 32) { unsigned long current_limb = 0; unsigned long current_bit = 0; unsigned long boundary_limit_bit = FLINT_BITS - bits; unsigned long temp_lower; unsigned long temp_upper; unsigned long norm; count_lead_zeros(norm, res->p); norm -= 32; unsigned long mask; mask = 1L; i = bits - 1; while(i) { mask <<= 1; mask |= 1L; i--; } for (i = 0; i < length; i++) { if (current_bit > boundary_limit_bit) { // the coeff will be across a limb boundary... //printf("coeff won't only be in current limb...\n"); // temp lower contains the part in the current limb temp_lower = mpn[current_limb]; //print_limb("mpn[current_limb] ", temp_lower); // need (bits - (FLINT_BITS - current_bit)) bits // from the LSB side of this limb to complete the coeff... current_limb++; //print_limb("mpn[current_limb+1] ", mpn[current_limb]); // so shift them up, OR with the lower part and apply the mask temp_upper = mpn[current_limb] << (FLINT_BITS - current_bit); //print_limb("temp_upper ", temp_upper); temp_upper |= temp_lower; //print_limb("temp_upper |= temp_lower", temp_upper); temp_upper &= mask; //print_limb("temp_upper &= mask ", temp_upper); _zmod_poly_set_coeff_ui(res, i, z_mod32_precomp(temp_upper<p<p32_inv)>>norm); current_bit = bits + current_bit - FLINT_BITS; mpn[current_limb] = mpn[current_limb] >> current_bit; //print_limb("mpn[current_limb+1] ", mpn[current_limb]); } else { // the coeff will fit in the current limb... //printf("coeff will be in current limb...\n"); //print_limb("mpn[current_limb] ", mpn[current_limb]); temp_lower = mpn[current_limb] & mask; //print_limb("temp_lower ", temp_lower); // less than a limb in size, so must be smaller than an unsigned long... 
//zmod_poly_set_coeff_ui(res, i, temp_lower); _zmod_poly_set_coeff_ui(res, i, z_mod32_precomp(temp_lower<p<p32_inv)>>norm); mpn[current_limb] = mpn[current_limb] >> bits; //print_limb("mpn[current_limb] ", mpn[current_limb]); current_bit += bits; } if(current_bit == FLINT_BITS) { current_bit = 0; current_limb++; } } } else #endif if (bits < FLINT_BITS) { unsigned long current_limb = 0; unsigned long current_bit = 0; unsigned long boundary_limit_bit = FLINT_BITS - bits; unsigned long temp_lower; unsigned long temp_upper; unsigned long mask; mask = 1L; i = bits - 1; while(i) { mask <<= 1; mask |= 1L; i--; } for (i = 0; i < length; i++) { if (current_bit > boundary_limit_bit) { // the coeff will be across a limb boundary... //printf("coeff won't only be in current limb...\n"); // temp lower contains the part in the current limb temp_lower = mpn[current_limb]; //print_limb("mpn[current_limb] ", temp_lower); // need (bits - (FLINT_BITS - current_bit)) bits // from the LSB side of this limb to complete the coeff... current_limb++; //print_limb("mpn[current_limb+1] ", mpn[current_limb]); // so shift them up, OR with the lower part and apply the mask temp_upper = mpn[current_limb] << (FLINT_BITS - current_bit); //print_limb("temp_upper ", temp_upper); temp_upper |= temp_lower; //print_limb("temp_upper |= temp_lower", temp_upper); temp_upper &= mask; //print_limb("temp_upper &= mask ", temp_upper); #if FLINT_BITS == 64 if (bits <= FLINT_D_BITS) _zmod_poly_set_coeff_ui(res, i, z_mod_precomp(temp_upper, res->p, res->p_inv)); else #endif _zmod_poly_set_coeff_ui(res, i, z_mod2_precomp(temp_upper, res->p, res->p_inv)); current_bit = bits + current_bit - FLINT_BITS; mpn[current_limb] = mpn[current_limb] >> current_bit; //print_limb("mpn[current_limb+1] ", mpn[current_limb]); } else { // the coeff will fit in the current limb... 
//printf("coeff will be in current limb...\n"); //print_limb("mpn[current_limb] ", mpn[current_limb]); temp_lower = mpn[current_limb] & mask; //print_limb("temp_lower ", temp_lower); // less than a limb in size, so must be smaller than an unsigned long... //zmod_poly_set_coeff_ui(res, i, temp_lower); #if FLINT_BITS == 64 if (bits <= FLINT_D_BITS) _zmod_poly_set_coeff_ui(res, i, z_mod_precomp(temp_lower, res->p, res->p_inv)); else #endif _zmod_poly_set_coeff_ui(res, i, z_mod2_precomp(temp_lower, res->p, res->p_inv)); mpn[current_limb] = mpn[current_limb] >> bits; //print_limb("mpn[current_limb] ", mpn[current_limb]); current_bit += bits; } if(current_bit == FLINT_BITS) { current_bit = 0; current_limb++; } } } else if (bits == FLINT_BITS) { for (i = 0; i < length; i++) { _zmod_poly_set_coeff_ui(res, i, z_ll_mod_precomp(0L, mpn[i], res->p, res->p_inv)); } } else if (bits == 2*FLINT_BITS) { unsigned long current_limb = 0; for (i = 0; i < length; i++) { _zmod_poly_set_coeff_ui(res, i, z_ll_mod_precomp(mpn[current_limb+1], mpn[current_limb], res->p, res->p_inv)); current_limb+=2; } } else if (bits < 2*FLINT_BITS) // FLINT_BITS < bits < 2*FLINT_BITS { unsigned long current_limb = 0; unsigned long current_bit = 0; unsigned long double_boundary_limit_bit = bits - FLINT_BITS; unsigned long temp_lower; unsigned long temp_upper; for (i = 0; i < length; i++) { if(current_bit == 0) { // printf("Coeff across one boundary... 
current_bit == 0\n"); temp_lower = mpn[current_limb]; // PRINT_LIMB(temp_lower); current_limb++; temp_upper = (mpn[current_limb] << (2*FLINT_BITS - bits)) >> (2*FLINT_BITS - bits); // PRINT_LIMB(temp_upper); _zmod_poly_set_coeff_ui(res, i, z_ll_mod_precomp(temp_upper, temp_lower, res->p, res->p_inv)); mpn[current_limb] >>= (bits - FLINT_BITS); // PRINT_LIMB(mpn[current_limb]); current_bit = 2*FLINT_BITS - bits; // PRINT_VAR(current_bit); } else if (current_bit < double_boundary_limit_bit) { // printf("Coeff across two boundaries...\n"); // the coeff will be across two limb boundaries... temp_lower = mpn[current_limb]; // PRINT_LIMB(temp_lower); // PRINT_VAR(current_bit); current_limb++; // PRINT_LIMB(mpn[current_limb] << current_bit); temp_lower |= (mpn[current_limb] << current_bit); // PRINT_LIMB(temp_lower); // FLINT_BITS - current_bit != FLINT_BITS as current_bit > double_boundary_limit_bit temp_upper = mpn[current_limb] >> (FLINT_BITS - current_bit); // PRINT_LIMB(temp_upper); current_limb++; // PRINT_LIMB(mpn[current_limb]); temp_upper |= (mpn[current_limb] << current_bit); temp_upper <<= 2*FLINT_BITS - bits; temp_upper >>= 2*FLINT_BITS - bits; // PRINT_LIMB(temp_upper) _zmod_poly_set_coeff_ui(res, i, z_ll_mod_precomp(temp_upper, temp_lower, res->p, res->p_inv)); mpn[current_limb] >>= (bits - current_bit - FLINT_BITS); current_bit = 2*FLINT_BITS + current_bit - bits; // PRINT_VAR(current_bit); } else { // the coeff will be across one limb boundary... 
// printf("Coeff across one boundary...\n"); temp_lower = mpn[current_limb] | (mpn[current_limb+1] << current_bit); // PRINT_LIMB(mpn[current_limb]); // PRINT_LIMB(mpn[current_limb+1]); // PRINT_LIMB(temp_lower); current_limb++; //PRINT_LIMB(temp_lower); temp_upper = (mpn[current_limb] << (FLINT_BITS + current_bit - bits)) >> (2*FLINT_BITS - bits); // PRINT_LIMB(temp_upper); _zmod_poly_set_coeff_ui(res, i, z_ll_mod_precomp(temp_upper, temp_lower, res->p, res->p_inv)); mpn[current_limb] >>= (bits - current_bit); // PRINT_LIMB(mpn[current_limb]); current_bit = FLINT_BITS + current_bit - bits; // PRINT_VAR(current_bit); if(!current_bit) current_limb++; } if(current_bit == FLINT_BITS) { current_bit = 0; // PRINT_VAR(current_bit); current_limb++; } } } else // 2*FLINT_BITS < bits < 3*FLINT_BITS { unsigned long current_limb = 0; unsigned long current_bit = 0; unsigned long double_boundary_limit_bit = bits - 2*FLINT_BITS; unsigned long temp_lower; unsigned long temp_upper; unsigned long temp_upper2; for (i = 0; i < length; i++) { if(current_bit == 0) { // printf("Coeff across two boundaries... current_bit == 0\n"); temp_lower = mpn[current_limb+1]; temp_upper = (mpn[current_limb+2] << (3*FLINT_BITS - bits)) >> (3*FLINT_BITS - bits); temp_upper = z_ll_mod_precomp(temp_upper, temp_lower, res->p, res->p_inv); temp_lower = mpn[current_limb]; current_limb+=2; _zmod_poly_set_coeff_ui(res, i, z_ll_mod_precomp(temp_upper, temp_lower, res->p, res->p_inv)); mpn[current_limb] >>= (bits - 2*FLINT_BITS); current_bit = 3*FLINT_BITS - bits; } else if (current_bit >= double_boundary_limit_bit) { // the coeff will be across two limb boundaries... 
temp_lower = mpn[current_limb]; current_limb++; temp_lower |= (mpn[current_limb] << current_bit); temp_upper = mpn[current_limb] >> (FLINT_BITS - current_bit); current_limb++; temp_upper |= (mpn[current_limb] << current_bit); temp_upper2 = mpn[current_limb] >> (FLINT_BITS - current_bit); temp_upper2 <<= 3*FLINT_BITS - bits; temp_upper2 >>= 3*FLINT_BITS - bits; temp_upper = z_ll_mod_precomp(temp_upper2, temp_upper, res->p, res->p_inv); _zmod_poly_set_coeff_ui(res, i, z_ll_mod_precomp(temp_upper, temp_lower, res->p, res->p_inv)); mpn[current_limb] >>= (bits - current_bit - FLINT_BITS); current_bit = 2*FLINT_BITS + current_bit - bits; if (!current_bit) current_limb++; } else { // the coeff will be across three limb boundaries... temp_lower = mpn[current_limb]; current_limb++; temp_lower |= (mpn[current_limb] << current_bit); temp_upper = mpn[current_limb] >> (FLINT_BITS - current_bit); current_limb++; temp_upper |= (mpn[current_limb] << current_bit); temp_upper2 = mpn[current_limb] >> (FLINT_BITS - current_bit); current_limb++; temp_upper2 |= (mpn[current_limb] << current_bit); temp_upper2 <<= 3*FLINT_BITS - bits; temp_upper2 >>= 3*FLINT_BITS - bits; temp_upper = z_ll_mod_precomp(temp_upper2, temp_upper, res->p, res->p_inv); _zmod_poly_set_coeff_ui(res, i, z_ll_mod_precomp(temp_upper, temp_lower, res->p, res->p_inv)); mpn[current_limb] >>= (bits - current_bit - 2*FLINT_BITS); current_bit = 3*FLINT_BITS + current_bit - bits; } if(current_bit == FLINT_BITS) { current_bit = 0; current_limb++; } } } } void zmod_poly_mul_trunc_n(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc) { if (poly1->length + poly2->length <= 6) { zmod_poly_mul_classical_trunc(res, poly1, poly2, trunc); return; } if ((FLINT_BIT_COUNT(poly1->p) <= 30) && (poly1->length + poly2->length <= 16)) { zmod_poly_mul_classical_trunc(res, poly1, poly2, trunc); return; } zmod_poly_mul_KS_trunc(res, poly1, poly2, 0, trunc); } void zmod_poly_mul_trunc_left_n(zmod_poly_t res, zmod_poly_t 
poly1, zmod_poly_t poly2, unsigned long trunc) { if (poly1->length + poly2->length <= 10) { zmod_poly_mul_classical_trunc_left(res, poly1, poly2, trunc); return; } if ((FLINT_BIT_COUNT(poly1->p) <= 30) && (poly1->length + poly2->length < 30)) { zmod_poly_mul_classical_trunc_left(res, poly1, poly2, trunc); return; } zmod_poly_mul_KS(res, poly1, poly2, 0); } /******************************************************************************* Scalar multiplication ********************************************************************************/ /* Scalar multiplication Assumes the scalar is reduced modulo poly->p */ void __zmod_poly_scalar_mul_without_mod(zmod_poly_t res, zmod_poly_t poly, unsigned long scalar) { if (poly != res) zmod_poly_fit_length(res, poly->length); if (scalar == 0) { res->length = 0; return; } if (scalar == 1L) { _zmod_poly_set(res, poly); return; } for (unsigned long i = 0; i < poly->length; i++) { res->coeffs[i] = poly->coeffs[i] * scalar; } res->length = poly->length; __zmod_poly_normalise(res); } void _zmod_poly_scalar_mul(zmod_poly_t res, zmod_poly_t poly, unsigned long scalar) { if (scalar == 0) { res->length = 0; return; } if (scalar == 1L) { _zmod_poly_set(res, poly); return; } unsigned long bits = FLINT_BIT_COUNT(poly->p); #if FLINT_BITS == 64 if (bits <= FLINT_D_BITS) { for (unsigned long i = 0; i < poly->length; i++) { res->coeffs[i] = z_mulmod_precomp(poly->coeffs[i], scalar, poly->p, poly->p_inv); } } else { #endif for (unsigned long i = 0; i < poly->length; i++) { res->coeffs[i] = z_mulmod2_precomp(poly->coeffs[i], scalar, poly->p, poly->p_inv); } #if FLINT_BITS == 64 } #endif res->length = poly->length; __zmod_poly_normalise(res); } void zmod_poly_scalar_mul(zmod_poly_t res, zmod_poly_t poly, unsigned long scalar) { if (poly != res) zmod_poly_fit_length(res, poly->length); _zmod_poly_scalar_mul(res, poly, scalar); } /* Used to reduce a polynomial modulo its modulus if it has been left without reduction for a while. 
Assumes all the coefficients are positive and at most FLINT_D_BITS.
*/

void __zmod_poly_scalar_mod(zmod_poly_t poly)
{
   unsigned long p = poly->p;
   double p_inv = poly->p_inv;

   for (unsigned long i = 0; i < poly->length; i++)
      poly->coeffs[i] = z_mod_precomp(poly->coeffs[i], p, p_inv);

   __zmod_poly_normalise(poly);
}

/*
   Classical basecase division

   Sets Q to the quotient and R to the remainder of A divided by B.
   Prints an error and aborts if B has length zero. If A is shorter than B
   the result is Q = 0, R = A.

   When 2*bits(p) + bits(A->length - B->length + 1) <= FLINT_D_BITS the work
   is handed to __zmod_poly_divrem_classical_mod_last.

   R is copied from A before Q is modified, so the short-dividend path also
   yields R = A when Q and A are the same object.

   Requires that the leading coefficient be invertible modulo B->p
*/

void zmod_poly_divrem_classical(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B)
{
   if (B->length == 0)
   {
      printf("Error: Divide by zero\n");
      abort();
   }

   if (A->length < B->length)
   {
      /* copy before zeroing: Q may be the same object as A */
      zmod_poly_set(R, A);
      zmod_poly_zero(Q);
      return;
   }

   unsigned long p = B->p;

   if (2*FLINT_BIT_COUNT(p) + FLINT_BIT_COUNT(A->length - B->length + 1) <= FLINT_D_BITS)
   {
      __zmod_poly_divrem_classical_mod_last(Q, R, A, B);
      return;
   }

   double p_inv = B->p_inv;
   unsigned long lead_inv = z_invert(B->coeffs[B->length - 1], p);

   /* qB holds (quotient coefficient) * (B without its leading term) */
   zmod_poly_t qB;
   zmod_poly_init2(qB, p, B->length);

   /* Bm1 is B with its leading coefficient dropped */
   zmod_poly_t Bm1;
   _zmod_poly_attach_truncate(Bm1, B, B->length - 1);

   long coeff = A->length - 1;
   /* index of B's leading term; R->coeffs[coeff] pairs with Q->coeffs[coeff - low] */
   long low = (long) B->length - 1;

   zmod_poly_set(R, A);

   /* A->length >= B->length is guaranteed by the early return above */
   zmod_poly_fit_length(Q, A->length - B->length + 1);
   Q->length = A->length - B->length + 1;

#if FLINT_BITS == 64
   int small = (FLINT_BIT_COUNT(p) <= FLINT_D_BITS);
#endif

   for ( ; coeff >= low; coeff--)
   {
      if (R->coeffs[coeff] == 0L)
      {
         Q->coeffs[coeff - low] = 0L;
         continue;
      }

      unsigned long q;
#if FLINT_BITS == 64
      if (small) q = z_mulmod_precomp(R->coeffs[coeff], lead_inv, p, p_inv);
      else
#endif
         q = z_mulmod2_precomp(R->coeffs[coeff], lead_inv, p, p_inv);

      Q->coeffs[coeff - low] = q;

      zmod_poly_scalar_mul(qB, Bm1, q);

      /* R_sub is a window onto the B->length - 1 coefficients of R below index coeff */
      zmod_poly_t R_sub;
      R_sub->p = p;
      R_sub->coeffs = R->coeffs + (coeff - low);
      R_sub->length = B->length - 1;
      _zmod_poly_sub(R_sub, R_sub, qB);
   }

   R->length = B->length - 1;
   __zmod_poly_normalise(R);
   zmod_poly_clear(qB);
}

/*
   Variant of zmod_poly_divrem_classical that leaves the remainder
   coefficients unreduced while accumulating: each quotient step adds an
   unreduced multiple of B, a coefficient is reduced with z_mod_precomp only
   when it becomes the current leading term, and R is reduced as a whole at
   the end. zmod_poly_divrem_classical only dispatches here when
   2*bits(p) + bits(A->length - B->length + 1) <= FLINT_D_BITS.
*/

void __zmod_poly_divrem_classical_mod_last(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B)
{
   if (B->length == 0)
   {
      printf("Error: Divide by zero\n");
      abort();
   }

   if (A->length < B->length)
   {
      /* copy before zeroing: Q may be the same object as A */
      zmod_poly_set(R, A);
      zmod_poly_zero(Q);
      return;
   }

   unsigned long p = B->p;
   double p_inv = B->p_inv;
   unsigned long lead_inv = z_invert(B->coeffs[B->length - 1], p);

   zmod_poly_t qB;
   zmod_poly_init2(qB, p, B->length);

   zmod_poly_t Bm1;
   _zmod_poly_attach_truncate(Bm1, B, B->length - 1);

   long coeff = A->length - 1;
   long low = (long) B->length - 1;

   zmod_poly_set(R, A);

   /* A->length >= B->length is guaranteed by the early return above */
   zmod_poly_fit_length(Q, A->length - B->length + 1);
   Q->length = A->length - B->length + 1;

   for ( ; coeff >= low; coeff--)
   {
      /* bring the current leading coefficient back into [0, p) before testing it */
      R->coeffs[coeff] = z_mod_precomp(R->coeffs[coeff], p, p_inv);

      if (R->coeffs[coeff] == 0L)
      {
         Q->coeffs[coeff - low] = 0L;
         continue;
      }

      unsigned long q = z_mulmod_precomp(R->coeffs[coeff], lead_inv, p, p_inv);
      Q->coeffs[coeff - low] = q;

      /* add (-q)*Bm1 without reduction instead of subtracting q*Bm1 */
      __zmod_poly_scalar_mul_without_mod(qB, Bm1, z_negmod(q, p));

      zmod_poly_t R_sub;
      R_sub->p = p;
      R_sub->coeffs = R->coeffs + (coeff - low);
      R_sub->length = B->length - 1;
      _zmod_poly_add_without_mod(R_sub, R_sub, qB);
   }

   R->length = B->length - 1;
   __zmod_poly_scalar_mod(R);
   __zmod_poly_normalise(R);
   zmod_poly_clear(qB);
}

/*
   Classical basecase division, without remainder

   Sets Q to the quotient of A by B. Prints an error and aborts if B has
   length zero; Q = 0 if A is shorter than B. A private working copy of A is
   reduced in place and discarded.

   Requires that the leading coefficient be invertible modulo B->p
*/

void zmod_poly_div_classical(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B)
{
   if (B->length == 0)
   {
      printf("Error: Divide by zero\n");
      abort();
   }

   if (A->length < B->length)
   {
      zmod_poly_zero(Q);
      return;
   }

   unsigned long p = B->p;

   if (2*FLINT_BIT_COUNT(p) + FLINT_BIT_COUNT(A->length - B->length + 1) <= FLINT_D_BITS)
   {
      __zmod_poly_div_classical_mod_last(Q, A, B);
      return;
   }

   double p_inv = B->p_inv;
   unsigned long lead_inv = z_invert(B->coeffs[B->length - 1], p);

   zmod_poly_t qB;
   zmod_poly_init2(qB, p, B->length);

   /* working copy of the dividend */
   zmod_poly_t R;
   zmod_poly_init(R, p);

   zmod_poly_t Bm1;
   _zmod_poly_attach_truncate(Bm1, B, B->length - 1);

   long coeff = A->length - 1;
   long low = (long) B->length - 1;

   zmod_poly_set(R, A);

   /* A->length >= B->length is guaranteed by the early return above */
   zmod_poly_fit_length(Q, A->length - B->length + 1);
   Q->length = A->length - B->length + 1;

#if FLINT_BITS == 64
   int small = (FLINT_BIT_COUNT(p) <= FLINT_D_BITS);
#endif

   for ( ; coeff >= low; coeff--)
   {
      if (R->coeffs[coeff] == 0L)
      {
         Q->coeffs[coeff - low] = 0L;
         continue;
      }

      unsigned long q;
#if FLINT_BITS == 64
      if (small) q = z_mulmod_precomp(R->coeffs[coeff], lead_inv, p, p_inv);
      else
#endif
         q = z_mulmod2_precomp(R->coeffs[coeff], lead_inv, p, p_inv);

      Q->coeffs[coeff - low] = q;

      /* the final quotient step needs no update of R: the remainder is not returned */
      if (coeff >= (long) B->length)
      {
         zmod_poly_scalar_mul(qB, Bm1, q);

         zmod_poly_t R_sub;
         R_sub->p = p;
         R_sub->coeffs = R->coeffs + (coeff - low);
         R_sub->length = B->length - 1;
         _zmod_poly_sub(R_sub, R_sub, qB);
      }
   }

   zmod_poly_clear(R);
   zmod_poly_clear(qB);
}

void __zmod_poly_div_classical_mod_last(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B)
{
   if (B->length == 0)
   {
      printf("Error: Divide by zero\n");
      abort();
   }

   if (A->length < B->length)
   {
      zmod_poly_zero(Q);
      return;
   }

   unsigned long p = B->p;
   double p_inv = B->p_inv;
   unsigned long lead_inv = z_invert(B->coeffs[B->length - 1], p);
   unsigned long * coeff_Q;

   zmod_poly_t qB;
   zmod_poly_init2(qB, p, B->length);

   zmod_poly_t R;
   zmod_poly_init(R, p);

   zmod_poly_t Bm1;
   _zmod_poly_attach_truncate(Bm1, B, B->length - 1);

   long coeff = A->length - 1;

   zmod_poly_set(R, A);

   if (A->length >= B->length)
   {
      zmod_poly_fit_length(Q, A->length - B->length + 1);
      Q->length = A->length - B->length + 1;
   }
   else zmod_poly_zero(Q);

coeff_Q = Q->coeffs - B->length + 1; while (coeff >= (long) B->length - 1) { R->coeffs[coeff] = z_mod_precomp(R->coeffs[coeff], p, p_inv); while ((coeff >= (long) B->length - 1) && (R->coeffs[coeff] == 0L)) { coeff_Q[coeff] = 0L; coeff--; if (coeff >= (long) B->length - 1) R->coeffs[coeff] = z_mod_precomp(R->coeffs[coeff], p, p_inv); } if (coeff >= (long) B->length - 1) { coeff_Q[coeff] = z_mulmod_precomp(R->coeffs[coeff], lead_inv, p, p_inv); if (coeff >= (long) B->length) { zmod_poly_scalar_mul(qB, Bm1, z_negmod(coeff_Q[coeff], p)); zmod_poly_t R_sub; R_sub->p = p; R_sub->coeffs = R->coeffs + coeff - B->length + 1; R_sub->length = B->length - 1; _zmod_poly_add_without_mod(R_sub, R_sub, qB); } coeff--; } } zmod_poly_clear(R); zmod_poly_clear(qB); } /* Divide and conquer division */ void zmod_poly_div_divconquer_recursive(zmod_poly_t Q, zmod_poly_t BQ, zmod_poly_t A, zmod_poly_t B) { if (A->length < B->length) { zmod_poly_zero(Q); zmod_poly_zero(BQ); return; } // A->length is now >= B->length unsigned long p = A->p; unsigned long crossover = 16; unsigned long crossover2 = 128; if ((B->length <= crossover) || ((A->length > 2*B->length - 1) && (A->length < crossover2))) { /* Use the classical algorithm to compute the quotient and remainder, then use A-R to compute BQ */ zmod_poly_t Rb; zmod_poly_init(Rb, p); zmod_poly_divrem_classical(Q, Rb, A, B); zmod_poly_sub(BQ, A, Rb); zmod_poly_clear(Rb); return; } zmod_poly_t d1, d2, d3, d4, p1, q1, q2, dq1, dq2, d1q1, d2q1, d2q2, d1q2, t, temp; unsigned long n1 = (B->length+1)/2; unsigned long n2 = B->length - n1; /* We let B = d1*x^n2 + d2 */ _zmod_poly_attach_shift(d1, B, n2); _zmod_poly_attach_truncate(d2, B, n2); _zmod_poly_attach_shift(d3, B, n1); _zmod_poly_attach_truncate(d4, B, n1); if (A->length <= n2 + B->length - 1) { /* If A->length <= B->length + n2 - 1 then only a single quotient is needed We do a division of at most 2*n2 - 1 terms by n2 terms yielding a quotient of at most n2 terms */ // Set p1 to be A without 
the last // n1 coefficients // 2*n2-1 >= p1->length > 0 zmod_poly_init(p1, p); zmod_poly_right_shift(p1, A, n1); // Since A was normalised, then p1 will be // d3 is the leading terms of B and so must be normalised // d3 is length n2, so we get at most n2 terms in the quotient zmod_poly_init(d1q1, p); zmod_poly_div_divconquer_recursive(Q, d1q1, p1, d3); zmod_poly_clear(p1); /* Compute d2q1 = Q*d4 It is of length at most n1+n2-1 terms */ zmod_poly_init(d2q1, p); zmod_poly_mul(d2q1, Q, d4); /* Compute BQ = d1q1*x^n1 + d2q1 It has length at most n1+2*n2-1 */ zmod_poly_left_shift(BQ, d1q1, n1); zmod_poly_clear(d1q1); zmod_poly_add(BQ, BQ, d2q1); zmod_poly_clear(d2q1); return; } if (A->length > 2*B->length - 1) { // We shift A right until it is length 2*B->length -1 // We call this polynomial p1 unsigned long shift = A->length - 2*B->length + 1; _zmod_poly_attach_shift(p1, A, shift); /* Set q1 to p1 div B This is a 2*B->length-1 by B->length division so q1 ends up being at most length B->length d1q1 = d1*q1 is length at most 2*B->length-1 */ zmod_poly_init(d1q1, p); zmod_poly_init(q1, p); zmod_poly_div_divconquer_recursive(q1, d1q1, p1, B); /* Compute dq1 = d1*q1*x^shift dq1 is then of length at most A->length dq1 is normalised since d1q1 was */ zmod_poly_init(dq1, p); zmod_poly_left_shift(dq1, d1q1, shift); zmod_poly_clear(d1q1); /* Compute t = A - dq1 The first B->length coefficients cancel if the division is exact, leaving A->length - B->length significant terms otherwise we truncate at this length */ zmod_poly_init(t, p); zmod_poly_sub(t, A, dq1); zmod_poly_truncate(t, A->length - B->length); /* Compute q2 = t div B It is a smaller division than the original since t->length <= A->length-B->length */ zmod_poly_init(q2, p); zmod_poly_init(dq2, p); zmod_poly_div_divconquer_recursive(q2, dq2, t, B); zmod_poly_clear(t); /* Write out Q = q1*x^shift + q2 Q has length at most B->length+shift Note q2 has length at most shift since at most it is an A->length-B->length by 
B->length division */ zmod_poly_left_shift(Q, q1, shift); zmod_poly_clear(q1); zmod_poly_add(Q, Q, q2); zmod_poly_clear(q2); /* Write out BQ = dq1 + dq2 */ zmod_poly_add(BQ, dq1, dq2); zmod_poly_clear(dq1); zmod_poly_clear(dq2); return; } // n2 + B->length - 1 < A->length <= n1 + n2 + B->length - 1 /* We let A = a1*x^(n1+2*n2-1) + a2*x^(n1+n2-1) + a3 where a1 is length at most n1 (and at least 1), a2 is length n2 and a3 is length n1+n2-1 We set p1 = a1*x^(n1-1)+ other terms, so it has length at most 2*n1-1 */ zmod_poly_init(p1, p); zmod_poly_right_shift(p1, A, 2*n2); /* Set q1 to p1 div d1 This is at most a 2*n1-1 by n1 division so q1 ends up being at most length n1 d1q1 = d1*q1 is length at most 2*n1-1 */ zmod_poly_init(d1q1, p); zmod_poly_init(q1, p); zmod_poly_div_divconquer_recursive(q1, d1q1, p1, d1); zmod_poly_clear(p1); /* Compute d2q1 = d2*q1 which ends up being at most length n1+n2-1 */ zmod_poly_init(d2q1, p); zmod_poly_mul(d2q1, d2, q1); /* Compute dq1 = d1*q1*x^n2 + d2*q1 dq1 is then of length at most 2*n1+n2-1 */ zmod_poly_init(dq1, p); zmod_poly_left_shift(dq1, d1q1, n2); zmod_poly_clear(d1q1); zmod_poly_add(dq1, dq1, d2q1); zmod_poly_clear(d2q1); /* Compute t = p1*x^(n1+n2-1) + p2*x^(n1-1) - dq1 which has length at most 2*n1+n2-1, but we are not interested in up to the first n1 coefficients, so it has effective length at most n1+n2-1 */ zmod_poly_init(t, p); zmod_poly_right_shift(t, A, n2); zmod_poly_sub(t, t, dq1); zmod_poly_truncate(t, B->length - 1); /* Compute q2 = t div d1 It is at most an n1+n2-1 by n1 division, so the length of q2 will be at most n2 Also compute d1q2 of length at most n1+n2-1 */ zmod_poly_init(d1q2, p); zmod_poly_init(q2, p); zmod_poly_div_divconquer_recursive(q2, d1q2, t, d1); zmod_poly_clear(t); /* Compute d2q2 = d2*q2 which is of length at most n1+n2-1 */ zmod_poly_init(d2q2, p); zmod_poly_mul(d2q2, d2, q2); /* Compute dq2 = d1*q2*x^n2 + d2q2 which is of length at most n1+2*n2-1 */ zmod_poly_init(dq2, p); 
zmod_poly_left_shift(dq2, d1q2, n2); zmod_poly_clear(d1q2); zmod_poly_add(dq2, dq2, d2q2); zmod_poly_clear(d2q2); /* Write out Q = q1*x^n2 + q2 Q has length at most n1+n2 */ zmod_poly_left_shift(Q, q1, n2); zmod_poly_clear(q1); zmod_poly_add(Q, Q, q2); zmod_poly_clear(q2); /* Write out BQ = dq1*x^n2 + dq2 BQ has length at most 2*(n1+n2)-1 */ zmod_poly_left_shift(BQ, dq1, n2); zmod_poly_add(BQ, BQ, dq2); zmod_poly_clear(dq2); zmod_poly_clear(dq1); } void zmod_poly_div_divconquer(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B) { if (A->length < B->length) { zmod_poly_zero(Q); return; } // A->length is now >= B->length unsigned long crossover = 16; unsigned long crossover2 = 256; unsigned long p = B->p; if ((B->length <= crossover) || ((A->length > 2*B->length - 1) && (A->length < crossover2))) { zmod_poly_div_classical(Q, A, B); return; } // B->length is now >= crossover (16) zmod_poly_t d1, d2, d3, p1, q1, q2, dq1, dq2, d1q1, d2q1, d2q2, d1q2, t, temp; unsigned long n1 = (B->length+1)/2; unsigned long n2 = B->length - n1; // n1 and n2 are at least 4 /* We let B = d1*x^n2 + d2 d1 is of length n1 and d2 of length n2 */ _zmod_poly_attach_shift(d1, B, n2); _zmod_poly_attach_truncate(d2, B, n2); _zmod_poly_attach_shift(d3, B, n1); if (A->length <= n2 + B->length - 1) { /* If A->length <= B->length + n2 - 1 then only a single quotient is needed We do a division of at most 2*n2 - 1 terms by n2 terms yielding a quotient of at most n2 terms */ // Set p1 to be A without the last // n1 coefficients // 2*n2-1 >= p1->length > 0 zmod_poly_init(p1, p); zmod_poly_right_shift(p1, A, n1); // Since A was normalised, then p1 will be // d3 is the leading terms of B and so must be normalised // d3 is length n2, so we get at most n2 terms in the quotient zmod_poly_div_divconquer(Q, p1, d3); zmod_poly_clear(p1); return; } if (A->length > 2*B->length - 1) { // We shift A right until it is length 2*B->length -1 // We call this polynomial p1 unsigned long shift = A->length - 2*B->length + 1; 
_zmod_poly_attach_shift(p1, A, shift); /* Set q1 to p1 div B This is a 2*B->length-1 by B->length division so q1 ends up being at most length B->length d1q1 = low(d1*q1) is length at most 2*B->length-1 We discard the lower B->length-1 terms */ zmod_poly_init(d1q1, p); zmod_poly_init(q1, p); zmod_poly_div_divconquer_recursive(q1, d1q1, p1, B); /* Compute dq1 = d1*q1*x^shift dq1 is then of length at most A->length dq1 is normalised since d1q1 was */ zmod_poly_init(dq1, p); zmod_poly_left_shift(dq1, d1q1, shift); zmod_poly_clear(d1q1); /* Compute t = A - dq1 The first B->length coefficients cancel if the division is exact, leaving A->length - B->length significant terms otherwise we truncate at this length */ zmod_poly_init(t, p); zmod_poly_sub(t, A, dq1); zmod_poly_clear(dq1); zmod_poly_truncate(t, A->length - B->length); /* Compute q2 = t div B It is a smaller division than the original since t->length <= A->length-B->length */ zmod_poly_init(q2, p); zmod_poly_div_divconquer(q2, t, B); zmod_poly_clear(t); /* Write out Q = q1*x^shift + q2 Q has length at most B->length+shift Note q2 has length at most shift since at most it is an A->length-B->length by B->length division */ zmod_poly_left_shift(Q, q1, shift); zmod_poly_clear(q1); zmod_poly_add(Q, Q, q2); zmod_poly_clear(q2); return; } // We now have n2 + B->length - 1 < A->length <= 2*B->length - 1 /* We let A = a1*x^(n1+2*n2-1) + a2*x^(n1+n2-1) + a3 where a1 is length at most n1 and a2 is length n2 and a3 is length n1+n2-1 */ // Set p1 to a1*x^(n1-1) + other terms // It has length at most 2*n1-1 and is normalised // A->length >= 2*n2 zmod_poly_init(p1, p); zmod_poly_right_shift(p1, A, 2*n2); /* Set q1 to p1 div d1 This is at most a 2*n1-1 by n1 division so q1 ends up being at most length n1 d1q1 = low(d1*q1) is length at most n1-1 Thus we have discarded the leading n1 terms (at most) */ zmod_poly_init(d1q1, p); zmod_poly_init(q1, p); zmod_poly_div_divconquer_recursive(q1, d1q1, p1, d1); zmod_poly_clear(p1); /* 
Compute d2q1 = d2*q1 with low n1 - 1 terms zeroed d2*q1 is length at most n1+n2-1 leaving at most n2 non-zero terms to the left */ zmod_poly_init(d2q1, p); zmod_poly_mul_trunc_left_n(d2q1, d2, q1, n1 - 1); /* Compute dq1 = d1*q1*x^n2 + d2*q1 dq1 is then of length at most 2*n1+n2-1 but may have any length below this */ zmod_poly_init(dq1, p); zmod_poly_left_shift(dq1, d1q1, n2); zmod_poly_clear(d1q1); zmod_poly_add(dq1, dq1, d2q1); /* Compute t = a1*x^(2*n2-1) + a2*x^(n2-1) - dq1 after shifting dq1 to the right by (n1-n2) which has length at most 2*n1+n2-1, but we discard up to n1 coefficients, so it has effective length 2*n2-1 with the last n2-1 coefficients ignored. Thus there are at most n2 significant coefficients */ zmod_poly_init(t, p); zmod_poly_right_shift(t, A, n1); _zmod_poly_attach_shift(temp, dq1, n1-n2); zmod_poly_sub(t, t, temp); zmod_poly_truncate(t, 2*n2-1); /* Compute q2 = t div d3 It is at most a 2*n2-1 by n2 division, so the length of q2 will be n2 at most */ zmod_poly_init(q2, p); zmod_poly_div_divconquer(q2, t, d3); zmod_poly_clear(t); zmod_poly_clear(dq1); zmod_poly_clear(d2q1); /* Write out Q = q1*x^n2 + q2 Q has length n1+n2 */ zmod_poly_left_shift(Q, q1, n2); zmod_poly_clear(q1); zmod_poly_add(Q, Q, q2); zmod_poly_clear(q2); } void zmod_poly_divrem_divconquer(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B) { zmod_poly_t QB; zmod_poly_init(QB, B->p); zmod_poly_div_divconquer_recursive(Q, QB, A, B); zmod_poly_sub(R, A, QB); zmod_poly_clear(QB); } /**************************************************************************** Newton Inversion ****************************************************************************/ #define FLINT_ZMOD_NEWTON_INVERSE_BASECASE_CUTOFF 64 //32 /* Compute the polynomial X^{2n} / Q. Used by Newton iteration to bootstrap power series inversion. Q must have length >= n and leading coefficient invertible with respect to the modulus. 
*/ void zmod_poly_newton_invert_basecase(zmod_poly_t Q_inv, zmod_poly_t Q, unsigned long n) { zmod_poly_t X2n, Qn; zmod_poly_init2(X2n, Q->p, 2*n-1); zmod_poly_set_coeff_ui(X2n, 2*n - 2, 1L); _zmod_poly_attach_shift(Qn, Q, Q->length - n); zmod_poly_div_divconquer(Q_inv, X2n, Qn); zmod_poly_clear(X2n); } /* Recursively compute 1 / Q mod x^n using Newton iteration Assumes Q is given as a power series to the full precision n required with invertible constant term with respect to the modulus */ #define FLINT_ZMOD_NEWTON_INVERSE_CACHE_CUTOFF 2000 void zmod_poly_newton_invert(zmod_poly_t Q_inv, zmod_poly_t Q, unsigned long n) { if (n < FLINT_ZMOD_NEWTON_INVERSE_BASECASE_CUTOFF) { zmod_poly_t Q_rev; zmod_poly_init2(Q_rev, Q->p, n); _zmod_poly_reverse(Q_rev, Q, n); zmod_poly_newton_invert_basecase(Q_inv, Q_rev, n); zmod_poly_reverse(Q_inv, Q_inv, n); zmod_poly_clear(Q_rev); return; } unsigned long m = (n+1)/2; unsigned long p = Q->p; zmod_poly_t g0, prod, prod2; zmod_poly_init(g0, p); zmod_poly_init(prod, p); zmod_poly_init(prod2, p); zmod_poly_newton_invert(g0, Q, m); #if USE_MIDDLE_PRODUCT if (n < FLINT_ZMOD_NEWTON_INVERSE_CACHE_CUTOFF) { #endif zmod_poly_mul_trunc_n(prod, Q, g0, n); prod->coeffs[0] = z_submod(prod->coeffs[0], 1L, p); //zmod_poly_mul_trunc_n(prod2, prod, g0, n); zmod_poly_t prod_s; zmod_poly_attach_shift(prod_s, prod, (n+1)/2); zmod_poly_t prod2_s; zmod_poly_fit_length(prod2, n); zmod_poly_attach_shift(prod2_s, prod2, (n+1)/2); _zmod_poly_mul_KS_trunc(prod2_s, prod_s, g0, 0, n - (n+1)/2); prod2->length = (n+1)/2 + prod2_s->length; for (unsigned long i = 0; i < (n+1)/2; i++) prod2->coeffs[i] = 0L; #if USE_MIDDLE_PRODUCT } else { zmod_poly_precomp_t pre; zmod_poly_mul_trunc_n_precomp_init(pre, g0, 0, (n+1)/2); zmod_poly_fit_length(prod, n); _zmod_poly_mul_KS_middle_precomp(prod, Q, pre, 0, n); zmod_poly_t prod_s; zmod_poly_attach_shift(prod_s, prod, (n+1)/2); zmod_poly_t prod2_s; zmod_poly_fit_length(prod2, n); zmod_poly_attach_shift(prod2_s, prod2, 
(n+1)/2); _zmod_poly_mul_KS_trunc_precomp(prod2_s, prod_s, pre, 0, n - (n+1)/2); zmod_poly_precomp_clear(pre); prod2->length = (n+1)/2 + prod2_s->length; for (unsigned long i = 0; i < (n+1)/2; i++) prod2->coeffs[i] = 0L; } #endif zmod_poly_sub(Q_inv, g0, prod2); zmod_poly_clear(prod2); zmod_poly_clear(prod); zmod_poly_clear(g0); } /**************************************************************************** Newton Division ****************************************************************************/ /* Yields a precision n power series quotient of A by B assuming A and B are both given to precision n and B is normalised (i.e. constant coefficient is invertible). */ void zmod_poly_div_series(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B, unsigned long n) { zmod_poly_t Ain, Bin; unsigned long p = B->p; if (A == Q) { zmod_poly_init(Ain, p); zmod_poly_set(Ain, A); } else _zmod_poly_attach(Ain, A); if (B == Q) { zmod_poly_init(Bin, p); zmod_poly_set(Bin, B); } else _zmod_poly_attach(Bin, B); zmod_poly_t B_inv; zmod_poly_init(B_inv, p); zmod_poly_newton_invert(B_inv, Bin, n); zmod_poly_mul_trunc_n(Q, B_inv, Ain, n); zmod_poly_clear(B_inv); if (A == Q) zmod_poly_clear(Ain); if (B == Q) zmod_poly_clear(Bin); } /* Polynomial division of A by B The remainder is not computed, to save time */ void zmod_poly_div_newton(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B) { if (A->length < B->length) { zmod_poly_zero(Q); return; } unsigned long p = B->p; zmod_poly_t A_rev, B_rev; zmod_poly_init2(A_rev, p, A->length); zmod_poly_init2(B_rev, p, B->length); zmod_poly_reverse(A_rev, A, A->length); zmod_poly_reverse(B_rev, B, B->length); zmod_poly_div_series(Q, A_rev, B_rev, A->length - B->length + 1); zmod_poly_reverse(Q, Q, A->length - B->length + 1); zmod_poly_clear(B_rev); zmod_poly_clear(A_rev); } void zmod_poly_divrem_newton(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B) { if (A->length < B->length) { zmod_poly_zero(Q); zmod_poly_set(R, A); return; } zmod_poly_t QB, A_trunc; 
zmod_poly_init(QB, B->p); zmod_poly_div_newton(Q, A, B); zmod_poly_mul_trunc_n(QB, Q, B, B->length - 1); _zmod_poly_attach_truncate(A_trunc, A, B->length - 1); zmod_poly_sub(R, A_trunc, QB); zmod_poly_clear(QB); } void zmod_poly_gcd(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) { zmod_poly_t Q, R, A, B; if ((poly1->length == 0) || (poly2->length == 0)) { zmod_poly_zero(res); return; } if ((poly1->length == 1) || (poly2->length == 1)) { zmod_poly_set_coeff_ui(res, 0, 1L); res->length = 1; return; } unsigned long p = poly1->p; zmod_poly_init(Q, p); zmod_poly_init(R, p); if (poly1->length > poly2->length) { _zmod_poly_attach(A, poly1); _zmod_poly_attach(B, poly2); } else { _zmod_poly_attach(A, poly2); _zmod_poly_attach(B, poly1); } int steps = 1; while (B->length > 1) { zmod_poly_divrem(Q, R, A, B); zmod_poly_swap(A, B); if (steps > 2) zmod_poly_clear(B); _zmod_poly_attach(B, R); zmod_poly_init(R, p); steps++; } if (B->length == 1) { zmod_poly_set_coeff_ui(res, 0, 1L); res->length = 1; } else zmod_poly_set(res, A); if (steps > 2) { zmod_poly_clear(A); } zmod_poly_clear(B); zmod_poly_clear(R); zmod_poly_clear(Q); } /* Computes poly1^(-1) mod poly2 Assumes poly1 is not zero and is already reduced mod poly2 */ int zmod_poly_gcd_invert(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) { zmod_poly_t Q, R, A, B, u1, u2, prod; unsigned long a, coprime; if (poly1->length == 0) { printf("FLINT Exception: Divide by zero\n"); abort(); } if (poly1->length == 1) { z_gcd_invert(&a, poly1->coeffs[0], poly2->p); zmod_poly_set_coeff_ui(res, 0, a); res->length = 1; return 1; } unsigned long p = poly1->p; zmod_poly_init(Q, p); zmod_poly_init(R, p); zmod_poly_init(u1, p); zmod_poly_init(u2, p); zmod_poly_init(prod, p); zmod_poly_set_coeff_ui(u2, 0, 1L); u2->length = 1; zmod_poly_zero(u1); _zmod_poly_attach(A, poly2); _zmod_poly_attach(B, poly1); int steps = 1; while (B->length > 1) { zmod_poly_divrem(Q, R, A, B); zmod_poly_mul(prod, Q, u2); zmod_poly_swap(u1, u2); 
zmod_poly_sub(u2, u2, prod); zmod_poly_swap(A, B); if (steps > 2) zmod_poly_clear(B); _zmod_poly_attach(B, R); zmod_poly_init(R, p); steps++; } if (B->length == 1) { zmod_poly_swap(u1, u2); zmod_poly_set(res, u1); zmod_poly_scalar_mul(res, res, z_invert(B->coeffs[0], p)); coprime = 1; } else { coprime = 0; } if (steps > 2) { zmod_poly_clear(A); } zmod_poly_clear(u1); zmod_poly_clear(u2); zmod_poly_clear(prod); zmod_poly_clear(B); zmod_poly_clear(R); zmod_poly_clear(Q); return coprime; } /* Compute res = gcd(poly1, poly2) Find s and t such that res = s*poly1 + t*poly2 */ void zmod_poly_xgcd(zmod_poly_t res, zmod_poly_t s, zmod_poly_t t, zmod_poly_t poly1, zmod_poly_t poly2) { zmod_poly_t Q, R, A, B, u1, u2, v1, v2, prod; unsigned long a; if ((poly1->length == 0) || (poly2->length == 0)) { zmod_poly_zero(s); zmod_poly_zero(t); zmod_poly_zero(res); return; } if (poly1->length == 1) { a = z_invert(poly1->coeffs[0], poly2->p); zmod_poly_set_coeff_ui(s, 0, a); s->length = 1; zmod_poly_set_coeff_ui(res, 0, 1L); res->length = 1; zmod_poly_zero(t); return; } if (poly2->length == 1) { a = z_invert(poly2->coeffs[0], poly2->p); zmod_poly_set_coeff_ui(t, 0, a); t->length = 1; zmod_poly_set_coeff_ui(res, 0, 1L); res->length = 1; zmod_poly_zero(s); return; } unsigned long p = poly1->p; zmod_poly_init(Q, p); zmod_poly_init(R, p); zmod_poly_init(u1, p); zmod_poly_init(u2, p); zmod_poly_init(v1, p); zmod_poly_init(v2, p); zmod_poly_init(prod, p); zmod_poly_set_coeff_ui(u1, 0, 1L); u1->length = 1; zmod_poly_zero(u2); zmod_poly_set_coeff_ui(v2, 0, 1L); v2->length = 1; zmod_poly_zero(v1); if (poly1->length > poly2->length) { _zmod_poly_attach(A, poly1); _zmod_poly_attach(B, poly2); } else { _zmod_poly_attach(A, poly2); _zmod_poly_attach(B, poly1); zmod_poly_swap(u1, u2); zmod_poly_swap(v1, v2); } int steps = 1; while (B->length > 1) { zmod_poly_divrem(Q, R, A, B); zmod_poly_mul(prod, Q, u2); zmod_poly_swap(u1, u2); zmod_poly_sub(u2, u2, prod); zmod_poly_mul(prod, Q, v2); 
zmod_poly_swap(v1, v2); zmod_poly_sub(v2, v2, prod); zmod_poly_swap(A, B); if (steps > 2) zmod_poly_clear(B); _zmod_poly_attach(B, R); zmod_poly_init(R, p); steps++; } if (B->length == 1) { zmod_poly_swap(u1, u2); zmod_poly_swap(v1, v2); zmod_poly_set(res, B); } else zmod_poly_set(res, A); zmod_poly_set(s, u1); zmod_poly_set(t, v1); zmod_poly_scalar_mul(s, s, z_invert(res->coeffs[res->length-1], p)); zmod_poly_scalar_mul(t, t, z_invert(res->coeffs[res->length-1], p)); zmod_poly_make_monic(res, res); if (steps > 2) { zmod_poly_clear(A); } zmod_poly_clear(u1); zmod_poly_clear(u2); zmod_poly_clear(v1); zmod_poly_clear(v2); zmod_poly_clear(prod); zmod_poly_clear(B); zmod_poly_clear(R); zmod_poly_clear(Q); } unsigned long zmod_poly_resultant_euclidean(zmod_poly_t a, zmod_poly_t b) { unsigned long res; if ((a->length == 0) || (b->length == 0)) { return 0; } if ((a->length == 1) || (b->length == 1)) { return 1; } unsigned long p = a->p; double p_inv = a->p_inv; unsigned long long l0, l1, l2; unsigned long lc; res = 1L; zmod_poly_t u, v, q; zmod_poly_init(u, p); zmod_poly_init(v, p); zmod_poly_init(q, p); zmod_poly_set(u, a); zmod_poly_set(v, b); for (;;) { l0 = u->length; l1 = v->length; lc = v->coeffs[v->length - 1]; zmod_poly_divrem(q, u, u, v); zmod_poly_swap(u, v); l2 = v->length; if (l2 >= 1) { lc = z_powmod2_precomp(lc, l0 - l2, p, p_inv); res = z_mulmod2_precomp(res, lc, p, p_inv); if (((l0 | l1) & 1) == 0) { if (res) res = p - res; } } else { if (l1 == 1) { lc = z_powmod2_precomp(lc, l0 - 1, p, p_inv); res = z_mulmod2_precomp(res, lc, p, p_inv); } else res = 0L; break; } } zmod_poly_clear(q); zmod_poly_clear(u); zmod_poly_clear(v); return res; } flint-1.011/NTL-interface-test.cpp0000644017361200017500000001324711025357254016606 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** NTL-interface-test.cpp: Test functions for conversion between NTL and FLINT format Copyright (C) 2007, William Hart *****************************************************************************/ #include #include #include #include #include #include "NTL-interface.h" #include "fmpz.h" #include "fmpz_poly.h" #include "flint.h" #include "mpz_poly.h" #include "memory-manager.h" #include "test-support.h" #define VARY_BITS 0 #define SIGNS 1 #define SPARSE 1 #define DEBUG 0 // prints debug information #define DEBUG2 1 unsigned long randint(unsigned long randsup) { if (randsup == 0) return 0; static unsigned long randval = 4035456057U; randval = ((unsigned long)randval*1025416097U+286824428U)%(unsigned long)4294967291U; return (unsigned long)randval%randsup; } void randpoly(mpz_poly_t pol, long length, unsigned long maxbits) { unsigned long bits; mpz_t temp; mpz_init(temp); mpz_poly_zero(pol); for (long i = 0; i < length; i++) { #if VARY_BITS bits = randint(maxbits+1); #else bits = maxbits; #endif if (bits == 0) mpz_set_ui(temp,0); else { #if SPARSE if (randint(10) == 1) mpz_rrandomb(temp, randstate, bits); else mpz_set_ui(temp, 0); #else mpz_rrandomb(temp, randstate, bits); 
#endif #if SIGNS if (randint(2)) mpz_neg(temp,temp); #endif } mpz_poly_set_coeff(pol, i, temp); } mpz_clear(temp); } void randpoly_unsigned(mpz_poly_t pol, long length, unsigned long maxbits) { unsigned long bits; mpz_t temp; mpz_init(temp); mpz_poly_zero(pol); for (long i = 0; i < length; i++) { #if VARY_BITS bits = randint(maxbits+1); #else bits = maxbits; #endif if (bits == 0) mpz_set_ui(temp,0); else { mpz_rrandomb(temp, randstate, bits); } mpz_poly_set_coeff(pol, i, temp); } mpz_clear(temp); } #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? "ok\n" : "FAIL!\n"); int test_ZZ_to_fmpz() { int result = 1; unsigned long limbs, limbs2, randlimbs; fmpz_t int1, int2; ZZ z; for (unsigned long i = 0; (i < 1000) && (result == 1); i++) { limbs = randint(100)+1; randlimbs = randint(limbs); int1 = fmpz_init(limbs); fmpz_random_limbs2(int1, randlimbs); fmpz_to_ZZ(z, int1); limbs2 = ZZ_limbs(z); int2 = fmpz_init(limbs2); ZZ_to_fmpz(int2, z); result = fmpz_equal(int1, int2); fmpz_clear(int1); fmpz_clear(int2); } } int test_ZZX_to_fmpz_poly() { mpz_poly_t test_poly; ZZX ZZX_poly; fmpz_poly_t test_fmpz_poly, test_fmpz_poly2; int result = 1; unsigned long bits, length; mpz_poly_init(test_poly); for (unsigned long count1 = 1; (count1 < 300) && (result == 1) ; count1++) { bits = random_ulong(1000) + 1; fmpz_poly_init2(test_fmpz_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init(test_fmpz_poly2); for (unsigned long count2 = 0; (count2 < 10) && (result == 1); count2++) { length = random_ulong(1000); #if DEBUG printf("%ld, %ld\n",length, bits); #endif fmpz_poly_fit_length(test_fmpz_poly, length); randpoly(test_poly, length, bits); mpz_poly_normalise(test_poly); mpz_poly_to_fmpz_poly(test_fmpz_poly, test_poly); fmpz_poly_to_ZZX(ZZX_poly, test_fmpz_poly); ZZX_to_fmpz_poly(test_fmpz_poly2, ZZX_poly); #if DEBUG fmpz_poly_print(test_fmpz_poly); printf("\n"); 
fmpz_poly_print(test_fmpz_poly2); printf("\n"); #endif result = fmpz_poly_equal(test_fmpz_poly, test_fmpz_poly2); } fmpz_poly_clear(test_fmpz_poly); fmpz_poly_clear(test_fmpz_poly2); } mpz_poly_clear(test_poly); return result; } void fmpz_poly_test_all() { int success, all_success = 1; RUN_TEST(ZZ_to_fmpz); RUN_TEST(ZZX_to_fmpz_poly); printf(all_success ? "\nAll tests passed\n" : "\nAt least one test FAILED!\n"); } int main() { test_support_init(); fmpz_poly_test_all(); test_support_cleanup(); flint_stack_cleanup(); return 0; } flint-1.011/memory-manager.c0000644017361200017500000004166411025357254015622 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** memory-manager.c: FLINT-wide memory management Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include #include #include #include #include "flint.h" #include "memory-manager.h" #define DEBUG 0 // Switches to debugging memory allocators #define DEBUG_PRINT 0 // Prints info about all allocations and releases #if DEBUG /*----------------------------------------------------------------------------------------------- Debug version of stack based memory managers ------------------------------------------------------------------------------------------------*/ void * mempts[200000]; unsigned long upto = 0; void * flint_stack_alloc(unsigned long length) { #if DEBUG_PRINT printf("Allocating %ld limbs\n", length); #endif if (upto == 200000) { printf("Error: no free stack nodes in flint_stack_alloc\n"); abort(); } unsigned long * block = malloc((length+101)*sizeof(unsigned long)); if (block == NULL) { printf("Error: unable to allocate memory in flint_stack_alloc\n"); abort(); } mempts[upto] = (void*) block; upto++; block[0] = length; for (unsigned long i = 0; i < 100; i++) block[length+i+1] = 0; return (void*) (block+1); } void * flint_stack_alloc_bytes(unsigned long bytes) { unsigned long length = ((bytes-1)>>FLINT_LG_BYTES_PER_LIMB)+1; #if DEBUG_PRINT printf("Allocating %ld limbs\n", length); #endif if (upto == 200000) { printf("Error: no free stack nodes in flint_stack_alloc_bytes\n"); abort(); } unsigned long * block = malloc(sizeof(unsigned long)*(length + 101)); if (block == NULL) { printf("Error: unable to allocate memory in 
flint_stack_alloc_bytes\n"); abort(); } mempts[upto] = (void*) block; upto++; block[0] = length; for (unsigned long i = 0; i < 100; i++) block[length+i+1] = 0L; return (void*) (block+1); } void flint_stack_release() { if (upto == 0) { printf("Error: attempt to free unallocated block in flint_stack_release\n"); abort(); } upto--; unsigned long * block = mempts[upto]; unsigned long length = block[0]; #if DEBUG_PRINT printf("Releasing %ld limbs\n", length); #endif for (unsigned long i = 0; i < 100; i++) if (block[length+i+1] != 0L) { printf("Error: Block overrun detected by stack memory allocator!!\n"); abort(); } free(mempts[upto]); } void * flint_stack_alloc_small(unsigned long length) { return flint_stack_alloc(length); } void flint_stack_release_small(void) { flint_stack_release(); } void flint_stack_cleanup(void) { if (upto) printf("Error: stack allocator detected mismatch on cleanup!\n"); } #else /* Stack based memory manager to allocate an array of limbs of the given length. It returns a (void*) which needs to be typecast to the required object, e.g. mp_limb_t* or mp_limb_t**, etc. Limbs must be released in the reverse order to that in which they were allocated. 
*/ #define EXPIRE_AFTER 3 // 1/4-th number of allocs before expiring unused allocated space #define RESALLOC 100 //allocate this many mp_limb_t's at once to save on overheads typedef struct limb_mem_t //record for managing all allocations of limbs of a certain bitsize { unsigned long remaining; //number of remaining limbs which are available for allocation unsigned long length; //total length of array of limbs this record manages mp_limb_t* point; //pointer to the next available mp_limb_t int expire; //how long till we expire int allocated; //1 if some of the space is allocated, 0 otherwise (will not expire if allocated) struct limb_mem_t* next; //next record struct limb_mem_t* prev; //previous record } limb_mem_t; typedef struct limb_memp_t //record for managing a particular allocation of memory for a limb array { limb_mem_t* point; //which record controls this allocation unsigned long length; //how many limbs allocated } limb_memp_t; limb_mem_t* head_mpn = NULL; //start of doubly linked list of records limb_mem_t* last_mpn = NULL; //last record in linked list limb_memp_t* top_mpn = NULL; //top of stack of limb_memp_t's limb_memp_t* reservoir_mpn; //array of preallocated limb_memp_t's unsigned int rescount_mpn=0; //counter for which limb_memp_t we are upto in the reservoir_mpn // todo: deal with possible out of memory situation when allocating void* flint_stack_alloc(unsigned long length) { static limb_mem_t* curr; static limb_mem_t* temp; static int initialised = 0; //has limb_alloc been initialised static unsigned int currentalloc = 0; //total number of limb_memp_t's in reservoir_mpn static limb_memp_t* tempres; static int check=0; void* alloc_d; check++; //allocate another block of limb_memp_t's if none are currently allocated, or the reservoir_mpn is depleted if (rescount_mpn==currentalloc) // need more limb_memp_t's { if (!initialised) { reservoir_mpn = (limb_memp_t*)malloc(RESALLOC*sizeof(limb_memp_t)); rescount_mpn=0; initialised = 1; currentalloc = 
RESALLOC; } else { //copy old reservoir_mpn into larger one tempres = reservoir_mpn; reservoir_mpn = (limb_memp_t*)malloc((currentalloc+RESALLOC)*sizeof(limb_memp_t)); memcpy(reservoir_mpn,tempres,currentalloc*sizeof(limb_memp_t)); currentalloc+=RESALLOC; //free old reservoir_mpn free(tempres); } } curr = head_mpn; if (curr != NULL) { do { //search for data of requested size if ((curr->remaining >= length) && (curr->remaining < 2*length))//appropriate unallocated space found, so allocate it { alloc_d = (void*)curr->point; curr->point+=length; curr->remaining-=length; curr->allocated=1; top_mpn=&reservoir_mpn[rescount_mpn]; top_mpn->point=curr; top_mpn->length=length; //check if any remaining nodes have expired, expire them and return if ((check&3)==0) { do { if (!curr->allocated) { curr->expire--; if (curr->expire == 0) { free(curr->point); if (curr==last_mpn) last_mpn = curr->prev; else curr->next->prev = curr->prev; if (curr==head_mpn) head_mpn=curr->next; else (curr->prev->next = curr->next); temp=curr; curr = curr->next; free(temp); } else curr = curr->next; } else curr = curr->next; } while (curr != NULL); } rescount_mpn++; return alloc_d; } //update expiry information for curr if necessary and possibly expire space if (((check&3)==0)&&(!curr->allocated)) { curr->expire--; if (curr->expire == 0) { free(curr->point); if (curr==last_mpn) last_mpn = curr->prev; else curr->next->prev = curr->prev; if (curr==head_mpn) head_mpn=curr->next; else (curr->prev->next = curr->next); temp=curr; curr = curr->next; free(temp); } else curr = curr->next; } else curr = curr->next; } while (curr != NULL); //Nothing suitable found, so initialise data of the requested type //attach to last_mpn->next alloc_d = malloc(length*sizeof(mp_limb_t)); //set up the new record, and last_mpn to point to this new record last_mpn->next = (limb_mem_t*) malloc(sizeof(limb_mem_t)); last_mpn->next->prev = last_mpn; last_mpn=last_mpn->next; last_mpn->point = (mp_limb_t*)alloc_d+length; 
last_mpn->next = NULL; last_mpn->remaining=0; last_mpn->allocated=1; last_mpn->length=length; top_mpn=&reservoir_mpn[rescount_mpn]; top_mpn->point=last_mpn; top_mpn->length=length; rescount_mpn++; return alloc_d; } /*first time anything has been allocated so do the actual allocation of limbs*/ //set up the new record, and head_mpn and last_mpn to point to this single new record alloc_d = malloc(length*sizeof(mp_limb_t)); head_mpn = (limb_mem_t*) malloc(sizeof(limb_mem_t)); head_mpn->point = (mp_limb_t*)alloc_d+length; head_mpn->next = NULL; head_mpn->prev = NULL; head_mpn->remaining=0; head_mpn->allocated=1; head_mpn->length=length; last_mpn = head_mpn; top_mpn=&reservoir_mpn[rescount_mpn]; top_mpn->point=head_mpn; top_mpn->length=length; rescount_mpn++; return alloc_d; } void* flint_stack_alloc_bytes(unsigned long bytes) { return flint_stack_alloc((bytes-1)/FLINT_BYTES_PER_LIMB+1); } void flint_stack_release() { unsigned long length = top_mpn->length; //adjust record to reflect the fact that the limbs have been released back to the stack top_mpn->point->point-=length; top_mpn->point->remaining+=length; //if no limbs of that size are allocated any more set them to expire if not eventually used if (top_mpn->point->remaining == top_mpn->point->length) { top_mpn->point->allocated=0; top_mpn->point->expire=EXPIRE_AFTER; } //release limb_memp_t back into reservoir_mpn top_mpn--; rescount_mpn--; } /*-----------------------------------------------------------------------------------------------*/ #define FLINT_SMALL_BLOCK_SIZE 10000L mp_limb_t * block_ptr = NULL; unsigned long block_left = 0; void * flint_stack_alloc_small(unsigned long length) { if (length + 1L > block_left) // not enough space left, allocate a new block { if (length + 3L > FLINT_SMALL_BLOCK_SIZE) { printf("Error: attempt to allocate %ld limbs in small stack memory manager!\n", length); abort(); } if (block_ptr == NULL) { block_ptr = (mp_limb_t *) flint_heap_alloc(FLINT_SMALL_BLOCK_SIZE); block_left = 
FLINT_SMALL_BLOCK_SIZE - 2; block_ptr[0] = 0; block_ptr[1] = (unsigned long) NULL; block_ptr += 2; } else { mp_limb_t * temp = (mp_limb_t *) flint_heap_alloc(FLINT_SMALL_BLOCK_SIZE); temp[0] = block_left; temp[1] = (unsigned long) block_ptr; block_ptr = temp + 2; block_left = FLINT_SMALL_BLOCK_SIZE - 2; } } block_ptr[length] = length; block_ptr += (length+1L); block_left -= (length+1L); return (void *) (block_ptr - (length + 1L)); } void flint_stack_release_small(void) { if (block_left == FLINT_SMALL_BLOCK_SIZE - 2) { block_ptr -= 2; block_left = block_ptr[0]; mp_limb_t * temp = block_ptr; block_ptr = (mp_limb_t *) block_ptr[1]; flint_heap_free(temp); } block_ptr--; unsigned long temp = (*block_ptr); block_left += (temp+1); block_ptr -= temp; } void flint_stack_cleanup() { limb_mem_t* curr = head_mpn; limb_mem_t* temp; if (curr != NULL) { do { if (curr->allocated) { printf("Warning: FLINT stack memory allocation cleanup detected mismatched allocation/releases\n"); } free(curr->point); if (curr==last_mpn) last_mpn = curr->prev; else curr->next->prev = curr->prev; if (curr==head_mpn) head_mpn=curr->next; else (curr->prev->next = curr->next); temp=curr; curr = curr->next; free(temp); } while (curr != NULL); free(reservoir_mpn); } if (block_ptr != NULL) { if (block_left != FLINT_SMALL_BLOCK_SIZE - 2) { printf("Warning: FLINT small stack memory allocator detected mismatched alloc/release\n"); while (block_left != FLINT_SMALL_BLOCK_SIZE - 2) flint_stack_release_small(); } block_ptr -= 2; flint_heap_free(block_ptr); } } #endif /*-----------------------------------------------------------------------------------------------*/ void flint_memory_failure(void) { printf("Error: unable to alloc/realloc memory\n"); abort(); } #if DEBUG void* flint_heap_alloc(unsigned long limbs) { #if DEBUG_PRINT printf("Allocating %ld limbs on heap\n", limbs); #endif unsigned long * buf = malloc((101+limbs) * sizeof(mp_limb_t)); if (!buf) flint_memory_failure(); buf[0] = limbs; for (unsigned 
long i = 0; i < 100; i++) buf[limbs+i+1] = 0; return (void*) (buf+1); } void* flint_heap_alloc_bytes(unsigned long bytes) { unsigned long limbs = ((bytes-1)>>FLINT_LG_BYTES_PER_LIMB)+1; #if DEBUG_PRINT printf("Allocating %ld limbs on heap\n", limbs); #endif unsigned long * buf = malloc((101+limbs) * sizeof(mp_limb_t)); if (!buf) flint_memory_failure(); buf[0] = limbs; for (unsigned long i = 0; i < 100; i++) buf[limbs+i+1] = 0; return (void*) (buf+1); } void* flint_heap_realloc(void * block_void, unsigned long limbs) { unsigned long * block = (unsigned long *) block_void; block--; unsigned long length = block[0]; for (unsigned long i = 0; i < 100; i++) if (block[length+i+1] != 0L) { printf("Error: Block overrun detected by heap memory (re)allocator!!\n"); abort(); } #if DEBUG_PRINT printf("Reallocing from %ld to %ld limbs on heap\n", length, limbs); #endif unsigned long* buf = realloc(block, (limbs+101) * sizeof(mp_limb_t)); if (!buf) flint_memory_failure(); buf[0] = limbs; for (unsigned long i = 0; i < 100; i++) buf[limbs+i+1] = 0; return (void*) (buf+1); } void* flint_heap_realloc_bytes(void * block_void, unsigned long bytes) { unsigned long * block = (unsigned long *) block_void; unsigned long limbs = ((bytes-1)>>FLINT_LG_BYTES_PER_LIMB)+1; #if DEBUG_PRINT printf("Reallocing to %ld limbs on heap\n", limbs); #endif unsigned long* buf = realloc(block-1, (limbs+101) * sizeof(mp_limb_t)); if (!buf) flint_memory_failure(); buf[0] = limbs; for (unsigned long i = 0; i < 100; i++) buf[limbs+i+1] = 0; return (void*) (buf+1); } void flint_heap_free(void * block_void) { unsigned long * block = (unsigned long *) block_void; block--; unsigned long length = block[0]; #if DEBUG_PRINT printf("Releasing %ld limbs from heap\n", length); #endif for (unsigned long i = 0; i < 100; i++) if (block[length+i+1] != 0L) { printf("Error: Block overrun detected by heap memory allocator!!\n"); abort(); } free(block); } #else void* flint_heap_alloc(unsigned long limbs) { void* buf = 
malloc(limbs * sizeof(mp_limb_t)); if (!buf) flint_memory_failure(); return buf; } void* flint_heap_alloc_bytes(unsigned long bytes) { void* buf = malloc(bytes); if (!buf) flint_memory_failure(); return buf; } void* flint_heap_realloc(void* block, unsigned long limbs) { void* buf = realloc(block, limbs * sizeof(mp_limb_t)); if (!buf) flint_memory_failure(); return buf; } void* flint_heap_realloc_bytes(void* block, unsigned long bytes) { void* buf = realloc(block, bytes); if (!buf) flint_memory_failure(); return buf; } void flint_heap_free(void* block) { free(block); } #endif flint-1.011/long_extras.h0000644017361200017500000001777711025357254015244 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** long_extras.h Header file for long_extras.c. 
(C) 2006 William Hart Some of the macros in this file were borrowed from GMP, (C) Free Software Foundation ******************************************************************************/ #ifndef LONGEXTRAS_H #define LONGEXTRAS_H #ifdef __cplusplus extern "C" { #endif #include #include "longlong_wrapper.h" #include "longlong.h" #define PREINV32 0 // Whether or not to use the 32 bit precomputed inverse code //====================================================================================== // // The code in this section is borrowed from the GMP library v 4.2.1 // See gmp-impl.h, (C) Free Software Foundation // //====================================================================================== #define invert_limb(invxl, xl) \ do { \ mp_limb_t dummy; \ udiv_qrnnd (invxl, dummy, ~(xl), ~(0L), xl); \ } while (0) #define LIMB_HIGHBIT_TO_MASK(n) \ (((mp_limb_signed_t) -1 >> 1) < 0 \ ? (mp_limb_signed_t) (n) >> (FLINT_BITS - 1) \ : (n) & (1L<<(FLINT_BITS-1)) ? (~ (mp_limb_t) 0L) : (0L)) #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \ do { \ mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \ mp_limb_t _xh, _xl; \ _n2 = (nh); \ _n10 = (nl); \ _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \ _nadj = _n10 + (_nmask & (d)); \ umul_ppmm (_xh, _xl, di, _n2 - _nmask); \ add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \ _q1 = ~_xh; \ umul_ppmm (_xh, _xl, _q1, d); \ add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \ _xh -= (d); /* xh = 0 or -1 */ \ (r) = _xl + ((d) & _xh); \ (q) = _xh - _q1; \ } while (0) //===================================================================================== typedef struct factor_s { int num; unsigned long p[15]; unsigned long exp[15]; } factor_t; #define pre_inv_t double #define pre_inv2_t double #define pre_inv_ll_t double unsigned long z_randint(unsigned long limit); unsigned long z_randbits(unsigned long bits); double z_precompute_inverse(unsigned long n); double z_precompute_inverse2(unsigned long n); double z_ll_precompute_inverse(unsigned long n); #if PREINV32 
uint32_t z_precompute_inverse32(unsigned long n); uint32_t z_mod32_precomp(unsigned long n64, uint32_t d, uint32_t di); unsigned long z_mulmod32_precomp(unsigned long a, unsigned long b, unsigned long n, uint32_t ninv); #endif static inline unsigned long z_addmod(unsigned long a, unsigned long b, unsigned long p) { unsigned long neg1 = p - a; if (neg1 > b) return a + b; else return b - neg1; } static inline unsigned long z_submod(unsigned long a, unsigned long b, unsigned long p) { if (a < b) return p + a - b; else return a - b; } static inline unsigned long z_negmod(unsigned long a, unsigned long p) { if (a) return p - a; else return 0; } unsigned long z_mod_precomp(unsigned long a, unsigned long n, double ninv); unsigned long z_div_64_precomp(unsigned long a, unsigned long n, double ninv); unsigned long z_mod_64_precomp(unsigned long a, unsigned long n, double ninv); unsigned long z_ll_mod_precomp(unsigned long a_hi, unsigned long a_lo, unsigned long n, double ninv); unsigned long z_mulmod_precomp(unsigned long a, unsigned long b, unsigned long n, double ninv); unsigned long z_mulmod_64_precomp(unsigned long a, unsigned long b, unsigned long n, double ninv); unsigned long z_powmod(unsigned long a, long exp, unsigned long n); unsigned long z_powmod_64(unsigned long a, long exp, unsigned long n); unsigned long z_powmod_precomp(unsigned long a, long exp, unsigned long n, double ninv); unsigned long z_powmod_64_precomp(unsigned long a, long exp, unsigned long n, double ninv); #if FLINT_BITS == 64 #define z_div2_precomp z_div_64_precomp #define z_mod2_precomp z_mod_64_precomp #define z_mulmod2_precomp z_mulmod_64_precomp #define z_powmod2 z_powmod_64 #define z_powmod2_precomp z_powmod_64_precomp #else #define z_div2_precomp z_div_64_precomp #define z_mod2_precomp z_mod_precomp #define z_mulmod2_precomp z_mulmod_precomp #define z_powmod2 z_powmod #define z_powmod2_precomp z_powmod_precomp #endif int z_jacobi_precomp(unsigned long a, unsigned long p, double pinv); int 
z_isprime(unsigned long n); int z_isprime_precomp(unsigned long n, double ninv); unsigned long z_nextprime(unsigned long n); unsigned long z_pow(unsigned long a, unsigned long exp); unsigned long z_sqrtmod(unsigned long a, unsigned long p); unsigned long z_cuberootmod(unsigned long * cuberoot1, unsigned long a, unsigned long p); unsigned long z_invert(unsigned long a, unsigned long p); long z_gcd_invert(long* a, long x, long y); long z_extgcd(long* a, long* b, long x, long y); unsigned long z_gcd(long x, long y); static inline unsigned long z_intsqrt(unsigned long n) { return (unsigned long) floor(sqrt((double)n)); } static inline int z_issquare(long x) { static int mod64[64] = {1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0}; static int mod65[65] = {1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1}; static int mod_ui[63] = {1,1,0,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0}; if (x < 0) return 0; if (!mod64[x%64]) return 0; if (!mod_ui[x%63]) return 0; if (!mod65[x%65]) return 0; unsigned long sqroot = (unsigned long) sqrt((double)x); return (x == sqroot*sqroot); } unsigned long z_CRT(unsigned long x1, unsigned long n1, unsigned long x2, unsigned long n2); int z_issquarefree(unsigned long n); int z_remove_precomp(unsigned long * n, unsigned long p, double pinv); int z_remove(unsigned long * n, unsigned long p); unsigned long z_factor_trial(factor_t * factors, unsigned long n); unsigned long z_factor_SQUFOF(unsigned long n); int z_factor(factor_t * factors, unsigned long n); unsigned long z_primitive_root(unsigned long p); unsigned long z_primitive_root_precomp(unsigned long p, double p_inv); #ifdef __cplusplus } #endif #endif flint-1.011/test-support.c0000644017361200017500000000620111025357254015357 0ustar 
tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** test-support.c: Support code for test modules Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include #include "flint.h" #include "test-support.h" gmp_randstate_t randstate; // a bunch of global mpz's, guaranteed to be init'd mpz_t test_mpz[TEST_MPZ_COUNT]; void test_support_init() { gmp_randinit_default(randstate); for (unsigned long i = 0; i < TEST_MPZ_COUNT; i++) mpz_init(test_mpz[i]); } void test_support_cleanup() { gmp_randclear(randstate); for (unsigned long i = 0; i < TEST_MPZ_COUNT; i++) mpz_clear(test_mpz[i]); } unsigned long random_ulong(unsigned long max) { return gmp_urandomm_ui(randstate, max); } unsigned long random_ulong2(unsigned long max) { unsigned long bits = FLINT_BIT_COUNT(max); mpz_t rand; mpz_init(rand); mpz_rrandomb(rand, randstate, bits); unsigned long randi = mpz_get_ui(rand) % max; mpz_clear(rand); return randi; } mp_limb_t random_limb() { return gmp_urandomb_ui(randstate, FLINT_BITS); } void urandom_limbs(mp_limb_t* dest, unsigned long limbs) 
{ for (unsigned long i = 0; i < limbs; i++) dest[i] = gmp_urandomb_ui(randstate, FLINT_BITS); } void random_limbs(mp_limb_t* dest, unsigned long limbs) { mpz_rrandomb(test_mpz[0], randstate, limbs*FLINT_BITS); if (random_ulong(2)) { // GMP always sets the high bit equal to 1, // so with probability 1/2 we flip all the bits mpz_set_ui(test_mpz[1], 1); mpz_mul_2exp(test_mpz[1], test_mpz[1], limbs*FLINT_BITS); mpz_sub_ui(test_mpz[1], test_mpz[1], 1); mpz_sub(test_mpz[0], test_mpz[1], test_mpz[0]); } memset(dest, 0, limbs * sizeof(mp_limb_t)); mpz_export(dest, NULL, -1, sizeof(mp_limb_t), 0, 0, test_mpz[0]); } void mpz_to_mpn(mp_limb_t* dest, unsigned long limbs, mpz_t src) { memset(dest, 0, limbs * sizeof(mp_limb_t)); mpz_export(dest, NULL, -1, sizeof(mp_limb_t), 0, 0, src); } void mpn_to_mpz(mpz_t dest, mp_limb_t* src, unsigned long limbs) { mpz_import(dest, limbs, -1, sizeof(mp_limb_t), 0, 0, src); } // end of file **************************************************************** flint-1.011/long_extras.c0000644017361200017500000010627111035133775015224 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** long_extras.c: extra functions for longs and unsigned longs Copyright (C) 2007, William Hart *****************************************************************************/ #include #include #include #include #include #include #include "flint.h" #include "long_extras.h" #include "longlong_wrapper.h" #include "longlong.h" #include "memory-manager.h" /* Generate a random integer in the range [0, limit) If limit == 0, return a random limb */ unsigned long z_randint(unsigned long limit) { #if FLINT_BITS == 32 static uint64_t randval = 4035456057U; randval = ((uint64_t)randval*(uint64_t)1025416097U+(uint64_t)286824430U)%(uint64_t)4294967311U; if (limit == 0L) return (unsigned long) randval; return (unsigned long)randval%limit; #else static unsigned long randval = 4035456057U; static unsigned long randval2 = 6748392731U; randval = ((unsigned long)randval*(unsigned long)1025416097U+(unsigned long)286824428U)%(unsigned long)4294967311U; randval2 = ((unsigned long)randval2*(unsigned long)1647637699U+(unsigned long)286824428U)%(unsigned long)4294967357U; if (limit == 0L) return (unsigned long) randval; return (unsigned long)(randval+(randval2<<32))%limit; #endif } /* Generate a random integer with up to the given number of bits [0, FLINT_BITS] */ unsigned long z_randbits(unsigned long bits) { return z_randint(l_shift(1L, bits)); } /* Computes a double floating point approximate inverse, i.e. 
53 bits of 1 / n */ double z_precompute_inverse(unsigned long n) { return (double) 1 / (double) n; } /* Returns a % n given a precomputed approx inverse ninv Operation is *unsigned* Requires that n be no more than FLINT_D_BITS bits and _a_ be less than n^2 */ unsigned long z_mod_precomp(unsigned long a, unsigned long n, double ninv) { if (a < n) return a; unsigned long quot = (unsigned long) ((double) a * ninv); unsigned long rem = a - quot*n; if (rem >= n) return rem - n; else return rem; } /* Returns a / n given a precomputed approx inverse ninv Operation is *unsigned* Requires that n be no more than FLINT_BITS-1 bits but there are no restrictions on _a_ */ unsigned long z_div_64_precomp(unsigned long a, unsigned long n, double ninv) { if (a < n) return 0; unsigned long quot = (unsigned long) ((double) a * ninv); long rem = a - quot*n; if (rem < (long)(-n)) quot -= (unsigned long) ((double) (-rem) * ninv); else if (rem >= (long) n) quot += (unsigned long) ((double) rem * ninv); else if (rem < 0L) return quot - 1; else return quot; rem = a - quot*n; if (rem >= (long) n) return quot + 1; else if (rem < 0L) return quot - 1; else return quot; } /* Returns a % n given a precomputed approx inverse ninv Operation is *unsigned* Requires that n be no more than FLINT_BITS-1 bits but there are no restrictions on _a_ */ unsigned long z_mod_64_precomp(unsigned long a, unsigned long n, double ninv) { if (a < n) return a; unsigned long quot = (unsigned long) ((double) a * ninv); long rem = a - quot*n; if (rem < (long)(-n)) quot -= (unsigned long) ((double) (-rem) * ninv); else if (rem >= (long) n) quot += (unsigned long) ((double) rem * ninv); else if (rem < 0L) return rem + n; else return rem; rem = a - quot*n; if (rem >= (long) n) return rem - n; else if (rem < 0L) return rem + n; else return rem; } /* Computes a_hi a_lo mod n given an approximate inverse Assumes n is no more than FLINT_BITS-1 bits, but there are no restrictions on a_hi or a_lo Operation is unsigned. 
*/ unsigned long z_ll_mod_precomp(unsigned long a_hi, unsigned long a_lo, unsigned long n, double ninv) { unsigned long t1; unsigned long norm, q, r, orig_n; if (a_hi >= n) { if (((n>>(FLINT_BITS/2)) == 0) && (a_hi >= n*n)) a_hi = a_hi%n; else a_hi = z_mod2_precomp(a_hi, n, ninv); } #if UDIV_NEEDS_NORMALIZATION count_lead_zeros(norm, n); udiv_qrnnd(q, r, (a_hi<>(FLINT_BITS-norm)), a_lo<>= norm; #else udiv_qrnnd(q, r, a_hi, a_lo, n); #endif return r; } /* I don't trust the code below. It is for precomputed inverses of 32 bits. It should work without modification up to 32 bits, but does not. The code is a brutalisation of code found in GMP. */ #if PREINV32 #define invert_limb32(invxl, xl) \ do { \ invxl = (~(((unsigned long)xl)<<32))/xl; \ } while (0) #define LIMB_HIGHBIT_TO_MASK32(n) \ ((n) & (1<<31)) ? (~0) : (0)) uint32_t z_mod32_precomp(unsigned long n64, uint32_t d, uint32_t di) { uint32_t xh, xl, nh, nl, nmask, nadj, q1; unsigned long x; nh = ((uint32_t) (n64 >> 32)); nl = ((uint32_t) n64); nmask = LIMB_HIGHBIT_TO_MASK (nl); nadj = nl + (nmask & d); x = (unsigned long) di * (unsigned long) (nh - nmask); x += (((unsigned long)nh)<<32) + (unsigned long) nadj; q1 = ~(x>>32); x = (unsigned long) q1 * (unsigned long) d; x += n64; xh = (uint32_t) (x >> 32); xl = (uint32_t) x; xh -= d; return xl + ((xh) & d); } uint32_t z_precompute_inverse32(unsigned long n) { unsigned long norm; count_lead_zeros(norm, n); uint32_t ninv; invert_limb32(ninv, (n<<(norm-32))); return ninv; } unsigned long z_mulmod32_precomp(unsigned long a, unsigned long b, unsigned long n, uint32_t ninv) { unsigned long norm; unsigned long prod = a*b; count_lead_zeros(norm, n); norm -= 32; unsigned long res = (unsigned long) z_mod32_precomp(prod<>= norm; if (res >= n) res -= n; return res; } #endif /* Computes a*b mod n, given a precomputed inverse ninv Assumes a an b are both in [0,n). 
Requires that n be no more than FLINT_D_BITS bits */ unsigned long z_mulmod_precomp(unsigned long a, unsigned long b, unsigned long n, double ninv) { unsigned long quot = (unsigned long) ((double) a * (double) b * ninv); long rem = a*b - quot*n; if (rem < 0) { rem += n; if (rem < 0) return rem + n; } else if (rem >= n) return rem - n; return rem; } /* Computes a*b mod n, given a precomputed inverse ninv Assumes a an b are both in [0,n). There is no restriction on a*b, i.e. it can be two limbs */ unsigned long z_mulmod_64_precomp(unsigned long a, unsigned long b, unsigned long n, double ninv) { unsigned long p1, p2; umul_ppmm(p2, p1, a, b); return z_ll_mod_precomp(p2, p1, n, ninv); } /* Returns a^exp modulo n Assumes a is reduced mod n Requires that n be no more than FLINT_D_BITS bits There are no restrictions on exp, which can also be negative. */ unsigned long z_powmod(unsigned long a, long exp, unsigned long n) { double ninv = z_precompute_inverse(n); unsigned long x, y; unsigned long e; if (exp < 0) e = (unsigned long) -exp; else e = exp; x = 1; y = a; while (e) { if (e & 1) x = z_mulmod_precomp(x, y, n, ninv); y = z_mulmod_precomp(y, y, n, ninv); e = e >> 1; } if (exp < 0) x = z_invert(x, n); return x; } /* Returns a^exp modulo n Assumes a is reduced mod n Requires that n be no more than FLINT_BITS-1 bits There are no restrictions on exp, which can also be negative. 
*/ unsigned long z_powmod_64(unsigned long a, long exp, unsigned long n) { double ninv = z_precompute_inverse(n); unsigned long x, y; unsigned long e; if (exp < 0) e = (unsigned long) -exp; else e = exp; x = 1; y = a; while (e) { if (e & 1) x = z_mulmod_64_precomp(x, y, n, ninv); y = z_mulmod_64_precomp(y, y, n, ninv); e = e >> 1; } if (exp < 0) x = z_invert(x, n); return x; } /* Returns a^exp modulo n given a precomputed inverse Assumes a is reduced mod n Requires that n be no more than FLINT_D_BITS bits There are no restrictions on exp, which may also be negative */ unsigned long z_powmod_precomp(unsigned long a, long exp, unsigned long n, double ninv) { unsigned long x, y; unsigned long e; if (exp < 0) e = (unsigned long) -exp; else e = exp; x = 1; y = a; while (e) { if (e & 1) x = z_mulmod_precomp(x, y, n, ninv); y = z_mulmod_precomp(y, y, n, ninv); e = e >> 1; } if (exp < 0) x = z_invert(x, n); return x; } /* Returns a^exp modulo n given a precomputed inverse Assumes a is reduced mod n Requires that n be no more than FLINT_BITS-1 bits There are no restrictions on exp, which may also be negative */ unsigned long z_powmod_64_precomp(unsigned long a, long exp, unsigned long n, double ninv) { unsigned long x, y; unsigned long e; if (exp < 0) e = (unsigned long) -exp; else e = exp; x = 1; y = a; while (e) { if (e & 1) x = z_mulmod_64_precomp(x, y, n, ninv); y = z_mulmod_64_precomp(y, y, n, ninv); e = e >> 1; } if (exp < 0) x = z_invert(x, n); return x; } /* Computes the Jacobi symbol of _a_ modulo p Assumes p is a prime of no more than FLINT_BITS-1 bits and that _a_ is reduced modulo p */ int z_jacobi_precomp(unsigned long a, unsigned long p, double pinv) { if (a == 0) return 0; if (z_powmod2_precomp(a, (p-1)/2, p, pinv) == p-1) return -1; else return 1; } /* Computes a square root of _a_ modulo p. Assumes p is a prime of no more than FLINT_BITS-1 bits, that _a_ is reduced modulo p. Returns 0 if _a_ is a quadratic non-residue modulo p. 
*/
unsigned long z_sqrtmod(unsigned long a, unsigned long p)
{
   unsigned int r, k, m;
   unsigned long p1, b, g, bpow, gpow, res;
   double pinv;

   /* 0 and 1 are their own square roots */
   if ((a==0) || (a==1))
   {
      return a;
   }

   pinv = z_precompute_inverse(p);

   if (z_jacobi_precomp(a, p, pinv) == -1) return 0;

   /* p = 3 mod 4: a single exponentiation gives the root */
   if ((p&3)==3)
   {
      return z_powmod2_precomp(a, (p+1)/4, p, pinv);
   }

   /* write p - 1 = 2^r * p1 with p1 odd */
   r = 0;
   p1 = p-1;

   do {
      p1>>=1UL;
      r++;
   } while ((p1&1UL) == 0);

   b = z_powmod2_precomp(a, p1, p, pinv);

   /* search upwards from 2 for the first k whose symbol is -1 */
   for (k=2UL; ;k++)
   {
      if (z_jacobi_precomp(k, p, pinv) == -1) break;
   }

   g = z_powmod2_precomp(k, p1, p, pinv);
   res = z_powmod2_precomp(a, (p1+1)/2, p, pinv);
   if (b == 1UL)
   {
      return res;
   }

   /* each pass multiplies res by a power of g and b by the square of that
      power; the loop ends once b has become 1 */
   while (b != 1)
   {
      /* square bpow until it reaches 1 or m reaches r */
      bpow = b;
      for (m = 1; (m <= r-1) && (bpow != 1); m++)
      {
         bpow = z_mulmod2_precomp(bpow, bpow, p, pinv);
      }
      gpow = g;
      for (int i = 1; i < r-m; i++)
      {
         gpow = z_mulmod2_precomp(gpow, gpow, p, pinv);
      }
      res = z_mulmod2_precomp(res, gpow, p, pinv);
      gpow = z_mulmod2_precomp(gpow, gpow, p, pinv);
      b = z_mulmod2_precomp(b, gpow, p, pinv);
      gpow = g;
      r = m;
   }

   return res;
}

/*
   Computes a cube root of _a_ mod p for a prime p and returns a cube
   root of unity if the cube roots of _a_ are distinct else the cube
   root is set to 1
   If _a_ is not a cube modulo p then 0 is returned
   This function assumes _a_ is reduced modulo p
   Requires p be no more than FLINT_BITS-1 bits
   Note: for a == 0 the function returns 0 without writing *cuberoot1.
*/
unsigned long z_cuberootmod(unsigned long * cuberoot1,
                            unsigned long a, unsigned long p)
{
   unsigned long x;
   double pinv;

   if (a == 0) return 0;

   pinv = z_precompute_inverse(p);

   /* p = 2 mod 3: one exponentiation gives the root; *cuberoot1 is set to 1 */
   if ((p % 3) == 2)
   {
      *cuberoot1 = 1;
      return z_powmod2_precomp(a, 2*((p+1)/3)-1, p, pinv);
   }

   unsigned long e=0;
   unsigned long q = p-1;
   unsigned long l;
   unsigned long n = 2;
   unsigned long z, y, r, temp, temp2, b, m, s, t;
   r = 1;

   /* write p - 1 = 3^e * q with q not divisible by 3 */
   while ((q%3) == 0)
   {
      q = q/3;
      e++;
   }
   l = q%3;

   x = z_powmod2_precomp(a, (q-l)/3, p, pinv);
   temp = z_powmod2_precomp(a, l, p, pinv);
   temp2 = z_powmod2_precomp(x, 3UL, p, pinv);
   b = z_mulmod2_precomp(temp, temp2, p, pinv);
   if (l == 2) x = z_mulmod2_precomp(a, x, p, pinv);

   /* search upwards from 2 for the first n with n^((p-1)/3) != 1 */
   while(z_powmod2_precomp(n, (p-1)/3, p, pinv)==1) n++;

   z = z_powmod2_precomp(n, q, p, pinv);
   y = z;
   r = e;

   while (b!=1)
   {
      /* m counts the cubings needed to take b to 1 */
      s = z_powmod2_precomp(b, 3UL, p, pinv);
      m = 1;
      while(s!=1)
      {
         s = z_powmod2_precomp(s, 3UL, p, pinv);
         m++;
      }
      /* this is the exit taken when _a_ is not a cube */
      if(m>=r) return(0);
      t = z_powmod2_precomp(y, z_pow(3UL, r-m-1UL), p, pinv);
      y = z_powmod2_precomp(t, 3UL, p, pinv);
      r = m;
      x = z_mulmod2_precomp(t, x, p, pinv);
      b = z_mulmod2_precomp(y, b, p, pinv);
   }

   if (r==1) *cuberoot1 = y;
   else *cuberoot1 = z_powmod2_precomp(y, z_pow(3UL, r-1), p, pinv);
   if (l==2) return(x);
   else return(z_invert(x, p));
}

/*
   returns a^exp
   (computed by repeated multiplication in unsigned long arithmetic)
*/
unsigned long z_pow(unsigned long a, unsigned long exp)
{
   if (exp == 0) return 1;
   if (a == 1) return 1;

   unsigned long power = a;

   for (unsigned long i = 1; i < exp; i++)
      power *= a;

   return power;
}

/*
   Tests whether n is an a-Strong Pseudo Prime
   Assumes d is set to the largest odd factor of n-1
   Assumes n is at most FLINT_D_BITS bits
   Requires _a_ to be reduced mod n
*/
static inline
int SPRP(unsigned long a, unsigned long d, unsigned long n, double ninv)
{
      unsigned long t = d;
      unsigned long y;

      y = z_powmod_precomp(a, t , n, ninv);
      /* keep squaring while doubling t, until t reaches n-1 or y is 1 or n-1 */
      while ((t != n-1) && (y != 1) && (y != n-1))
      {
         y = z_mulmod_precomp(y, y, n, ninv);
         t <<= 1;
      }
      /* fails if y is not n-1 after at least one doubling of t */
      if ((y != n-1) && ((t&1) == 0)) return 0;
      return 1;
}

/*
   Tests whether n is an a-Strong Pseudo Prime
   Assumes d is set to the largest odd factor of n-1
   Assumes n is at most FLINT_BITS-1 bits
   Requires _a_ to be reduced mod n
*/
static inline
int SPRP_64(unsigned long a, unsigned long d, unsigned long n, double ninv)
{
      unsigned long t = d;
      unsigned long y;

      y = z_powmod_64_precomp(a, t , n, ninv);
      while ((t != n-1) && (y != 1) && (y != n-1))
      {
         y = z_mulmod_64_precomp(y, y, n, ninv);
         t <<= 1;
      }
      if ((y != n-1) && ((t&1) == 0)) return 0;
      return 1;
}

/*
   Miller-Rabin primality test. If reps is set to 5 a couple of
   pseudoprimes on average will pass the test out of each 10^11 tests.
   Every increase of reps by 1 decreases the chance of composites
   passing by a factor of 4.
Requires n be no more than FLINT_BITS-1 bits */ int z_miller_rabin_precomp(unsigned long n, double ninv, unsigned long reps) { unsigned long d = n-1, a, t, y; do { d>>=1UL; } while ((d&1UL) == 0); for (unsigned long i = 0; i < reps; i++) { a = z_randint(n-2)+1UL; t = d; y = z_powmod2_precomp(a, t , n, ninv); while ((t != n-1) && (y != 1UL) && (y != n-1)) { y = z_mulmod2_precomp(y, y, n, ninv); t <<= 1UL; } if ((y != n-1) && ((t&1UL) == 0UL)) return 0; } return 1; } /* This is a deterministic prime test up to 10^16. Todo: use the table here: http://oldweb.cecm.sfu.ca/pseudoprime/ to make this into an unconditional primality test for larger n This test is intended to be run after checking for divisibility by primes up to 257 say. Requires n is no more than FLINT_BITS-1 bits */ int z_isprime_precomp(unsigned long n, double ninv) { unsigned long d = n-1; do { d>>=1; } while ((d&1) == 0); if (n < 9080191UL) { if (SPRP(31UL, d, n, ninv) && SPRP(73UL, d, n, ninv)) return 1; else return 0; } #if FLINT_BITS == 64 if (n < 4759123141UL) { if (SPRP(2UL, d, n, ninv) && SPRP(7UL, d, n, ninv) && SPRP(61UL, d, n, ninv)) return 1; else return 0; } if (n < 1122004669633UL) { if (SPRP(2UL, d, n, ninv) && SPRP(13UL, d, n, ninv) && SPRP(23UL, d, n, ninv) && SPRP(1662803UL, d, n, ninv)) if (n != 46856248255981UL) return 1; return 0; } if (n < 10000000000000000UL) { if (SPRP_64(2UL, d, n, ninv) && SPRP_64(3UL, d, n, ninv) && SPRP_64(7UL, d, n, ninv) && SPRP_64(61UL, d, n, ninv) && SPRP_64(24251UL, d, n, ninv)) if (n != 46856248255981UL) return 1; return 0; } return z_miller_rabin_precomp(n, ninv, 6); #else if (SPRP(2UL, d, n, ninv) && SPRP(7UL, d, n, ninv) && SPRP(61UL, d, n, ninv)) return 1; else return 0; #endif } /* This is a deterministic prime test up to 10^16. Todo: use the table here: http://oldweb.cecm.sfu.ca/pseudoprime/ to make this into an unconditional primality test for larger n This test is intended to be run after checking for divisibility by primes up to 257 say. 
Requires n to be at most FLINT_BITS-1 bits */ int z_isprime(unsigned long n) { double ninv; ninv = z_precompute_inverse(n); return z_isprime_precomp(n, ninv); } unsigned int nextmod30[] = { 1, 6, 5, 4, 3, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 2, 1, 4, 3, 2, 1, 6, 5, 4, 3, 2, 1, 2 }; unsigned int nextindex[] = { 1, 7, 7, 7, 7, 7, 7, 11, 11, 11, 11, 13, 13, 17, 17, 17, 17, 19, 19, 23, 23, 23, 23, 29, 29, 29, 29, 29, 29, 1 }; unsigned int primes[] = { 2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97, 101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181, 191,193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277, 281,283,293,307,311,313,317,331,337,347,349,353,359,367,373,379,383, 389,397,401,409,419,421,431,433,439,443,449,457,461,463,467,479,487, 491,499,503,509,521,523,541,547,557,563,569,571,577,587,593,599,601, 607,613,617,619,631,641,643,647,653,659,661,673,677,683,691,701,709, 719,727,733,739,743,751,757,761,769,773,787,797,809,811,821,823,827, 829,839,853,857,859,863,877,881,883,887,907,911,919,929,937,941,947, 953,967,971,977,983,991,997 }; #define NEXTPRIME_PRIMES 54 #define NUMBER_OF_PRIMES 168 /* Returns the next prime after n Assumes the result will fit in an unsigned long */ unsigned long z_nextprime(unsigned long n) { if (n < 7) { if (n<2) return 2; n++; n|=1; return n; } unsigned long index = n%30; n+=nextmod30[index]; index = nextindex[index]; if (n <= primes[NEXTPRIME_PRIMES-1]) { if (n == 7) return 7; if (n == 11) return 11; if (n == 13) return 13; while (((n%7)==0)||((n%11)==0)||((n%13)==0)) { n += nextmod30[index]; index = nextindex[index]; } return n; } unsigned int * moduli = (unsigned int *) flint_stack_alloc_bytes(NEXTPRIME_PRIMES * sizeof(unsigned int)); for (unsigned int i = 3; i < NEXTPRIME_PRIMES; i++) moduli[i] = (n % primes[i]); while (1) { unsigned int composite = 0; unsigned int diff, acc, pr;; diff = nextmod30[index]; /* First check residues */ for (unsigned int i = 3; i < NEXTPRIME_PRIMES; 
i++) { composite |= (moduli[i] == 0); acc = moduli[i] + diff; pr = primes[i]; moduli[i] = acc >= pr ? acc - pr : acc; } if (composite) { n += diff; index = nextindex[index]; continue; } /* Miller-Rabin test */ if (z_isprime(n)) break; else { n += diff; index = nextindex[index]; } } flint_stack_release(); return n; } /* returns the inverse of a modulo p */ unsigned long z_invert(unsigned long a, unsigned long p) { if (a == 0) return 0; if (a == 1) return 1; // Important to optimise for Newton inversion long u1=1, u3=a; long v1=0, v3=p; long t1=0, t3=0; long quot; while (v3) { quot=u3-v3; if (u3 < (v3<<2)) { if (quot < v3) { if (quot < 0) { t1 = u1; u1 = v1; v1 = t1; t3 = u3; u3 = v3; v3 = t3; } else { t1 = u1 - v1; u1 = v1; v1 = t1; t3 = u3 - v3; u3 = v3; v3 = t3; } } else if (quot < (v3<<1)) { t1 = u1 - (v1<<1); u1 = v1; v1 = t1; t3 = u3 - (v3<<1); u3 = v3; v3 = t3; } else { t1 = u1 - v1*3; u1 = v1; v1 = t1; t3 = u3 - v3*3; u3 = v3; v3 = t3; } } else { quot=u3/v3; t1 = u1 - v1*quot; u1 = v1; v1 = t1; t3 = u3 - v3*quot; u3 = v3; v3 = t3; } } if (u1<0) u1+=p; return u1; } /* returns gcd(x, y) = a*x + b*y. If gcd = 1 then a = x^-1 mod y We ensure a is reduced mod y */ long z_gcd_invert(long* a, long x, long y) { long u1=1; long u2=0; long t1; long u3, v3; long quot, rem; long xsign = 0; if (x < 0) { x = -x; xsign = 1; } if (y < 0) { y = -y; } u3 = x, v3 = y; while (v3) { quot=u3-v3; if (u3 < (v3<<2)) { if (quot < v3) { if (quot < 0) { rem = u3; t1 = u2; u2 = u1; u1 = t1; u3 = v3; v3 = rem; } else { t1 = u2; u2 = u1 - u2; u1 = t1; u3 = v3; v3 = quot; } } else if (quot < (v3<<1)) { t1 = u2; u2 = u1 - (u2<<1); u1 = t1; u3 = v3; v3 = quot-u3; } else { t1 = u2; u2 = u1 - 3*u2; u1 = t1; u3 = v3; v3 = quot-(u3<<1); } } else { quot=u3/v3; rem = u3 - v3*quot; t1 = u2; u2 = u1 - quot*u2; u1 = t1; u3 = v3; v3 = rem; } } if (xsign) u1 = -u1; if (u1 < 0L) u1 += y; *a = u1; return u3; } /* returns gcd(x, y) = a*x + b*y. 
*/ long z_extgcd(long* a, long* b, long x, long y) { long u1=1, v1=0; long u2=0, v2=1; long t1, t2; long u3, v3; long quot, rem; long xsign = 0; long ysign = 0; if (x < 0) { x = -x; xsign = 1; } if (y < 0) { y = -y; ysign = 1; } u3 = x, v3 = y; while (v3) { quot=u3-v3; if (u3 < (v3<<2)) { if (quot < v3) { t2 = v2; if (quot < 0) { rem = u3; t1 = u2; u2 = u1; u1 = t1; u3 = v3; v2 = v1; v1 = t2; v3 = rem; } else { t1 = u2; u2 = u1 - u2; u1 = t1; u3 = v3; v2 = v1 - v2; v1 = t2; v3 = quot; } } else if (quot < (v3<<1)) { t1 = u2; u2 = u1 - (u2<<1); u1 = t1; u3 = v3; t2 = v2; v2 = v1 - (v2<<1); v1 = t2; v3 = quot-u3; } else { t1 = u2; u2 = u1 - 3*u2; u1 = t1; u3 = v3; t2 = v2; v2 = v1 - 3*v2; v1 = t2; v3 = quot-(u3<<1); } } else { quot=u3/v3; rem = u3 - v3*quot; t1 = u2; u2 = u1 - quot*u2; u1 = t1; u3 = v3; t2 = v2; v2 = v1 - quot*v2; v1 = t2; v3 = rem; } } if (xsign) u1 = -u1; if (ysign) v1 = -v1; *a = u1; *b = v1; return u3; } /* returns gcd(x, y) */ unsigned long z_gcd(long x, long y) { if (x < 0) { x = -x; } if (y < 0) { y = -y; } long u3 = x, v3 = y; long quot, rem; while (v3) { quot=u3-v3; if (u3 < (v3<<2)) { if (quot < v3) { if (quot < 0) { rem = u3; u3 = v3; v3 = rem; } else { u3 = v3; v3 = quot; } } else if (quot < (v3<<1)) { u3 = v3; v3 = quot-u3; } else { u3 = v3; v3 = quot-(u3<<1); } } else { rem = u3 % v3; u3 = v3; v3 = rem; } } return u3; } /* Return 0 <= a < n1*n2 such that a mod n1 = x1 and a mod n2 = x2 Assumes gcd(n1, n2) = 1 and that n1*n2 is at most FLINT_BITS-1 bits Assumes x1 is reduced modulo n1 and x2 is reduced modulo n2 Requires n1*n2 to be at most FLINT_BITS-1 bits */ unsigned long z_CRT(unsigned long x1, unsigned long n1, unsigned long x2, unsigned long n2) { unsigned long n, res, ch; double ninv; n = n1*n2; if (n == 1) return 0; ninv = z_precompute_inverse(n); res = z_invert(n2,n1); res = z_mulmod2_precomp(res, n2, n, ninv); res = z_mulmod2_precomp(res, x1, n, ninv); ch = z_invert(n1,n2); ch = z_mulmod2_precomp(ch, n1, n, ninv); ch = 
z_mulmod2_precomp(ch, x2, n, ninv); res = res+ch; if (res >= n) return res - n; else return res; } #define SQFREE_TF_PRIMES_LIMIT 168 #define SQFREE_TF_CUTOFF 1000000 int z_issquarefree_trial(unsigned long n) { unsigned long quot, rem; if ((n&1) == 0) { if ((n&3) == 0) return 0; else n = (n>>1); } for (unsigned long i = 1; (i < SQFREE_TF_PRIMES_LIMIT) && (primes[i]*primes[i] <= n); i++) { quot = n/primes[i]; rem = n - quot*primes[i]; if (rem == 0) { if ((quot % primes[i]) == 0) return 0; else n = quot; } } return 1; } /* Tests if n is squarefree or not Currently only works for numbers up to 65535 */ int z_issquarefree(unsigned long n) { if (n < SQFREE_TF_CUTOFF) return z_issquarefree_trial(n); else { printf("Not implemented yet!\n"); abort(); } } /* Removes the highest power of p possible from n and returns the exponent to which it appeared in n n can be up to FLINT_BITS-1 bits */ int z_remove_precomp(unsigned long * n, unsigned long p, double pinv) { unsigned long quot, rem; int exp = 0; quot = z_div2_precomp(*n, p, pinv); rem = (*n) - quot*p; while (rem == 0); { exp++; (*n) = quot; quot = z_div2_precomp(*n, p, pinv); rem = (*n) - quot*p; } return exp; } /* Removes the highest power of p possible from n and returns the exponent to which it appeared in n */ int z_remove(unsigned long * n, unsigned long p) { unsigned long exp; int i; unsigned long powp[7]; // One more than I calculate is necessary for 64 bits unsigned long quot, rem; if (p == 2) { count_trail_zeros(exp, *n); if (exp) { *n = ((*n)>>exp); return exp; } } powp[0] = p; for (i = 0; ; i++) { quot = *n/powp[i]; rem = *n - quot*powp[i]; if (rem != 0) break; powp[i + 1] = powp[i] * powp[i]; *n = quot; } exp = (1< 0) { i--; quot = *n/powp[i]; rem = *n - quot*powp[i]; if (rem == 0) { exp += (1<p[num_factors] = primes[i]; factors->exp[num_factors] = exp; num_factors++; } } factors->num = num_factors; return n; } /* Square forms factoring algorithm of Shanks Adapted from the (simplified) algorithm as described 
by Gower and Wagstaff Math of Comp. (Preprint May 2007) */ #define SQUFOF_ITERS 50000 unsigned long _z_factor_SQUFOF(unsigned long n) { unsigned long sqroot = z_intsqrt(n); unsigned long p = sqroot; unsigned long q = n - sqroot*sqroot; if (q == 0) { return sqroot; } unsigned long l = 1 + 2*z_intsqrt(2*p); unsigned long l2 = l/2; unsigned long iq, pnext; unsigned long qarr[50]; unsigned long qupto = 0; unsigned long qlast = 1; unsigned long i, j, t, r; for (i = 0; i < SQUFOF_ITERS; i++) { iq = (sqroot + p)/q; pnext = iq*q - p; if (q <= l) { if ((q & 1) == 0) { qarr[qupto] = q/2; qupto++; if (qupto >= 50) return 0; } else if (q <= l2) { qarr[qupto] = q; qupto++; if (qupto >= 50) return 0; } } t = qlast + iq*(p - pnext); qlast = q; q = t; p = pnext; if ((i&1) == 1) continue; if (!z_issquare(q)) continue; r = z_intsqrt(q); if (qupto == 0) break; for (j = 0; j < qupto; j++) if (r == qarr[j]) goto cont; break; cont: ; if (r == 1) return 0; } if (i == SQUFOF_ITERS) return 0; // taken too long, give up qlast = r; p = p + r*((sqroot - p)/r); q = (n - p*p)/qlast; for (j = 0; j < SQUFOF_ITERS; j++) { iq = (sqroot + p)/q; pnext = iq*q - p; if (p == pnext) break; t = qlast + iq*(p - pnext); qlast = q; q = t; p = pnext; } if ((q & 1) == 0) q /= 2; return q; } /* Factor n using as many rounds of SQUFOF as it takes Assumes trial factoring of n has already been done and that n is not a prime */ unsigned long z_factor_SQUFOF(unsigned long n) { unsigned long factor = _z_factor_SQUFOF(n); unsigned long multiplier; unsigned long quot, rem, kn; unsigned long s1, s2; if (factor) return factor; for (unsigned long i = 1; (i < NUMBER_OF_PRIMES) && !factor; i++) { multiplier = primes[i]; count_lead_zeros(s1, multiplier); s1 = FLINT_BITS - s1; count_lead_zeros(s2, n); if (s1 > s2) return 0; // kn is more than one limb kn = multiplier*n; factor = _z_factor_SQUFOF(kn); if (factor) { quot = factor/multiplier; rem = factor - quot*multiplier; if (!rem) factor = quot; if ((factor == 1) || (factor 
== n)) factor = 0; } } return factor; } void insert_factor(factor_t * factors, unsigned long p) { int i = 0; for (i = 0; i < factors->num; i++) { if (factors->p[i] == p) { factors->exp[i]++; break; } } if (i == factors->num) { factors->p[i] = p; factors->exp[i] = 1; factors->num++; } } /* Find the factors of n This function may fail if n is so large that SQUFOF multipliers (rarely up to 1000) times n push it over a limb in size If factoring fails (very rare), this function returns 0, else it returns 1 */ int z_factor(factor_t * factors, unsigned long n) { unsigned long cofactor; unsigned long factor_arr[TF_FACTORS_IN_LIMB]; unsigned long cutoff = primes[TF_CUTOFF-1]*primes[TF_CUTOFF-1]; unsigned long factors_left = 1; unsigned long factor; cofactor = z_factor_trial(factors, n); if (cofactor != 1) { factor = factor_arr[0] = cofactor; while (factors_left > 0) { factor = factor_arr[factors_left-1]; if ((factor < cutoff) || z_isprime(factor)) { insert_factor(factors, factor); factors_left--; } else { factor = factor_arr[factors_left] = z_factor_SQUFOF(factor); if (!factor_arr[factors_left]) return 0; factor_arr[factors_left-1] /= factor; factors_left++; } } return 1; } return 1; } /* Finds the smallest primitive root of the prime p Initial attempt - could be extremely sub-optimal! 
*/ unsigned long z_primitive_root(unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long res; factor_t factors; if(z_factor(&factors, (p - 1)) == 0) { return 0; } res = 2; int i = 0; do { if(z_powmod(res, (p-1) / factors.p[i], p) == 1) { res++; i = 0; } else { i++; } } while(i != factors.num); return res; } unsigned long z_primitive_root_precomp(unsigned long p, double p_inv) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long res; factor_t factors; if(z_factor(&factors, (p - 1)) == 0) { return 0; } res = 2; int i = 0; do { if(z_powmod_precomp(res, (p-1) / factors.p[i], p, p_inv) == 1) { res++; i = 0; } else { i++; } } while(i != factors.num); return res; } flint-1.011/mpz_poly-tune.c0000644017361200017500000001212411025357254015511 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* mpz_poly-tune Program for tuning the mpz_poly module. This program writes to standard output an automatically tuned version of mpz_poly-tuning.c. (If DEBUG is set, it also writes logging info to standard error.) 
(C) 2007 David Harvey and William Hart */ #include #include #include "flint.h" #include "test-support.h" #include "profiler.h" #include "mpz_poly.h" #include "mpz_poly-tuning.h" #define DEBUG 1 typedef struct { unsigned long length; unsigned long limbs; unsigned long crossover; } sample_kara_t; // arg should point to a sample_kara_t void sample_kara(void* arg, unsigned long count) { unsigned long length = ((sample_kara_t*) arg)->length; unsigned long limbs = ((sample_kara_t*) arg)->limbs; unsigned long crossover = ((sample_kara_t*) arg)->crossover; mpz_t* buf; buf = (mpz_t*) malloc(6 * length * sizeof(mpz_t)); for (unsigned long i = 0; i < 6*length; i++) mpz_init2(buf[i], 3*limbs*FLINT_BITS); for (unsigned long i = 0; i < 2*length; i++) // (leave a few zero high bits to prevent carries in the multiplication) mpz_urandomb(buf[i], randstate, limbs*FLINT_BITS - FLINT_BITS/3); mpz_t* in1 = buf; mpz_t* in2 = in1 + length; mpz_t* out = in2 + length; mpz_t* scratch = out + length; // warm up for (unsigned long i = 0; i < count/4; i++) _mpz_poly_mul_kara_recursive(out, in1, length, in2, length, scratch, 1, crossover); // time it start_clock(0); for (unsigned long i = 0; i < count; i++) _mpz_poly_mul_kara_recursive(out, in1, length, in2, length, scratch, 1, crossover); stop_clock(0); for (unsigned long i = 0; i < 6*length; i++) mpz_clear(buf[i]); free(buf); } /* Compares: * one layer of karatubsa followed by classical multiplication, vs * straight classical multiplication, for a given polynomial length and coefficient size, using the _mpz_poly_mul_kara_recursive() function. Returns nonzero if the first strategy wins. 
*/

int compare_kara(unsigned long length, unsigned long limbs, FILE* f)
{
   double time1, time2;
   sample_kara_t info;

   info.length = length;
   info.limbs = limbs;

   // try with one layer of karatsuba
   info.crossover = length*length;
   prof_repeat(&time1, NULL, sample_kara, &info);

   // try with plain classical
   info.crossover = 2*length*length;
   prof_repeat(&time2, NULL, sample_kara, &info);

#if DEBUG
   // length and limbs are unsigned long, so they are printed with %lu
   fprintf(f, "length = %lu, limbs = %lu, %s wins (%lf vs %lf)\n",
           length, limbs, (time1 < time2) ? "karatsuba" : "classical",
           FLINT_MIN(time1, time2), FLINT_MAX(time1, time2));
#endif

   return time1 < time2;
}


/*
Finds crossover length for switching from classical to karatsuba multiplication
for the given coefficient length.

Lengths are tried upwards from 2 with no upper bound; the first length at
which compare_kara() reports a karatsuba win twice in a row is returned.
Logging from compare_kara() goes to f.
*/
unsigned long crossover_kara(unsigned long limbs, FILE* f)
{
   for (unsigned long length = 2; ; length++)
   {
      // if karatsuba seems to win, run it twice just to check
      if (compare_kara(length, limbs, f) && compare_kara(length, limbs, f))
         return length;
   }
}


/*
Writes the tuned table source to stdout and logging to stderr.

One crossover value is emitted per coefficient size (limbs = 1, 2, ...),
stopping at the first size whose crossover is 2; the table is terminated by
a 0 entry and the final value of limbs is written as the table size.
*/
int main(int argc, char* argv[])
{
   FILE* fout = stdout;
   FILE* flog = stderr;

   test_support_init();

   fprintf(fout, "/*\n");
   fprintf(fout, " Tuning values for mpz_poly module\n");
   fprintf(fout, "\n");
   fprintf(fout, " Automatically generated by mpz_poly-tune program\n");
   fprintf(fout, "*/\n\n");
   fprintf(fout, "#include \"mpz_poly-tuning.h\"\n");
   fprintf(fout, "#include \"mpz_poly.h\"\n");
   fprintf(fout, "\n");
   fprintf(fout, "unsigned long mpz_poly_kara_crossover_table[] = {");
   fflush(fout);

   unsigned long limbs;
   for (limbs = 1; ; limbs++)
   {
      unsigned long crossover = crossover_kara(limbs, flog);
      if (crossover == 2)
         break;
      // crossover is unsigned long: print with %lu
      fprintf(fout, "%lu, ", crossover);
      fflush(fout);
   }
   fprintf(fout, "0};\n");
   // limbs is unsigned long: print with %lu
   fprintf(fout, "unsigned long mpz_poly_kara_crossover_table_size = %lu;\n", limbs);
   fprintf(fout, "\n\n");
   fprintf(fout, "// end of file *********************************\n");

   test_support_cleanup();

   return 0;
}


// end of file ****************************************************************
flint-1.011/ZmodF_mul.h0000644017361200017500000001420611025357254014573 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** ZmodF_mul.h Copyright (C) 2007, David Harvey and William Hart Routines for multiplication of elements of Z/pZ where p = B^n + 1, B = 2^FLINT_BITS. ******************************************************************************/ #ifndef FLINT_ZMODF_MUL_H #define FLINT_ZMODF_MUL_H #ifdef __cplusplus extern "C" { #endif #include #include #include "mpn_extras.h" #include "ZmodF_poly.h" /* Several algorithms for multiplication mod p: ZMODF_MUL_PLAIN: use mpn_mul_n and then reduce mod p ZMODF_MUL_THREEWAY: if n = 3m, do multiplication mod B^m + 1 and mod B^2m - B^m + 1 separately, combine using CRT to get answer mod B^n + 1. ZMODF_MUL_FFT: split into polynomial of length 2^depth with coefficients mod (B^m + 1)*B^k, where k = 0, 1, or 2. Do negacyclic convolution mod B^m + 1 using FFT; do convolution mod B^k using naive algorithm; combine results by CRT. 
*/ #define ZMODF_MUL_ALGO_PLAIN 0 #define ZMODF_MUL_ALGO_THREEWAY 1 #define ZMODF_MUL_ALGO_FFT 2 /* This struct manages temporary allocation and tuning parameters for a specific n. */ typedef struct { unsigned long n; // possible values for algo are the ZMOD_MUL_ALGO_xyz constants above int algo; // this flag indicates that this struct is being used for squaring, in // which case less memory will be allocated in the init routines int squaring; // scratch buffer: // of length 2n (for ZMODF_MUL_ALGO_PLAIN) // or length 3n+1 (for ZMODF_MUL_ALGO_THREEWAY) // or length 3*k*2^depth (for ZMODF_MUL_ALGO_FFT) // or NULL if not allocated mp_limb_t* scratch; // for ZMODF_MUL_ALGO_THREEWAY, m = n/3. // for ZMODF_MUL_ALGO_FFT, the FFT operates mod B^m + 1, and the naive // algorithm works mod B^k unsigned long m, k; // used only for ZMODF_MUL_ALGO_FFT // (if squaring == 1, only the first one is init'd) ZmodF_poly_t polys[2]; } ZmodF_mul_info_struct; // ZmodF_mul_info_t allows reference-like semantics for // ZmodF_mul_info_struct: typedef ZmodF_mul_info_struct ZmodF_mul_info_t[1]; /* Initialises ZmodF_mul_info_t for a given n. This function automatically selects the best underlying multiplication algorithm for the given n. The squaring flag is 1 if you intend to use this object for squaring. The object will then use up less memory. If squaring == 0, it's still possible to use this object for squaring, but if squaring == 1, you can't use it for arbitrary multiplication. WARNING: the multiplication time does NOT increase monotonically with n. * If n is divisible by 3, the "threeway" algorithm is available, which is faster than the "plain" algorithm for n >= 24 (on our opteron test machine). * For large n, the "fft" algorithm is available, but there are conditions on the 2-divisibility of n (not very onerous though). NOTE: this function uses stack based memory management. 
*/ void ZmodF_mul_info_init(ZmodF_mul_info_t info, unsigned long n, int squaring); // the following functions initialise with a specific algorithm: void ZmodF_mul_info_init_plain(ZmodF_mul_info_t info, unsigned long n, int squaring); void ZmodF_mul_info_init_threeway(ZmodF_mul_info_t info, unsigned long n, int squaring); void ZmodF_mul_info_init_fft(ZmodF_mul_info_t info, unsigned long n, unsigned long depth, unsigned long m, unsigned long k, int squaring); // releases resources void ZmodF_mul_info_clear(ZmodF_mul_info_t info); // sets res := a * b using the given ZmodF_mul_info_t object void ZmodF_mul_info_mul(ZmodF_mul_info_t, ZmodF_t res, ZmodF_t a, ZmodF_t b); // the following functions are for standalone multiplying/squaring, without // the use of the precomputation stuff above: /* res := a * b PRECONDITIONS: Any combination of aliasing among res, a, b is allowed. scratch must be a buffer of length 2*n, and must NOT alias a, b, res. */ void ZmodF_mul(ZmodF_t res, ZmodF_t a, ZmodF_t b, mp_limb_t* scratch, unsigned long n); /* res := a * a PRECONDITIONS: a may alias res. scratch must be a buffer of length 2*n, and must NOT overlap a or res. 
*/ void ZmodF_sqr(ZmodF_t res, ZmodF_t a, mp_limb_t* scratch, unsigned long n); // ============================================================================ // the following functions are exported for testing purposes: void _ZmodF_mul_fft_split(ZmodF_poly_t poly, ZmodF_t x, unsigned long n); void _ZmodF_mul_fft_combine(ZmodF_t x, ZmodF_poly_t poly, unsigned long m, unsigned long k, unsigned long n); void _ZmodF_mul_fft_convolve_modB( mp_limb_t* out, mp_limb_t* in1, mp_limb_t* in2, unsigned long len); void _ZmodF_mul_fft_convolve_modB2( mp_limb_t* out, mp_limb_t* in1, mp_limb_t* in2, unsigned long len); void _ZmodF_mul_threeway_reduce1(ZmodF_t res, ZmodF_t a, unsigned long m); void _ZmodF_mul_threeway_reduce2(mp_limb_t* res, ZmodF_t a, unsigned long m); #ifdef __cplusplus } #endif #endif // end of file **************************************************************** flint-1.011/profiler.c0000644017361200017500000000627311025357254014521 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** Timing/profiling (C) 2007 William Hart and David Harvey ******************************************************************************/ #include "profiler.h" #include /* clock_last[i] is the last read clock value for clock #i. clock_accum[i] is the total time attributed to clock #i so far. These should not be read directly; use get_clock(i) instead. */ double clock_last[FLINT_NUM_CLOCKS]; double clock_accum[FLINT_NUM_CLOCKS]; void prof_repeat(double* min, double* max, profile_target_t target, void* arg) { // the number of timings that were at least DURATION_THRESHOLD microseconds: unsigned long good_count = 0; double max_time, min_time; // first try one loop unsigned long num_trials = 1; init_clock(0); target(arg, num_trials); double last_time = get_clock(0); // loop until we have enough good times while (1) { double per_trial = last_time / num_trials; // if the last recorded time was long enough, record it if (last_time > DURATION_THRESHOLD) { if (good_count) { if (per_trial > max_time) max_time = per_trial; if (per_trial < min_time) min_time = per_trial; } else max_time = min_time = per_trial; if (++good_count == 5) { // we've got enough data break; } } // adjust num_trials so that the elapsed time gravitates towards // DURATION_TARGET; num_trials can be changed by a factor of // at most 25%, and must be at least 1 if (last_time < 0.0001) last_time = 0.0001; double adjust_ratio = DURATION_TARGET / last_time; if (adjust_ratio > 1.25) adjust_ratio = 1.25; if (adjust_ratio < 0.75) adjust_ratio = 0.75; num_trials = (unsigned long) ceil(adjust_ratio * num_trials); // just to be safe: if (num_trials == 0) num_trials = 1; 
// run another trial init_clock(0); target(arg, num_trials); last_time = get_clock(0); } // store results if (min) *min = min_time; if (max) *max = max_time; } // end of file **************************************************************** flint-1.011/zmod_poly.h0000644017361200017500000003544711025357254014725 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /***************************************************************************** zmod_poly.h: Polynomials over (unsigned) long mod p, for p prime. 
Copyright (C) 2007, David Howden *****************************************************************************/ #include #include #include #include #include #include #include "flint.h" #include "memory-manager.h" #include "mpn_extras.h" #include "long_extras.h" #ifndef _ZMOD_POLY_H_ #define _ZMOD_POLY_H_ #ifdef __cplusplus extern "C" { #endif #define USE_MIDDLE_PRODUCT 0 // Middle product code currently has no proof // and so is not switched on by default typedef struct { unsigned long *coeffs; unsigned long alloc; unsigned long length; unsigned long p; double p_inv; #if PREINV32 uint32_t p32_inv; #endif } zmod_poly_struct; typedef zmod_poly_struct zmod_poly_t[1]; typedef zmod_poly_struct* zmod_poly_p; typedef struct { unsigned long length2; unsigned long limbs2; F_mpn_precomp_t precomp; } zmod_poly_precomp_struct; typedef zmod_poly_precomp_struct zmod_poly_precomp_t[1]; #define SWAP_ZMOD_POLY_PTRS(x, y) \ do { \ zmod_poly_p zzz_ptr = (x); \ (x) = (y); \ (y) = zzz_ptr; \ } while (0); // ------------------------------------------------------ // Initialisation and memory management void zmod_poly_init(zmod_poly_t poly, unsigned long p); void zmod_poly_init_precomp(zmod_poly_t poly, unsigned long p, double p_inv); void zmod_poly_init2(zmod_poly_t poly, unsigned long p, unsigned long alloc); void zmod_poly_init2_precomp(zmod_poly_t poly, unsigned long p, double p_inv, unsigned long alloc); void zmod_poly_clear(zmod_poly_t poly); void zmod_poly_realloc(zmod_poly_t poly, unsigned long alloc); // _bits_ only applies to newly allocated coefficients, not existing ones... 
// this non-inlined version REQUIRES that alloc > poly->alloc void __zmod_poly_fit_length(zmod_poly_t poly, unsigned long alloc); // this is arranged so that the initial comparison (very frequent) is inlined, // but the actual allocation (infrequent) is not static inline void zmod_poly_fit_length(zmod_poly_t poly, unsigned long alloc) { if (alloc > poly->alloc) __zmod_poly_fit_length(poly, alloc); } // ------------------------------------------------------ // Setting/retrieving coefficients static inline unsigned long zmod_poly_get_coeff_ui(zmod_poly_t poly, unsigned long n) { if (n >= poly->length) return 0; return poly->coeffs[n]; } static inline unsigned long _zmod_poly_get_coeff_ui(zmod_poly_t poly, unsigned long n) { return poly->coeffs[n]; } void zmod_poly_set_coeff_ui(zmod_poly_t poly, unsigned long n, unsigned long c); static inline void _zmod_poly_set_coeff_ui(zmod_poly_t poly, unsigned long n, unsigned long c) { poly->coeffs[n] = c; } // ------------------------------------------------------ // String conversions and I/O int zmod_poly_from_string(zmod_poly_t poly, char* s); char* zmod_poly_to_string(zmod_poly_t poly); void zmod_poly_print(zmod_poly_t poly); void zmod_poly_fprint(zmod_poly_t poly, FILE* f); int zmod_poly_read(zmod_poly_t poly); int zmod_poly_fread(zmod_poly_t poly, FILE* f); // ------------------------------------------------------ // Length and degree void __zmod_poly_normalise(zmod_poly_t poly); int __zmod_poly_normalised(zmod_poly_t poly); void zmod_poly_truncate(zmod_poly_t poly, unsigned long length); static inline unsigned long zmod_poly_length(zmod_poly_t poly) { return poly->length; } static inline long zmod_poly_degree(zmod_poly_t poly) { return (long) poly->length - 1; } static inline unsigned long zmod_poly_modulus(zmod_poly_t poly) { return poly->p; } static inline double zmod_poly_precomputed_inverse(zmod_poly_t poly) { return poly->p_inv; } // ------------------------------------------------------ // Assignment void 
_zmod_poly_set(zmod_poly_t res, zmod_poly_t poly);
void zmod_poly_set(zmod_poly_t res, zmod_poly_t poly);

/* Sets poly to the zero polynomial by resetting its length; the coefficient
   storage is left as it is */
static inline
void zmod_poly_zero(zmod_poly_t poly)
{
   poly->length = 0;
}

/* Exchanges the complete contents of poly1 and poly2 (coefficient pointer,
   alloc, length, modulus and precomputed inverses); no coefficients are
   copied */
static inline
void zmod_poly_swap(zmod_poly_t poly1, zmod_poly_t poly2)
{
   unsigned long* temp_coeffs;
   unsigned long temp;
   double temp_p_inv;

   temp_coeffs = poly2->coeffs;
   poly2->coeffs = poly1->coeffs;
   poly1->coeffs = temp_coeffs;

   temp = poly1->alloc;
   poly1->alloc = poly2->alloc;
   poly2->alloc = temp;

   temp = poly1->length;
   poly1->length = poly2->length;
   poly2->length = temp;

   temp = poly1->p;
   poly1->p = poly2->p;
   poly2->p = temp;

   temp_p_inv = poly1->p_inv;
   poly1->p_inv = poly2->p_inv;
   poly2->p_inv = temp_p_inv;

#if PREINV32
   // p32_inv belongs to the modulus just like p_inv, so it must travel with p
   {
      uint32_t temp_p32_inv = poly1->p32_inv;
      poly1->p32_inv = poly2->p32_inv;
      poly2->p32_inv = temp_p32_inv;
   }
#endif
}

/*
   Subpolynomials

   The attach functions make output refer to the coefficient storage of
   input without copying it; output->alloc is not set by them.
*/

static inline
void _zmod_poly_attach(zmod_poly_t output, zmod_poly_t input)
{
   output->length = input->length;
   output->coeffs = input->coeffs;
   output->p = input->p;
   output->p_inv = input->p_inv;
#if PREINV32
   output->p32_inv = input->p32_inv;
#endif
}

static inline
void zmod_poly_attach(zmod_poly_t output, zmod_poly_t input)
{
   _zmod_poly_attach(output, input);
}

/*
   Attach input shifted right by n to output
   (output has length 0 if n exceeds the length of input)
*/

static inline
void _zmod_poly_attach_shift(zmod_poly_t output,
                   zmod_poly_t input, unsigned long n)
{
   if (input->length >= n) output->length = input->length - n;
   else output->length = 0;
   output->coeffs = input->coeffs + n;
   output->p = input->p;
   output->p_inv = input->p_inv;
#if PREINV32
   output->p32_inv = input->p32_inv;
#endif
}

static inline
void zmod_poly_attach_shift(zmod_poly_t output,
                   zmod_poly_t input, unsigned long n)
{
   _zmod_poly_attach_shift(output, input, n);
}

/*
   Attach output to the first n coefficients of input
   (or to all of input if it has fewer than n coefficients);
   output is normalised afterwards
*/

static inline
void _zmod_poly_attach_truncate(zmod_poly_t output,
                   zmod_poly_t input, unsigned long n)
{
   if (input->length < n) output->length = input->length;
   else output->length = n;
   output->coeffs = input->coeffs;
   output->p = input->p;
   output->p_inv = input->p_inv;
#if PREINV32
   output->p32_inv = input->p32_inv;
#endif
   __zmod_poly_normalise(output);
}

static inline
void zmod_poly_attach_truncate(zmod_poly_t output,
                   zmod_poly_t input, unsigned long n)
{
   _zmod_poly_attach_truncate(output, input, n);
}

/* Comparison functions */ int zmod_poly_equal(zmod_poly_t poly1, zmod_poly_t poly2); static inline int zmod_poly_is_one(zmod_poly_t poly1) { if ((poly1->length == 1) && (poly1->coeffs[0] == 1L)) return 1; return 0; } /* Reversal */ void _zmod_poly_reverse(zmod_poly_t output, zmod_poly_t input, unsigned long length); void zmod_poly_reverse(zmod_poly_t output, zmod_poly_t input, unsigned long length); /* Monic polys */ void zmod_poly_make_monic(zmod_poly_t output, zmod_poly_t pol); /* Addition and subtraction */ void zmod_poly_add(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); void zmod_poly_add_without_mod(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); void zmod_poly_sub(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); void _zmod_poly_sub(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); void zmod_poly_neg(zmod_poly_t res, zmod_poly_t poly); /* Shifting functions */ void zmod_poly_left_shift(zmod_poly_t res, zmod_poly_t poly, unsigned long k); void zmod_poly_right_shift(zmod_poly_t res, zmod_poly_t poly, unsigned long k); /* Polynomial multiplication All multiplication functions require that the modulus be no more than FLINT_BITS-1 bits */ void zmod_poly_mul(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); void zmod_poly_sqr(zmod_poly_t res, zmod_poly_t poly); /* Requires that poly1 bits + poly2 bits + log_length is not greater than 2*FLINT_BITS */ void zmod_poly_mul_KS(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits_input); void _zmod_poly_mul_KS(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits_input); void zmod_poly_mul_KS_trunc(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits_input, unsigned long trunc); void _zmod_poly_mul_KS_trunc(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits_input, unsigned long trunc); void _zmod_poly_mul_KS_trunc_precomp(zmod_poly_t output, zmod_poly_p input1, zmod_poly_precomp_t pre, unsigned long 
bits_input, unsigned long trunc); #if USE_MIDDLE_PRODUCT void _zmod_poly_mul_KS_middle(zmod_poly_t output, zmod_poly_p input1, zmod_poly_p input2, unsigned long bits_input, unsigned long trunc); void zmod_poly_mul_KS_middle(zmod_poly_t output, zmod_poly_p input1, zmod_poly_p input2, unsigned long bits_input, unsigned long trunc); #endif void _zmod_poly_mul_classical(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); void __zmod_poly_mul_classical_mod_last(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits); void __zmod_poly_mul_classical_mod_throughout(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits); void zmod_poly_mul_classical(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); void _zmod_poly_sqr_classical(zmod_poly_t res, zmod_poly_t poly); void zmod_poly_sqr_classical(zmod_poly_t res, zmod_poly_t poly); void _zmod_poly_mul_classical_trunc(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc); void __zmod_poly_mul_classical_trunc_mod_last(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits, unsigned long trunc); void __zmod_poly_mul_classical_trunc_mod_throughout(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits, unsigned long trunc); void zmod_poly_mul_classical_trunc(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc); void _zmod_poly_mul_classical_trunc_left(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc); void __zmod_poly_mul_classical_trunc_left_mod_last(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits, unsigned long trunc); void __zmod_poly_mul_classical_trunc_left_mod_throughout(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long bits, unsigned long trunc); void zmod_poly_mul_classical_trunc_left(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc); void zmod_poly_mul_trunc_n(zmod_poly_t res, zmod_poly_t poly1, 
zmod_poly_t poly2, unsigned long trunc); void zmod_poly_mul_trunc_left_n(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long trunc); void zmod_poly_mul_trunc_n_precomp_init(zmod_poly_precomp_t pre, zmod_poly_p input2, unsigned long bits_input, unsigned long length1); void zmod_poly_precomp_clear(zmod_poly_precomp_t pre); void zmod_poly_mul_trunc_n_precomp(zmod_poly_t output, zmod_poly_p input1, zmod_poly_precomp_t pre, unsigned long trunc); #if USE_MIDDLE_PRODUCT void _zmod_poly_mul_KS_middle_precomp(zmod_poly_t output, zmod_poly_p input1, zmod_poly_precomp_t pre, unsigned long bits_input, unsigned long trunc); #endif void _zmod_poly_mul_KS_precomp(zmod_poly_t output, zmod_poly_t input1, zmod_poly_precomp_t pre, unsigned long bits_input); void zmod_poly_mul_precomp_init(zmod_poly_precomp_t pre, zmod_poly_t input2, unsigned long bits_input, unsigned long length1); /* Bit packing functions */ unsigned long zmod_poly_bits(zmod_poly_t poly); void _zmod_poly_bit_pack_mpn(mp_limb_t * res, zmod_poly_t poly, unsigned long bits, unsigned long length); void _zmod_poly_bit_unpack_mpn(zmod_poly_t poly, mp_limb_t *mpn, unsigned long length, unsigned long bits); void print_binary(unsigned long n, unsigned long len); void print_binary2(unsigned long n, unsigned long len, unsigned long space_bit); /* Scalar multiplication */ void _zmod_poly_scalar_mul(zmod_poly_t res, zmod_poly_t poly, unsigned long scalar); void zmod_poly_scalar_mul(zmod_poly_t res, zmod_poly_t poly, unsigned long scalar); void __zmod_poly_scalar_mul_without_mod(zmod_poly_t res, zmod_poly_t poly, unsigned long scalar); /* Division */ void zmod_poly_divrem_classical(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B); void __zmod_poly_divrem_classical_mod_last(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B); void zmod_poly_div_classical(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B); void __zmod_poly_div_classical_mod_last(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B); void 
zmod_poly_div_divconquer_recursive(zmod_poly_t Q, zmod_poly_t BQ, zmod_poly_t A, zmod_poly_t B); void zmod_poly_divrem_divconquer(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B); void zmod_poly_div_divconquer(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B); /* Newton Inversion */ void zmod_poly_newton_invert_basecase(zmod_poly_t Q_inv, zmod_poly_t Q, unsigned long n); void zmod_poly_newton_invert(zmod_poly_t Q_inv, zmod_poly_t Q, unsigned long n); /* Newton Division */ void zmod_poly_div_series(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B, unsigned long n); void zmod_poly_div_newton(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B); void zmod_poly_divrem_newton(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B); #define ZMOD_DIV_BASECASE_CUTOFF 64 static inline void zmod_poly_divrem(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B) { if ((B->length < ZMOD_DIV_BASECASE_CUTOFF) && (A->length < 2*ZMOD_DIV_BASECASE_CUTOFF)) { zmod_poly_divrem_classical(Q, R, A, B); return; } zmod_poly_divrem_newton(Q, R, A, B); } static inline void zmod_poly_div(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B) { if ((B->length < ZMOD_DIV_BASECASE_CUTOFF) && (A->length < 2*ZMOD_DIV_BASECASE_CUTOFF)) { zmod_poly_div_classical(Q, A, B); return; } zmod_poly_div_newton(Q, A, B); } /* Resultant */ unsigned long zmod_poly_resultant_euclidean(zmod_poly_t a, zmod_poly_t b); static inline unsigned long zmod_poly_resultant(zmod_poly_t a, zmod_poly_t b) { return zmod_poly_resultant_euclidean(a, b); } /* GCD */ void zmod_poly_gcd(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); int zmod_poly_gcd_invert(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2); void zmod_poly_xgcd(zmod_poly_t res, zmod_poly_t s, zmod_poly_t t, zmod_poly_t poly1, zmod_poly_t poly2); #ifdef __cplusplus } #endif #endif /* _ZMOD_POLY_H_ */ flint-1.011/ZmodF_mul-test.c0000644017361200017500000004000311025357254015535 0ustar 
tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** ZmodF_mul-test.c: test module for ZmodF_mul module Copyright (C) 2007, David Harvey *****************************************************************************/ #include #include #include "ZmodF_mul.h" #include "test-support.h" #include "memory-manager.h" #define DEBUG 0 // prints debug information /* Prints the ZmodF_t to stdout in hex, each limb in a separate block, most significant limb (i.e. the overflow limb) first. */ void ZmodF_print(ZmodF_t x, unsigned long n) { for (long i = n; i >= 0; i--) #if FLINT_BITS == 64 printf("%016lx ", x[i]); #else printf("%08lx ", x[i]); #endif } /* Prints each coefficient of the polynomial on a separate line. 
*/ void ZmodF_poly_print(ZmodF_poly_t x) { for (unsigned long k = 0; k < (1UL << x->depth); k++) { ZmodF_print(x->coeffs[k], x->n); printf("\n"); } } int test__ZmodF_mul_fft_split() { int success = 1; mpz_t x, y, z; mpz_init(x); mpz_init(y); mpz_init(z); mp_limb_t buf[300]; for (unsigned long n = 1; n < 200 && success; n++) { for (unsigned long depth = 0; ((n*FLINT_BITS) % (1 << depth) == 0) && success; depth++) { unsigned long bits = (n*FLINT_BITS) >> depth; unsigned long m = (bits-1)/FLINT_BITS + 1; ZmodF_poly_t poly; ZmodF_poly_init(poly, depth, m, 1); #if DEBUG printf("n = %d, depth = %d, m = %d\n", n, depth, m); #endif for (unsigned long trial = 0; trial < 120; trial++) { random_limbs(buf, n); buf[n] = 0; mpn_to_mpz(x, buf, n); _ZmodF_mul_fft_split(poly, buf, n); for (unsigned long i = 0; i < (1 << depth); i++) { mpz_tdiv_r_2exp(y, x, bits); mpz_tdiv_q_2exp(x, x, bits); mpn_to_mpz(z, poly->coeffs[i], m+1); if (mpz_cmp(z, y)) success = 0; } } ZmodF_poly_clear(poly); } } mpz_clear(x); mpz_clear(y); mpz_clear(z); return success; } int test__ZmodF_mul_fft_combine() { int success = 1; mpz_t x, y, p, q, r, s, total; mpz_init(x); mpz_init(y); mpz_init(s); mpz_init(r); mpz_init(q); mpz_init(p); mpz_init(total); mp_limb_t buf[300]; for (unsigned long n = 1; n < 80 && success; n++) { for (unsigned long depth = 0; ((n*FLINT_BITS) % (1 << depth) == 0) && success; depth++) { for (unsigned long m = 1; m < n/4 && success; m++) { for (unsigned long k = 0; k < 5 && success; k++) { #if DEBUG printf("n = %ld, depth = %ld, m = %ld, k = %ld\n", n, depth, m, k); #endif ZmodF_poly_t poly; ZmodF_poly_init(poly, depth, m+k, 1); // p := B^n + 1 mpz_set_ui(p, 1); mpz_mul_2exp(p, p, n*FLINT_BITS); mpz_add_ui(p, p, 1); // q := (B^m + 1)*B^k mpz_set_ui(q, 1); mpz_mul_2exp(q, q, m*FLINT_BITS); mpz_add_ui(q, q, 1); mpz_mul_2exp(q, q, k*FLINT_BITS); // r := B^(m+k) - 1 mpz_set_ui(r, 1); mpz_mul_2exp(r, r, (m+k)*FLINT_BITS); mpz_sub_ui(r, r, 1); // s := B^(m+k)/2 mpz_set_ui(s, 1); 
mpz_mul_2exp(s, s, (m+k)*FLINT_BITS - 1); for (unsigned long trial = 0; trial < 20 && success; trial++) { mpz_set_ui(total, 0); for (long i = (1 << depth) - 1; i >= 0; i--) { // select random x in (0, B^(m+k)) mpz_set_ui(x, 0); while (!mpz_sgn(x)) { mpz_rrandomb(x, randstate, (m+k)*FLINT_BITS); if (random_ulong(2)) // to get high bit 0 sometimes mpz_sub(x, r, x); } // push it down to (-B^(m+k)/2, B^(m+k)/2) mpz_sub(x, x, s); // add it to running total mpz_mul_2exp(total, total, (n*FLINT_BITS) >> depth); mpz_add(total, total, x); // normalise it into [0, q), and store in polynomial mpz_mod(x, x, q); mpz_to_mpn(poly->coeffs[i], m+k+1, x); } // compare result to target function _ZmodF_mul_fft_combine(buf, poly, m, k, n); ZmodF_normalise(buf, n); mpn_to_mpz(y, buf, n+1); mpz_mod(total, total, p); if (mpz_cmp(total, y)) success = 0; } ZmodF_poly_clear(poly); } } } } mpz_clear(x); mpz_clear(y); mpz_clear(s); mpz_clear(r); mpz_clear(q); mpz_clear(p); mpz_clear(total); return success; } int test__ZmodF_mul_threeway_reduce() { int success = 1; mp_limb_t in[2000]; mp_limb_t out1[2000]; mp_limb_t out2[2000]; mp_limb_t test[2000]; mpz_t x, y, power, power2, mod1, mod2; mpz_init(x); mpz_init(y); mpz_init(power); mpz_init(power2); mpz_init(mod1); mpz_init(mod2); for (unsigned long n = 3; n < 300 && success; n += 3) { #if DEBUG printf("n = %d\n", n); #endif // power = B^n mpz_set_ui(power, 1); mpz_mul_2exp(power, power, n*FLINT_BITS); // power2 = B^(2n/3) mpz_set_ui(power2, 1); mpz_mul_2exp(power2, power2, 2*n/3*FLINT_BITS); // mod1 = B^(n/3) + 1 mpz_set_ui(mod1, 1); mpz_mul_2exp(mod1, mod1, n/3*FLINT_BITS); mpz_add_ui(mod1, mod1, 1); // mod2 = B^(2n/3) - B^(n/3) + 1 mpz_set(mod2, mod1); mpz_mul_2exp(mod2, mod2, n/3*FLINT_BITS); mpz_sub(mod2, mod2, mod1); mpz_sub(mod2, mod2, mod1); mpz_add_ui(mod2, mod2, 3); for (unsigned long trial = 0; trial < 250 && success; trial++) { random_limbs(in, n); in[n] = 0; mpn_to_mpz(x, in, n+1); _ZmodF_mul_threeway_reduce1(out1, in, n/3); 
ZmodF_normalise(out1, n/3); mpz_mod(y, x, mod1); mpz_to_mpn(test, n/3 + 1, y); if (mpn_cmp(test, out1, n/3 + 1)) success = 0; _ZmodF_mul_threeway_reduce2(out2, in, n/3); mpz_mod(y, x, mod2); mpz_to_mpn(test, 2*n/3, y); if (mpn_cmp(test, out2, 2*n/3)) { // didn't work... check if the "other answer" is correct mpz_add(y, y, mod2); if (mpz_cmp(y, power2) >= 0) success = 0; else { mpz_to_mpn(test, 2*n/3, y); if (mpn_cmp(test, out2, 2*n/3)) success = 0; } } } } mpz_clear(mod2); mpz_clear(mod1); mpz_clear(power2); mpz_clear(power); mpz_clear(y); mpz_clear(x); return success; } int test_ZmodF_mul_info_mul_plain() { int success = 1; mp_limb_t in1[2000]; mp_limb_t in2[2000]; mp_limb_t out[2000]; mpz_t x1, x2, y, z, p; mpz_init(x1); mpz_init(x2); mpz_init(y); mpz_init(z); mpz_init(p); for (unsigned long n = 1; n < 100 && success; n++) { #if DEBUG printf("n = %d\n", n); #endif // p = B^n + 1 mpz_set_ui(p, 1); mpz_mul_2exp(p, p, n*FLINT_BITS); mpz_add_ui(p, p, 1); ZmodF_mul_info_t info; ZmodF_mul_info_init_plain(info, n, 0); for (unsigned long trial = 0; trial < 1000 && success; trial++) { if (random_ulong(4) == 0) { // put in -1 mod p every now and then ZmodF_zero(in1, n); in1[n] = 1; } else { random_limbs(in1, n); in1[n] = 0; } if (random_ulong(4) == 0) { // put in -1 mod p every now and then ZmodF_zero(in2, n); in2[n] = 1; } else { random_limbs(in2, n); in2[n] = 0; } // test multiplication mpn_to_mpz(x1, in1, n+1); mpn_to_mpz(x2, in2, n+1); mpz_mul(z, x1, x2); mpz_mod(z, z, p); ZmodF_mul_info_mul(info, out, in1, in2); ZmodF_normalise(out, n); mpn_to_mpz(y, out, n+1); if (mpz_cmp(y, z)) success = 0; // test squaring mpz_mul(z, x1, x1); mpz_mod(z, z, p); ZmodF_mul_info_mul(info, out, in1, in1); ZmodF_normalise(out, n); mpn_to_mpz(y, out, n+1); if (mpz_cmp(y, z)) success = 0; } ZmodF_mul_info_clear(info); } mpz_clear(x1); mpz_clear(x2); mpz_clear(y); mpz_clear(z); mpz_clear(p); return success; } int test_ZmodF_mul_info_mul_threeway() { int success = 1; mp_limb_t in1[2000]; 
mp_limb_t in2[2000]; mp_limb_t out_plain[2000]; mp_limb_t out_threeway[2000]; mpz_t x; mpz_init(x); for (unsigned long n = 3; n < 100 && success; n += 3) { #if DEBUG printf("n = %d\n", n); #endif ZmodF_mul_info_t info_plain, info_threeway; ZmodF_mul_info_init_threeway(info_threeway, n, 0); ZmodF_mul_info_init_plain(info_plain, n, 0); for (unsigned long trial = 0; trial < 50000 && success; trial++) { if (random_ulong(4) == 0) { // put in -1 mod p every now and then ZmodF_zero(in1, n); in1[n] = 1; } else { random_limbs(in1, n); in1[n] = 0; } if (random_ulong(4) == 0) { // put in -1 mod p every now and then ZmodF_zero(in2, n); in2[n] = 1; } else { random_limbs(in2, n); in2[n] = 0; } // test multiplication ZmodF_mul_info_mul(info_plain, out_plain, in1, in2); ZmodF_mul_info_mul(info_threeway, out_threeway, in1, in2); ZmodF_normalise(out_plain, n); ZmodF_normalise(out_threeway, n); if (mpn_cmp(out_plain, out_threeway, n+1)) success = 0; // test squaring ZmodF_mul_info_mul(info_plain, out_plain, in1, in1); ZmodF_mul_info_mul(info_threeway, out_threeway, in1, in1); ZmodF_normalise(out_plain, n); ZmodF_normalise(out_threeway, n); if (mpn_cmp(out_plain, out_threeway, n+1)) success = 0; } ZmodF_mul_info_clear(info_plain); ZmodF_mul_info_clear(info_threeway); } mpz_clear(x); return success; } int test_ZmodF_mul_info_mul_fft() { int success = 1; mp_limb_t in1[1000]; mp_limb_t in2[1000]; mp_limb_t out_plain[1000]; mp_limb_t out_fft[1000]; mpz_t x; mpz_init(x); for (unsigned long n = 1; n < 300 && success; n++) { for (unsigned long depth = 1; (n*FLINT_BITS) % (1 << depth) == 0 && (depth <= FLINT_LG_BITS_PER_LIMB + 4) && success; depth++) { unsigned long input_bits = (n*FLINT_BITS) >> depth; unsigned long output_bits = 2*input_bits + 1 + depth; unsigned long target_m = ((output_bits - 1) >> FLINT_LG_BITS_PER_LIMB) + 1; for (unsigned long m = target_m - 2; m <= target_m + 3 && success; m++) { if ((m*FLINT_BITS) % (1 << depth) != 0) continue; for (unsigned long k = 0; k <= 2 && 
success; k++) { if (m + k < target_m) continue; if (k > m) continue; #if DEBUG printf("n = %ld, depth = %ld, m = %ld, k = %ld\n", n, depth, m, k); #endif ZmodF_mul_info_t info_plain, info_fft; ZmodF_mul_info_init_plain(info_plain, n, 0); ZmodF_mul_info_init_fft(info_fft, n, depth, m, k, 0); for (unsigned long trial = 0; trial < 10 && success; trial++) { if (random_ulong(4) == 0) { // put in -1 mod p every now and then ZmodF_zero(in1, n); in1[n] = 1; } else { random_limbs(in1, n); in1[n] = 0; } if (random_ulong(4) == 0) { // put in -1 mod p every now and then ZmodF_zero(in2, n); in2[n] = 1; } else { random_limbs(in2, n); in2[n] = 0; } // test multiplication ZmodF_mul_info_mul(info_plain, out_plain, in1, in2); ZmodF_mul_info_mul(info_fft, out_fft, in1, in2); ZmodF_normalise(out_plain, n); ZmodF_normalise(out_fft, n); if (mpn_cmp(out_plain, out_fft, n+1)) success = 0; // test squaring ZmodF_mul_info_mul(info_plain, out_plain, in1, in1); ZmodF_mul_info_mul(info_fft, out_fft, in1, in1); ZmodF_normalise(out_plain, n); ZmodF_normalise(out_fft, n); if (mpn_cmp(out_plain, out_fft, n+1)) success = 0; } ZmodF_mul_info_clear(info_fft); ZmodF_mul_info_clear(info_plain); } } } } mpz_clear(x); return success; } /**************************************************************************** Main test functions ****************************************************************************/ #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? "ok\n" : "FAIL!\n"); void ZmodF_mul_test_all() { int success, all_success = 1; RUN_TEST(_ZmodF_mul_fft_split); RUN_TEST(_ZmodF_mul_fft_combine); RUN_TEST(_ZmodF_mul_threeway_reduce); RUN_TEST(ZmodF_mul_info_mul_plain); RUN_TEST(ZmodF_mul_info_mul_threeway); RUN_TEST(ZmodF_mul_info_mul_fft); printf(all_success ? 
"\nAll tests passed\n" : "\nAt least one test FAILED!\n"); } int main() { test_support_init(); ZmodF_mul_test_all(); test_support_cleanup(); flint_stack_cleanup(); return 0; } // end of file **************************************************************** flint-1.011/fmpz.h0000644017361200017500000001723311025357254013656 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** fmpz.h: "flat" multi-precision integer format Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #ifndef FLINT_FMPZ_H #define FLINT_FMPZ_H #ifdef __cplusplus extern "C" { #endif #include #include "memory-manager.h" #include "flint.h" #include "long_extras.h" typedef mp_limb_t * fmpz_t; #define ABS(x) (((long) x < 0) ? 
-x : x) #define NORM(coeff) \ do { \ if ((coeff)[0]) \ { \ if ((long) (coeff)[0] < 0) \ { \ while ((!(coeff)[-(coeff)[0]]) && (coeff)[0]) (coeff)[0]++; \ } else \ { \ while ((!(coeff)[(coeff)[0]]) && (coeff)[0]) (coeff)[0]--; \ } \ } \ } while (0); void fmpz_check_normalisation(const fmpz_t x); static inline fmpz_t fmpz_init(const unsigned long limbs) { return (fmpz_t) flint_heap_alloc(limbs + 1); } static inline fmpz_t fmpz_realloc(fmpz_t f, const unsigned long limbs) { return (fmpz_t) flint_heap_realloc(f, limbs + 1); } static inline void fmpz_clear(const fmpz_t f) { flint_heap_free(f); } void fmpz_print(fmpz_t in); void fmpz_random_limbs2(fmpz_t x, unsigned long n); static inline unsigned long fmpz_size(const fmpz_t x) { long limb = (long) x[0]; return (unsigned long) ((limb < 0L) ? -limb : limb); } static inline unsigned long fmpz_bits(const fmpz_t x) { unsigned long limbs = FLINT_ABS(x[0]); unsigned long bits = FLINT_BIT_COUNT(x[limbs]); if (limbs == 0) return 0; return (((limbs-1)< 0L) { res[0] = 1L; res[1] = x; } else if (x < 0L) { res[0] = -1L; res[1] = -x; } else res[0] = 0UL; } // returns nonzero if op1 == op2 static inline int fmpz_equal(const fmpz_t op1, const fmpz_t op2) { // if the signs/sizes are different, they can't be equal if (op1[0] != op2[0]) return 0; // compare actual limbs for (long i = 0; i < fmpz_size(op1); i++) { if (op1[i+1] != op2[i+1]) return 0; } return 1; } // sets res := op // doesn't check for aliasing (i.e. 
if op == res, it will stupidly copy data) // assumes res has enough room static inline void fmpz_set(fmpz_t res, const fmpz_t op) { long i = fmpz_size(op); do { res[i] = op[i]; i--; } while (i >= 0); } // res must have enough space for x void mpz_to_fmpz(fmpz_t res, const mpz_t x); void fmpz_to_mpz(mpz_t res, const fmpz_t x); void fmpz_add(fmpz_t coeffs_out, const fmpz_t in1, const fmpz_t in2); void fmpz_add_ui_inplace(fmpz_t output, const unsigned long x); void fmpz_add_ui(fmpz_t output, const fmpz_t input, const unsigned long x); void __fmpz_add_ui_inplace(fmpz_t output, const unsigned long x); void fmpz_sub(fmpz_t coeffs_out, const fmpz_t in1, const fmpz_t in2); void fmpz_sub_ui_inplace(fmpz_t output, const unsigned long x); void fmpz_sub_ui(fmpz_t output, const fmpz_t input, const unsigned long x); void fmpz_mul(fmpz_t res, const fmpz_t a, const fmpz_t b); void __fmpz_mul(fmpz_t res, const fmpz_t a, const fmpz_t b); void fmpz_mul_ui(fmpz_t output, const fmpz_t input, const unsigned long x); void fmpz_addmul(fmpz_t res, const fmpz_t a, const fmpz_t b); void fmpz_tdiv(fmpz_t res, const fmpz_t a, const fmpz_t b); void fmpz_fdiv(fmpz_t res, const fmpz_t a, const fmpz_t b); void fmpz_tdiv_ui(fmpz_t output, const fmpz_t input, const unsigned long x); unsigned long fmpz_mod_ui(const fmpz_t input, const unsigned long x); void fmpz_pow_ui(fmpz_t output, const fmpz_t input, const unsigned long exp); unsigned long __fmpz_power_of_two(const fmpz_t x); void fmpz_div_2exp(fmpz_t output, fmpz_t x, unsigned long exp); void fmpz_mul_2exp(fmpz_t output, fmpz_t x, unsigned long exp); void fmpz_gcd(fmpz_t output, fmpz_t x1, fmpz_t x2); /* Computes the binomial coefficient next := bin(n, k) given prev = bin(n, k-1) The output is assumed to have enough space for the result, plus one extra limb (for efficiency reasons) Note: bin(n, k) requires at most n bits to represent it when n and k are positive Currently only implemented for positive n and k Todo: implement this for negative n 
and k */ static inline void __fmpz_binomial_next(fmpz_t next, const fmpz_t prev, const long n, const long k) { fmpz_mul_ui(next, prev, n-k+1); fmpz_tdiv_ui(next, next, k); } static inline int fmpz_is_one(const fmpz_t f) { if (f[0] == 1L) return (f[1] == 1L); else return 0; } static inline int fmpz_is_zero(const fmpz_t f) { return (f[0] == 0L); } static inline void __fmpz_normalise(const fmpz_t f) { NORM(f); } static inline int fmpz_cmpabs(const fmpz_t f1, const fmpz_t f2) { unsigned long size1 = FLINT_ABS(f1[0]); unsigned long size2 = FLINT_ABS(f2[0]); if (size1 < size2) return -1; if (size1 > size2) return 1; return mpn_cmp(f1 + 1, f2 + 1, size1); } void fmpz_sqrtrem(fmpz_t sqrt, fmpz_t rem, fmpz_t n); /* Computes the unique integer mod m1*m2 which is r1 mod m1 and r2 mod m2 where m1 is an fmpz_t and m2 is a coprime unsigned long. Assumes both m1 and m2 are reduced modulo their respective moduli. c must be set to m1^{-1} mod m2. pre must be set to a precomputed inverse of m2 Assumes the number of bits of m2 is at most FLINT_D_BITS-1 */ static inline void fmpz_CRT_ui_precomp(fmpz_t out, fmpz_t r1, fmpz_t m1, unsigned long r2, unsigned long m2, unsigned long c, double pre) { unsigned long r1mod = fmpz_mod_ui(r1, m2); unsigned long s = z_submod(r2, r1mod, m2); s = z_mulmod_precomp(s, c, m2, pre); fmpz_t sm1 = fmpz_init(m1[0] + 1); fmpz_mul_ui(sm1, m1, s); fmpz_add(out, r1, sm1); fmpz_clear(sm1); } /* As for fmpz_CRT_ui_precomp except that it assumes the number of bits of m2 is at most FLINT_BITS-1 */ static inline void fmpz_CRT_ui2_precomp(fmpz_t out, fmpz_t r1, fmpz_t m1, unsigned long r2, unsigned long m2, unsigned long c, double pre) { unsigned long r1mod = fmpz_mod_ui(r1, m2); unsigned long s = z_submod(r2, r1mod, m2); s = z_mulmod2_precomp(s, c, m2, pre); fmpz_t sm1 = fmpz_init(m1[0] + 1); fmpz_mul_ui(sm1, m1, s); fmpz_add(out, r1, sm1); fmpz_clear(sm1); } #ifdef __cplusplus } #endif #endif // *************** end of file 
flint-1.011/todo.txt0000644017361200017500000002320611025357254014234 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ === general stuff === * try GREDC algorithm * we should be checking somewhere (at build time?) that FLINT_BITS really is the same as GMP's bits per limb * cache hints: * add some documentation for cache hint macros in flint.h * instead of having loops like for (j = 0; j < x->n; j += 8) FLINT_PREFETCH(x->coeffs[i+8], j); everywhere, can't we have a FLINT_BLOCK_PREFETCH macro? * determine cache size at build time, instead of #defined in flint.h!!!! * possibly determine size of each cache in hierarchy? * Collect together timing statistics on various programs (MAGMA, NTL, PARI, FLINT, etc), in one convenient place. (David) * Write an asymptotically fast GCD algorithm for the Z package for very large integers. (First check how far along the GMP implementation is, and compare it speedwise to MAGMA (which presumably is very fast :-) ) * follow up issue related to arithmetic right shifts --- see NTL's #define for this. Add test code to the build process to check for this. 
* write threadsafe limb allocator === fft === * GMP has a cool idea for doing very long butterflies. Instead of doing a call to mpn_add_n and mpn_sub_n separately, it works in chunks to ensure everything is done in cache. This will only make a difference when the coefficients are so large they don't even fit in L1 any more. To do this *properly* it would need to work for rotations as well. * Is there a way to do reduce_mod_p_exact with one pass over the data in the worst case? (Currently the worst case is two passes.) * study the overflow bit guarantees more carefully in the inverse transform. The cross butterflies seem to add a factor of 3 rather than 2 to the errors. Currently we are being too conservative in doing fast reductions; we could get away with fewer, but would need to do a safety reduction pass over the data after a certain number of layers. * maybe it's not optimal to store the coefficients n+1 limbs apart. Bill mentioned some potential cache-thrashing issues on intels. Might be better to add a bit of padding. === fmpz_poly module === * for KS, try the idea where we evaluate X1 = f(2^n)g(2^n), X2 = f(-2^n)g(-2^n) and then reconstruct output from X1+X2 and X1-X2 * It seems a little nuts to be using ZmodF_poly with a single coefficient in _fmpz_poly_mul_KS(). There's something wrong with the code structure here. The bitpacking etc routines need to be abstracted differently or something. -- david * we should change the name of ABS() in fmpz_poly. This will certainly interfere with someone else's namespace one day. -- david === modpmul module (branch) === * try writing assembly version of mul_redc. Maybe the three multiplications can be pipelined. Also, it might be possible to write a version which does two independent multiplications in parallel, and gets some pipelining happening that way. But that would require rewriting FFTs to take advantage of it. * consider writing a radix-4 or split-radix FFT. 
I don't understand these too well, but it seems like these only give a speedup on "complex" data. There was a paper Bill mentioned that simulates a "complex" FFT by working over GF(p^2), so perhaps this could be used. * see whether doing two FFT's at once saves time on computing roots of unity and whether the multiplications being data independent might allow them to be interleaved and thus overlapped due to pipelining on the Opteron. * try to speed up basecase matrix transposition code * examine NTL's modmuls more carefully. Benchmark just the modmuls by themselves. * think about the ordering of loops in each stage of the outer fft routine. Sometimes might be slightly better to start at the end and work backwards to improve locality. === ZmodF module === * in revision 507, I rewrote ZmodF_mul_2exp(). For long coefficients the new version should be more efficient, since it does fewer passes over the data on average. But this needs to be checked. Moreover it's quite possible the new version is slower for small coefficients, which also needs to be investigated. === ZmodF_poly module === * reorganise code to permit doing pointwise mults + inner FFTs + inner IFFTs with better locality === ZmodF_mul module === * for squaring, we don't need to allocate so much memory === Miscellaneous === * Write new faster bitpacking routines - see branch (include support for 32 bit machines) * change FLINT_LG_BITS_PER_LIMB to FLINT_LG_BITS, same for BYTES, etc * use mpn_random2 in more places instead of mpz_rrandomb, in fact come up with some FLINT wide macros perhaps * Add documents found by Tomasz to FLINT website * Add basic non-truncated non-cache friendly FFT and convolution. * Add hard coded small FFT's, etc. 
* Add fpLLL, GMP-ECM, mpfr, gf2x * Optimise fmpz_poly addition, since it is slower than the mpz_poly version and used by a *lot* * Implement odd/even karatsuba * Make multiplication shift when there are trailing zeroes * Make powering shift when there are trailing zeroes * Make division code use Newton division when monic B and tune the crossover to Newton division in the other cases * Clean up code for classical poly division * Make integer multiplication tuning code switch to making things divisible by 3 only when 3-way has a chance of being faster * Clean up recursive poly multiplication code * Add poly GCD * Retune almost everything and factor out tuning constants (including the one in flint.h) * Optimise division functions for small coefficients (precomputed inverses?) and particularly for monic B * Implement Graeffe and Kung's tricks * Implement David's KS trick * Implement Mulders' short product * Make F_mpz_mul deal with powers of 2 factors * Make QS polynomial selection work for very small factorisations * Write code to time all manner of basic things such as modmuls, in and out of cache memory accesses etc * Add stack based memory management back into recursive division functions where this improves performance, but do it neatly and safely perhaps using some macros * Clean up fmpz_poly by putting as much as possible into fmpz * Do lots more profiling and put it on the website * Clean up fmpz_poly test code, making much more of it readable with extensive use of macros. 
* Write aliasing test code for series division * Implement REDC * Implement a small prime FFT * Implement middle product for series division * Reimplement splitting * Comment code * Implement algorithms for exact division and exact scalar division * Implement recursive algorithm to check if polynomial is divisible by another * Implement Karatsuba and classical squaring algorithms * Implement power series module * Hash tables for QS * Get tinyQS working properly * Save making a copy in fmpz_div_2exp * Make documentation point out where aliasing shouldn't occur in division functions and in fmpz * Implement polynomial evaluation * Implement polynomial derivative * Implement polynomial composition * Implement polynomial remainder and pseudo remainder * Implement polynomial translation f(x+t) * Implement polynomial is_zero and is_one * Support in place polynomial negation * In zmod_poly make truncate in place always in line with fmpz_poly * In zmod_poly make set_coeff set_coeff_ui in line with fmpz_poly * In fmpz_poly_div_series and zmod_poly_div_series, allow A and B to be aliased * In fmpz_poly_div_series and zmod_poly_div_series, deal with special case where the numerator is the series 1 by calling newton_invert * In fmpz_poly_div_newton the special case doesn't need to set a coefficient to zero then normalise * Have zmod_poly_divrem_newton switch out to divrem_divconquer for small lengths * Write test functions for long_extras gcd functions * Decide if zmod_poly gcd functions should return 1 if polys are coprime, or just return any unit. Decide if the gcd ought to be monic. * Speed up CRT by checking if any new coefficients have more limbs than the old coefficient and only doing the full equality comparison if not. Also get rid of the allocation of scratch space in the fmpz_CRT_precomp routines. * Add FFT caching to newton inversion functions * Try unrolling loops in mpn_extras, but beware compiler flags may already make this happen. 
* Check that aliasing checks both poly1 == poly2 and poly1->length == poly2->length for unmanaged functions, but only the former for managed functions. * Remove NTL as a build dependency of FLINT * Make the functions which read a polynomial from a string, set the size of the coefficients before reading in). * Write a z_div_precomp function flint-1.011/mpz_poly-tuning.c0000644017361200017500000000230711025357254016044 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* Tuning values for mpz_poly module Automatically generated by mpz_poly-tune program */ #include "mpz_poly-tuning.h" #include "mpz_poly.h" unsigned long mpz_poly_kara_crossover_table[] = {9, 7, 6, 5, 4, 4, 3, 0}; unsigned long mpz_poly_kara_crossover_table_size = 8; // end of file ********************************* flint-1.011/fmpz.c0000644017361200017500000005117311025357254013652 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** fmpz.c: "flat" integer format Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include #include #include #include "fmpz.h" #include "flint.h" #include "memory-manager.h" #include "longlong_wrapper.h" #include "longlong.h" #include "mpn_extras.h" #include "F_mpn_mul-tuning.h" #include "long_extras.h" #define SWAP_PTRS(x_dummy_p, y_dummy_p) \ do { \ fmpz_t swap_temp_p = x_dummy_p; \ x_dummy_p = y_dummy_p; \ y_dummy_p = swap_temp_p; \ } while(0); void fmpz_check_normalisation(const fmpz_t x) { if ((x[0]) && (!x[ABS(x[0])])) { printf("Error: fmpz_t not normalised!\n"); abort(); } } void mpz_to_fmpz(fmpz_t res, const mpz_t x) { if (mpz_sgn(x)) { size_t countp; mpz_export(res + 1, &countp, -1, sizeof(mp_limb_t), 0, 0, x); res[0] = ((long) mpz_sgn(x) > 0L) ? 
(long) countp : (long) -countp; } else res[0] = 0L; } void fmpz_to_mpz(mpz_t res, const fmpz_t x) { long size = x[0]; if (size == 0) mpz_set_ui(res, 0); else { mpz_import(res, ABS(size), -1, sizeof(mp_limb_t), 0, 0, x + 1); if (size < 0) mpz_neg(res, res); } } void fmpz_print(fmpz_t in) { mpz_t coeff; mpz_init(coeff); fmpz_to_mpz(coeff, in); gmp_printf("%Zd", coeff); mpz_clear(coeff); } /* Generate a random fmpz_t with n limbs with longs strings of 1's and 0's */ void fmpz_random_limbs2(fmpz_t x, unsigned long n) { if (n == 0) { x[0] = 0L; return; } mpn_random2(x + 1, n); x[0] = n; } /* Adds two fmpz's together */ void fmpz_add(fmpz_t coeffs_out, const fmpz_t in1, const fmpz_t in2) { fmpz_t coeffs1 = in1; fmpz_t coeffs2 = in2; long carry; unsigned long size1 = ABS(coeffs1[0]); unsigned long size2 = ABS(coeffs2[0]); if (size1 < size2) { SWAP_PTRS(coeffs1, coeffs2); size1 = ABS(coeffs1[0]); size2 = ABS(coeffs2[0]); } if (!size1) { if (!size2) coeffs_out[0] = 0L; else { if (coeffs_out != coeffs2) F_mpn_copy(coeffs_out, coeffs2, size2+1); } } else if (!size2) { if (coeffs_out != coeffs1) F_mpn_copy(coeffs_out, coeffs1, size1+1); } else if ((long) (coeffs1[0] ^ coeffs2[0]) >= 0L) { coeffs_out[0] = coeffs1[0]; carry = mpn_add(coeffs_out+1, coeffs1+1, size1, coeffs2+1, size2); if (carry) { coeffs_out[size1+1] = carry; if ((long) coeffs_out[0] < 0L) coeffs_out[0]--; else coeffs_out[0]++; } } else { carry = 0; if (size1 != size2) carry = 1; else carry = mpn_cmp(coeffs1+1, coeffs2+1, size1); if (carry == 0) coeffs_out[0] = 0L; else if (carry > 0) { mpn_sub(coeffs_out+1, coeffs1+1, size1, coeffs2+1, size2); coeffs_out[0] = coeffs1[0]; NORM(coeffs_out); } else { mpn_sub_n(coeffs_out+1, coeffs2+1, coeffs1+1, size1); coeffs_out[0] = -coeffs1[0]; NORM(coeffs_out); } } } /* Add an unsigned long to an fmpz, inplace */ void fmpz_add_ui_inplace(fmpz_t output, const unsigned long x) { unsigned long carry; if (x) { if (!output[0]) { output[1] = x; output[0] = 1; } else if ((long) 
output[0] > 0) { carry = mpn_add_1(output + 1, output + 1, output[0], x); if (carry) { output[output[0]+1] = carry; output[0]++; } } else if ((long) output[0] < -1L) { mpn_sub_1(output + 1, output + 1, ABS(output[0]), x); NORM(output); } else { if (x <= output[1]) { output[1] -= x; if (!output[1]) output[0] = 0; } else { output[1] = x - output[1]; output[0] = 1; } } } } /* Add an unsigned long to an fmpz, inplace */ void fmpz_add_ui(fmpz_t output, const fmpz_t input, const unsigned long x) { unsigned long carry; if (x) { if (!input[0]) { output[1] = x; output[0] = 1; } else if ((long) input[0] > 0) { carry = mpn_add_1(output + 1, input + 1, input[0], x); output[0] = input[0]; if (carry) { output[output[0]+1] = carry; output[0]++; } } else if ((long) input[0] < -1L) { mpn_sub_1(output + 1, input + 1, ABS(input[0]), x); output[0] = input[0]; NORM(output); } else { if (x <= input[1]) { output[1] = input[1] - x; if (!output[1]) output[0] = 0L; else output[0] = -1L; } else { output[1] = x - input[1]; output[0] = 1L; } } } else { fmpz_set(output, input); } } /* Add an unsigned long to a coefficient. Assumes the output coefficient is non-negative. 
*/
void __fmpz_add_ui_inplace(fmpz_t output, const unsigned long x)
{
   unsigned long carry;

   if (!x) return;

   if (!output[0])
   {
      output[1] = x;
      output[0] = 1;
      return;
   }

   carry = mpn_add_1(output + 1, output + 1, output[0], x);
   if (carry)
   {
      output[output[0]+1] = carry;
      output[0]++;
   }
}

/*
   Sets coeffs_out = in1 - in2.

   The operands are ordered so that "larger" has at least as many limbs as
   "smaller"; in_order records whether that ordering still matches
   (in1, in2), and is used to pick the sign of the result.
   Opposite signs lead to an mpn addition of the magnitudes, equal signs to
   an mpn subtraction of the smaller magnitude from the larger.
*/
void fmpz_sub(fmpz_t coeffs_out, const fmpz_t in1, const fmpz_t in2)
{
   fmpz_t larger = in1;
   fmpz_t smaller = in2;
   unsigned long size1 = ABS(larger[0]);
   unsigned long size2 = ABS(smaller[0]);
   int in_order = 1;

   if (size1 < size2)
   {
      SWAP_PTRS(larger, smaller);
      size1 = ABS(larger[0]);
      size2 = ABS(smaller[0]);
      in_order = 0;
   }

   // From here on size1 >= size2, so size1 == 0 means both operands are zero.
   if (!size1)
   {
      coeffs_out[0] = 0L;
      return;
   }

   if (!size2)
   {
      if (larger != coeffs_out) F_mpn_copy(coeffs_out, larger, size1+1);
      if (!in_order) coeffs_out[0] = -coeffs_out[0];
      return;
   }

   if ((long) (larger[0] ^ smaller[0]) < 0)
   {
      // Opposite signs: magnitudes add up, grow by a limb on carry.
      long carry;

      if (in_order) coeffs_out[0] = larger[0];
      else coeffs_out[0] = -larger[0];
      carry = mpn_add(coeffs_out+1, larger+1, size1, smaller+1, size2);
      if (carry)
      {
         coeffs_out[size1+1] = carry;
         if ((long) coeffs_out[0] < 0) coeffs_out[0]--;
         else coeffs_out[0]++;
      }
      return;
   }

   // Equal signs: subtract the smaller magnitude from the larger one.
   long cmp = 1;
   if (size1 == size2) cmp = mpn_cmp(larger+1, smaller+1, size1);

   if (cmp == 0)
   {
      coeffs_out[0] = 0L;
      return;
   }

   if (cmp > 0)
   {
      mpn_sub(coeffs_out+1, larger+1, size1, smaller+1, size2);
      if (in_order) coeffs_out[0] = larger[0];
      else coeffs_out[0] = -larger[0];
   }
   else
   {
      mpn_sub_n(coeffs_out+1, smaller+1, larger+1, size1);
      if (in_order) coeffs_out[0] = -larger[0];
      else coeffs_out[0] = larger[0];
   }
   NORM(coeffs_out);
}

void fmpz_sub_ui_inplace(fmpz_t output, const unsigned long x)
{
   unsigned long carry;
   if (x)
   {
      if (!output[0])
      {
         output[1] = x;
         output[0] = -1L;
      }
      else if ((long) output[0] < 0)
      {
         carry = mpn_add_1(output + 1, output + 1, ABS(output[0]), x);
         if (carry)
         {
            output[ABS(output[0])+1] = carry;
output[0]--; } } else if ((long) output[0] > 1L) { mpn_sub_1(output + 1, output + 1, output[0], x); NORM(output); } else { if (x <= output[1]) { output[1] -= x; if (!output[1]) output[0] = 0; } else { output[1] = x - output[1]; output[0] = -1L; } } } } void fmpz_sub_ui(fmpz_t output, const fmpz_t input, const unsigned long x) { unsigned long carry; if (x) { if (!input[0]) { output[1] = x; output[0] = -1L; } else if ((long) input[0] < 0) { carry = mpn_add_1(output + 1, input + 1, ABS(input[0]), x); output[0] = input[0]; if (carry) { output[ABS(output[0])+1] = carry; output[0]--; } } else if ((long) input[0] > 1L) { mpn_sub_1(output + 1, input + 1, input[0], x); output[0] = input[0]; NORM(output); } else { if (x <= input[1]) { output[1] = input[1] - x; if (!output[1]) output[0] = 0; else output[0] = 1L; } else { output[1] = x - input[1]; output[0] = -1L; } } } else { fmpz_set(output, input); } } /* Multiplies two fmpz's Assumes no overlap */ void fmpz_mul(fmpz_t res, const fmpz_t a, const fmpz_t b) { long a0 = a[0]; long b0 = b[0]; unsigned long sizea = FLINT_ABS(a0); unsigned long sizeb = FLINT_ABS(b0); while ((!a[sizea]) && (sizea)) sizea--; while ((!b[sizeb]) && (sizeb)) sizeb--; mp_limb_t mslimb; fmpz_t temp; if ((sizea == 0) || (sizeb == 0)) { res[0] = 0; } else if (sizea + sizeb < 100) { temp = (fmpz_t) flint_stack_alloc_small(sizea + sizeb + 1); if (sizea >= sizeb) mslimb = mpn_mul(temp+1, a+1, sizea, b+1, sizeb); else mslimb = mpn_mul(temp+1, b+1, sizeb, a+1, sizea); temp[0] = sizea + sizeb - (mslimb == 0); F_mpn_copy(res, temp, temp[0]+1); if ((long) (a0 ^ b0) < 0) res[0] = -res[0]; flint_stack_release_small(); } else if (sizea + sizeb < 2*FLINT_FFT_LIMBS_CROSSOVER) { temp = (fmpz_t) flint_stack_alloc(sizea + sizeb + 1); if (sizea >= sizeb) mslimb = mpn_mul(temp+1, a+1, sizea, b+1, sizeb); else mslimb = mpn_mul(temp+1, b+1, sizeb, a+1, sizea); temp[0] = sizea + sizeb - (mslimb == 0); F_mpn_copy(res, temp, temp[0]+1); if ((long) (a0 ^ b0) < 0) res[0] = 
-res[0]; flint_stack_release(); } else { if (sizea >= sizeb) mslimb = F_mpn_mul(res+1, a+1, sizea, b+1, sizeb); else mslimb = F_mpn_mul(res+1, b+1, sizeb, a+1, sizea); res[0] = sizea+sizeb - (mslimb == 0); if ((long) (a0 ^ b0) < 0) res[0] = -res[0]; } } /* Used internally by fmpz_poly. Multiplies two fmpz's but assumes res has enough space to contain the number of limbs in _a_ plus the number of limbs of _b_, whenever this sum is less than 2*FLINT_FFT_LIMBS_CROSSOVER Assumes no overlap */ void __fmpz_mul(fmpz_t res, const fmpz_t a, const fmpz_t b) { long a0 = a[0]; long b0 = b[0]; unsigned long sizea = FLINT_ABS(a0); unsigned long sizeb = FLINT_ABS(b0); while ((!a[sizea]) && (sizea)) sizea--; while ((!b[sizeb]) && (sizeb)) sizeb--; mp_limb_t mslimb; fmpz_t temp; if ((sizea == 0) || (sizeb == 0)) { res[0] = 0; } else if (sizea + sizeb < 100) { if (sizea >= sizeb) mslimb = mpn_mul(res+1, a+1, sizea, b+1, sizeb); else mslimb = mpn_mul(res+1, b+1, sizeb, a+1, sizea); res[0] = sizea + sizeb - (mslimb == 0); if ((long) (a[0] ^ b[0]) < 0) res[0] = -res[0]; } else { if (sizea >= sizeb) mslimb = F_mpn_mul(res+1, a+1, sizea, b+1, sizeb); else mslimb = F_mpn_mul(res+1, b+1, sizeb, a+1, sizea); res[0] = sizea+sizeb - (mslimb == 0); if ((long) (a[0] ^ b[0]) < 0) res[0] = -res[0]; } } void fmpz_mul_ui(fmpz_t output, const fmpz_t input, const unsigned long x) { if (x == 0) { output[0] = 0; return; } mp_limb_t mslimb; if (output[0] = input[0]) // This isn't a typo { mslimb = mpn_mul_1(output+1, input+1, FLINT_ABS(input[0]), x); if (mslimb) { output[FLINT_ABS(input[0])+1] = mslimb; if ((long) output[0] > 0) output[0]++; else output[0]--; } } } /* Sets res to res+a*b Assumes no overlap */ void fmpz_addmul(fmpz_t res, const fmpz_t a, const fmpz_t b) { long a0 = a[0]; long b0 = b[0]; unsigned long sizea = FLINT_ABS(a0); unsigned long sizeb = FLINT_ABS(b0); while ((!a[sizea]) && (sizea)) sizea--; while ((!b[sizeb]) && (sizeb)) sizeb--; fmpz_t temp; mp_limb_t mslimb; if (sizea && sizeb) 
{
      if (sizea + sizeb < 100)
      {
         temp = (fmpz_t) flint_stack_alloc_small(sizea + sizeb + 1);
         if (sizea >= sizeb) mslimb = mpn_mul(temp+1, a+1, sizea, b+1, sizeb);
         else mslimb = mpn_mul(temp+1, b+1, sizeb, a+1, sizea);
         temp[0] = sizea + sizeb - (mslimb == 0);
         if ((long) (a[0] ^ b[0]) < 0) temp[0] = -temp[0];
         fmpz_add(res, res, temp);
         flint_stack_release_small();
      }
      else
      {
         temp = (fmpz_t) flint_stack_alloc(sizea + sizeb + 1);
         if (sizea >= sizeb) mslimb = F_mpn_mul(temp+1, a+1, sizea, b+1, sizeb);
         else mslimb = F_mpn_mul(temp+1, b+1, sizeb, a+1, sizea);
         temp[0] = sizea + sizeb - (mslimb == 0);
         if ((long) (a[0] ^ b[0]) < 0) temp[0] = -temp[0];
         fmpz_add(res, res, temp);
         flint_stack_release();
      }
   }
}

/*
   Sets res to a / b
   Assumes no overlap
   Rounding occurs towards zero

   Prints an error and aborts when b is zero. The remainder produced by
   mpn_tdiv_qr goes to a scratch buffer of sizeb limbs and is discarded.
*/
void fmpz_tdiv(fmpz_t res, const fmpz_t a, const fmpz_t b)
{
   long a0 = a[0];
   long b0 = b[0];
   unsigned long sizea = FLINT_ABS(a0);
   unsigned long sizeb = FLINT_ABS(b0);
   fmpz_t remainder;

   // tolerate operands whose top limbs are zero
   while ((!a[sizea]) && (sizea)) sizea--;
   while ((!b[sizeb]) && (sizeb)) sizeb--;

   if (sizeb == 0)
   {
      printf("Error: division by zero!\n");
      abort();
   }

   if (sizea < sizeb) // Todo: make this deal with sizea == sizeb but a < b
   {
      res[0] = 0;
   }
   else
   {
      remainder = (fmpz_t) flint_stack_alloc(sizeb);
      mpn_tdiv_qr(res+1, remainder, 0, a+1, sizea, b+1, sizeb);
      res[0] = sizea - sizeb + 1;
      if ((long) (a0 ^ b0) < 0) res[0] = -res[0];
      flint_stack_release();
   }

   NORM(res);
}

/*
   Sets res to a / b
   Assumes no overlap
   Rounding occurs towards minus infinity
*/
void fmpz_fdiv(fmpz_t res, const fmpz_t a, const fmpz_t b)
{
   long a0 = a[0];
   long b0 = b[0];
   unsigned long sizea = FLINT_ABS(a0);
   unsigned long sizeb = FLINT_ABS(b0);
   while ((!a[sizea]) && (sizea)) sizea--;
   while ((!b[sizeb]) && (sizeb)) sizeb--;
   mp_limb_t mslimb;
   fmpz_t temp;
   if (sizeb == 0)
   {
      printf("Error: division by zero!\n");
      abort();
   }
   else if (sizea < sizeb) // Todo: make this deal with sizea == sizeb but a < b
   {
      if (((long) (a0 ^ b0) < 0L) && (a0))
      {
         res[0] = -1L;
         res[1] = 1;
      }
      else
res[0] = 0; return; } else { temp = (fmpz_t) flint_stack_alloc(sizeb); mpn_tdiv_qr(res+1, temp, 0, a+1, sizea, b+1, sizeb); res[0] = sizea - sizeb + 1; if ((long) (a0 ^ b0) < 0L) res[0] = -res[0]; NORM(res); if ((long) (a0 ^ b0) < 0L) { unsigned long i = 0; for (; i < sizeb; i++) { if (temp[i]) break; } if (i < sizeb) { fmpz_sub_ui_inplace(res, 1UL); } } flint_stack_release(); } } void fmpz_tdiv_ui(fmpz_t output, const fmpz_t input, const unsigned long x) { output[0] = input[0]; unsigned long size = FLINT_ABS(input[0]); mpn_divmod_1(output+1, input+1, size, x); NORM(output); } /* Returns input % x. Output will be reduced mod x. */ unsigned long fmpz_mod_ui(const fmpz_t input, const unsigned long x) { unsigned long size = FLINT_ABS(input[0]); unsigned long mod; mod = mpn_mod_1(input+1, size, x); if (!mod) return mod; else if ((long) input[0] < 0L) { return x - mod; } else return mod; } /* Raise input to the power exp Very simplistic at this point. It just converts to an mpz_t and uses GMP's mpz_pow_ui function */ void fmpz_pow_ui(fmpz_t output, const fmpz_t input, const unsigned long exp) { mpz_t power; mpz_init(power); fmpz_to_mpz(power, input); mpz_pow_ui(power, power, exp); mpz_to_fmpz(output, power); mpz_clear(power); } unsigned long __fmpz_power_of_two(const fmpz_t x) { if (x[0] == 0) return -1L; return mpn_scan1(x + 1, 0); } void fmpz_mul_2exp(fmpz_t output, fmpz_t x, unsigned long exp) { unsigned long limbs = (exp >> FLINT_LG_BITS_PER_LIMB); unsigned long bits = (exp & (FLINT_BITS - 1)); mp_limb_t msl = 0L; if (x[0] == 0) { output[0] = 0L; return; } if (bits) { msl = mpn_lshift(output + limbs + 1, x + 1, FLINT_ABS(x[0]), bits); if (msl) output[limbs + FLINT_ABS(x[0]) + 1] = msl; } else F_mpn_copy(output + limbs + 1, x + 1, FLINT_ABS(x[0])); if (limbs) F_mpn_clear(output + 1, limbs); if ((long) x[0] >= 0L) output[0] = x[0] + limbs + (msl != 0L); else output[0] = x[0] - limbs - (msl != 0L); } void fmpz_div_2exp(fmpz_t output, fmpz_t x, unsigned long exp) { 
unsigned long limbs = (exp >> FLINT_LG_BITS_PER_LIMB); unsigned long bits = (exp & (FLINT_BITS - 1)); if ((x[0] == 0) || (limbs >= FLINT_ABS(x[0]))) { output[0] = 0L; return; } if (bits) { fmpz_t temp = fmpz_init(FLINT_ABS(x[0]) - limbs); mpn_rshift(temp + 1, x + limbs + 1, FLINT_ABS(x[0]) - limbs, bits); if ((long) x[0] >= 0L) temp[0] = x[0] - limbs; else temp[0] = limbs + x[0]; NORM(temp); fmpz_set(output, temp); fmpz_clear(temp); } else { F_mpn_copy(output + 1, x + limbs + 1, FLINT_ABS(x[0]) - limbs); if ((long) x[0] >= 0L) output[0] = x[0] - limbs; else output[0] = limbs + x[0]; } } void fmpz_gcd(fmpz_t output, fmpz_t x1, fmpz_t x2) { if (x1[0] == 0) { fmpz_set(output, x2); if ((long) output[0] < 0L) output[0] = -output[0]; return; } if (x2[0] == 0) { fmpz_set(output, x1); if ((long) output[0] < 0L) output[0] = -output[0]; return; } unsigned long twos1 = __fmpz_power_of_two(x1); unsigned long twos2 = __fmpz_power_of_two(x2); unsigned long n1, n2; fmpz_t a1 = fmpz_init(FLINT_ABS(x1[0]) - (twos1 >> FLINT_LG_BITS_PER_LIMB)); fmpz_t a2 = fmpz_init(FLINT_ABS(x2[0]) - (twos2 >> FLINT_LG_BITS_PER_LIMB)); fmpz_div_2exp(a1, x1, twos1); if ((long) a1[0] < 0L) a1[0] = -a1[0]; fmpz_div_2exp(a2, x2, twos2); if ((long) a2[0] < 0L) a2[0] = -a2[0]; if (fmpz_is_one(a1) || fmpz_is_one(a2)) { fmpz_set_ui(output, 1UL); } else { n1 = FLINT_ABS(a1[0]); n2 = FLINT_ABS(a2[0]); if (fmpz_bits(a1) >= fmpz_bits(a2)) output[0] = mpn_gcd(output + 1, a1 + 1, n1, a2 + 1, n2); else output[0] = mpn_gcd(output + 1, a2 + 1, n2, a1 + 1, n1); } unsigned long min = FLINT_MIN(twos1, twos2); fmpz_mul_2exp(output, output, min); fmpz_clear(a1); fmpz_clear(a2); } /* Sets sqrt to the square root of n and sets rem to the remainder No aliasing of sqrt and n */ void fmpz_sqrtrem(fmpz_t sqrt, fmpz_t rem, fmpz_t n) { long size = n[0]; if (size < 0L) { printf("Cannot take the square root of a negative number!\n"); abort(); } if (!size) { fmpz_set_ui(sqrt, 0L); fmpz_set_ui(rem, 0L); return; } rem[0] = 
mpn_sqrtrem(sqrt+1, rem+1, n+1, size); sqrt[0] = (size+1)/2; } // *************** end of file flint-1.011/kara-profile.c0000644017361200017500000000717211025357254015252 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** kara-profile.c Comparative profiling for karatsuba multiplication Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include "profiler.h" #include "flint.h" #include "test-support.h" #include "mpz_poly.h" #include "fmpz_poly.h" #include typedef struct { unsigned long length1, length2, bits1, bits2; int which; // 0 for mpz_poly, 1 for fmpz_poly } arg_t; void target(void* y, unsigned long count) { arg_t* arg = (arg_t*) y; mpz_t x; mpz_init(x); mpz_poly_t in1, in2, out; mpz_poly_init(in1); mpz_poly_init(in2); mpz_poly_init(out); fmpz_poly_t in1f, in2f, outf; fmpz_poly_init2(in1f, arg->length1, (arg->bits1-1)/FLINT_BITS+1); fmpz_poly_init2(in2f, arg->length2, (arg->bits2-1)/FLINT_BITS+1); _fmpz_poly_stack_init(outf, arg->length1 + arg->length2 - 1, in1f->limbs + in2f->limbs + 1); for (unsigned long i = 
0; i < arg->length1; i++) { mpz_urandomb(x, randstate, arg->bits1); mpz_poly_set_coeff(in1, i, x); } mpz_poly_to_fmpz_poly(in1f, in1); for (unsigned long i = 0; i < arg->length2; i++) { mpz_urandomb(x, randstate, arg->bits2); mpz_poly_set_coeff(in2, i, x); } mpz_poly_to_fmpz_poly(in2f, in2); start_clock(0); if (arg->which) { for (unsigned long i = 0; i < count; i++) _fmpz_poly_mul_karatsuba(outf, in1f, in2f); } else { for (unsigned long i = 0; i < count; i++) mpz_poly_mul_karatsuba(out, in1, in2); } stop_clock(0); _fmpz_poly_stack_clear(outf); fmpz_poly_clear(in2f); fmpz_poly_clear(in1f); mpz_poly_clear(out); mpz_poly_clear(in2); mpz_poly_clear(in1); mpz_clear(x); } /* command line arguments are: length1, length2, bits1, bits2 */ int main(int argc, char* argv[]) { if (argc != 5) { printf("expected four arguments (see source)\n"); return 0; } test_support_init(); arg_t arg; arg.length1 = atoi(argv[1]); arg.length2 = atoi(argv[2]); arg.bits1 = atoi(argv[3]); arg.bits2 = atoi(argv[4]); double min_time, max_time; for (unsigned long i = 0; i < 3; i++) { arg.which = 0; prof_repeat(&min_time, &max_time, target, &arg); printf(" mpz_poly: min = %.3le, \tmax = %.3le\n", min_time, max_time); fflush(stdout); arg.which = 1; prof_repeat(&min_time, &max_time, target, &arg); printf("fmpz_poly: min = %.3le, \tmax = %.3le\n", min_time, max_time); fflush(stdout); } test_support_cleanup(); return 0; } // end of file **************************************************************** flint-1.011/fmpz_poly.h0000644017361200017500000012272011025357254014717 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** fmpz_poly.h: Polynomials over Z, implemented as contiguous block of fmpz_t's Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #ifndef FLINT_FMPZ_POLY_H #define FLINT_FMPZ_POLY_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include "mpn_extras.h" #include "fmpz.h" #include "zmod_poly.h" /**************************************************************************** fmpz_poly_t ----------- fmpz_poly_t represents a dense polynomial in Z[x] using a single block of memory to hold all the coefficients. This type is better suited to handling very dense polynomials with relatively small coefficients, where the memory management overhead of Zpoly_t would be too expensive. "coeffs" is an array of limbs of length (alloc * (limbs+1)). Each coefficient uses limbs+1 limbs. For each coefficient, the first limb is a sign/size limb: the number of limbs of the absolute value of the coefficient is given by the absolute value of this limb, and the sign of this limb is the sign of the coefficient. (Zero is stored as a sign/size of zero followed by arbitrary data.) The remaining "limbs" limbs represent the absolute value of the coefficient, stored in GMP's mpn format. Only the first "length" coefficients actually represent coefficients of the polynomial; i.e. it's a polynomial of degree at most length-1. 
If length == 0, this is the zero polynomial. All functions normalise so that
the (length-1)-th coefficient is non-zero.

Obviously always alloc >= length.

There are two classes of functions operating on fmpz_poly_t:

-- The _fmpz_poly_* functions NEVER free or reallocate "coeffs", so they
   don't care how "coeffs" was allocated, and they never even look at the
   "alloc" attribute. They always assume the output has enough space for
   the result. They also NEVER modify the limbs attribute (since this
   would screw up the block size).

-- The fmpz_poly_* functions ASSUME that "coeffs" was allocated via
   flint_heap_alloc, and they MAY free or reallocate "coeffs" using
   flint_heap_realloc, flint_heap_free etc, whenever they feel the need.

*/

typedef struct
{
   /* array of alloc * (limbs+1) limbs; each coefficient is a sign/size limb
      followed by "limbs" data limbs */
   mp_limb_t* coeffs;
   /* number of coefficients there is room for */
   unsigned long alloc;
   /* number of coefficients in use; 0 means the zero polynomial */
   unsigned long length;
   /* data limbs per coefficient, not counting the sign/size limb */
   unsigned long limbs;
} fmpz_poly_struct;

// fmpz_poly_t allows reference-like semantics for fmpz_poly_struct:
typedef fmpz_poly_struct fmpz_poly_t[1];
typedef fmpz_poly_struct * fmpz_poly_p;

/*
   Exchange two fmpz_poly_p variables.
   NOTE(review): the expansion already ends in ';', so writing
   "if (c) SWAP(a, b); else ..." does not compile; use braces around it.
*/
#define SWAP(x_dummy, y_dummy) \
do { \
   fmpz_poly_p swap_temp = x_dummy; \
   x_dummy = y_dummy; \
   y_dummy = swap_temp; \
} while(0);

/*
   Exchange two fmpz_t variables. The same remark about the trailing ';'
   applies.
*/
#define SWAP_PTRS(x_dummy_p, y_dummy_p) \
do { \
   fmpz_t swap_temp_p = x_dummy_p; \
   x_dummy_p = y_dummy_p; \
   y_dummy_p = swap_temp_p; \
} while(0);

/****************************************************************************

   Conversion Routines

****************************************************************************/

/*
   Converts fmpz_poly_t "poly_fmpz" to a ZmodF_poly.

   Each coefficient of poly_fmpz is assumed to fit into a coefficient
   of poly_f.

   The maximum number of *bits* that any coefficient takes is returned
   and made negative if any of the coefficients was negative.

   Only _length_ coefficients are converted.

   Assumes 0 < length <= poly_fmpz->length
*/

long fmpz_poly_to_ZmodF_poly(ZmodF_poly_t poly_f, const fmpz_poly_t poly_fmpz,
                             const unsigned long length);

/*
   Normalise and converts ZmodF_poly "poly_f" to a fmpz_poly_t.
Each coefficient of poly_f is assumed to fit into a coefficient of poly_fmpz. The normalisation ensures that this function is the inverse of ZmodF_poly_convert_in_mpn. Assumes 0 < poly_f->length */ void ZmodF_poly_to_fmpz_poly(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const long sign); /* Packs bundle coefficients, each padded out to the given number of limbs, into the first coefficient of poly_f. Assumes 0 < bundle <= poly_fmpz->length */ void fmpz_poly_limb_pack(ZmodF_poly_t poly_f, const fmpz_poly_t poly_fmpz, const unsigned long bundle, const long limbs); /* Unpacks bundle coefficients from the first coefficient of poly_f, each assumed to be stored in a field of the given number of limbs. Assumes 0 < bundle <= poly_f->length */ void fmpz_poly_limb_unpack(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const unsigned long bundle, const unsigned long limbs); /* Unpacks bundle coefficients from the first coefficient of poly_f, each assumed to be stored in a field of the given number of limbs. Assumes the coefficients are unsigned. Assumes 0 < bundle <= poly_f->length */ void fmpz_poly_limb_unpack_unsigned(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const unsigned long bundle, const unsigned long limbs); /* Packs poly_fmpz down to the bit into poly_f. Each coefficient of poly_f will have "bundle" coefficients packed into it. Each of the original coefficients is packed into a bitfield "bits" bits wide including one bit for a sign bit. "bits" is assumed to be less than FLINT_BITS. If bits is negative, the input poly is assumed to have signed coefficients. Assumes 0 < bundle and 0 < poly_fmpz->length */ void fmpz_poly_bit_pack(ZmodF_poly_t poly_f, const fmpz_poly_t poly_fmpz, const unsigned long bundle, const long bits, const unsigned long length, const long negate); /* Unpacks poly_f into poly_fmpz. This is the inverse of ZmodF_poly_bitpack_mpn, so long as the final coefficient in the polynomial is positive. 
Each coeff of poly_f is assumed to contain "bundle" coefficients, each stored in a bitfield "bits" bits wide with the most significant bit being reserved for the sign. The total number of coefficients to be unpacked is given by the length of poly_fmpz. One must ensure each of the coefficients of poly_fmpz are set to zero before calling this function for the first time since it adds to existing coefficients of poly_fmpz, rather than overwriting them. "bits" is assumed to be less than FLINT_BITS. Assumes 0 < bundle and 0 < poly_f->length */ void fmpz_poly_bit_unpack(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const unsigned long bundle, const unsigned long bits); void fmpz_poly_bit_unpack_unsigned(fmpz_poly_t poly_fmpz, const ZmodF_poly_t poly_f, const unsigned long bundle, const unsigned long bits); /* Packs poly_fmpz down to the byte into poly_f. Each coefficient of poly_f will have "bundle" coefficients packed into it, each packed into a field "bytes" bytes wide. "coeff_bytes" is assumed to be at least FLINT_BITS/8, i.e. the coefficients are assumed to be at least a limb wide. Assumes 0 < bundle and 0 < poly_fmpz->length */ void fmpz_poly_byte_pack(ZmodF_poly_t poly_f, const fmpz_poly_t poly_fmpz, const unsigned long bundle, const unsigned long coeff_bytes, const unsigned long length, const long negate); /* Unpacks array into poly_fmpz. Each coefficient stored in array will have "bundle" coefficients, each packed into a field "bytes" bytes wide. The total number of coefficients to be unpacked is given by the length of poly_fmpz. "coeff_bytes" is assumed to be at least FLINT_BITS/8, i.e. the coefficients are assumed to be at least a limb wide. 
Assumes 0 < bundle and poly->mpn->length > 0 */ void fmpz_poly_byte_unpack_unsigned(fmpz_poly_t poly_m, const mp_limb_t* array, const unsigned long bundle, const unsigned long coeff_bytes); void fmpz_poly_byte_unpack(fmpz_poly_t poly_m, const mp_limb_t* array, const unsigned long bundle, const unsigned long coeff_bytes); /* Splits each coefficient of poly_fmpz into pieces "limbs" limbs long and stores each piece into bundle coefficients of poly_f. */ void fmpz_poly_split(ZmodF_poly_t poly_f, fmpz_poly_t poly_fmpz, unsigned long bundle, unsigned long limbs); /* Combines each "bundle" coefficients of poly_f, each taken to be "limbs" limbs long, into a coefficient of poly_fmpz. This function is used for testing purposed only, and is the exact inverse of ZmodF_poly_split_mpn. The number of coefficients extracted is given by the length of poly_fmpz. */ void fmpz_poly_unsplit(ZmodF_poly_t poly_f, fmpz_poly_t poly_fmpz, unsigned long bundle, unsigned long limbs); /* Reduce coefficients of the given fmpz_poly fpol, modulo the modulus of the given zmod_poly zpol, and store the result in zpol. */ void fmpz_poly_to_zmod_poly(zmod_poly_t zpol, fmpz_poly_t fpol); /* Store the unsigned long coefficients of the zmod_poly zpol in the given fmpz_poly fpol. The unsigned version normalised to [0, p) the other version to [-p/2, p/2] */ void zmod_poly_to_fmpz_poly(fmpz_poly_t fpol, zmod_poly_t zpol); void zmod_poly_to_fmpz_poly_unsigned(fmpz_poly_t fpol, zmod_poly_t zpol); /* Given an fmpz_poly_t fpol representing the reduction modulo oldmod of a polynomial and a zmod_poly zpol with modulus p, use Chinese remaindering to reconstruct the polynomial modulo newmod, with newmod = p*oldmod, where each new coefficient fpol[i] is set to the unique non-negative integer in [0, newmod) which is fpol[i] mod oldmod and zpol[i] mod p. Assumes p and oldmod are coprime. Returns 1 if the CRT has stabilised, i.e. if the new output equals the old input else it returns 0. 
The unsigned version normalises the output to [0, newmod) the main version normalises to [-nemod/2, newmod/2]. Allows aliasing of fpol and res. */ int fmpz_poly_CRT(fmpz_poly_t res, fmpz_poly_t fpol, zmod_poly_t zpol, fmpz_t newmod, fmpz_t oldmod); int fmpz_poly_CRT_unsigned(fmpz_poly_t res, fmpz_poly_t fpol, zmod_poly_t zpol, fmpz_t newmod, fmpz_t oldmod); /*============================================================================ Functions in _fmpz_poly_* layer ===============================================================================*/ void _fmpz_poly_stack_init(fmpz_poly_t poly, const unsigned long alloc, const unsigned long limbs); void _fmpz_poly_stack_clear(fmpz_poly_t poly); void _fmpz_poly_check(const fmpz_poly_t poly); void _fmpz_poly_normalise(fmpz_poly_t poly); void _fmpz_poly_check_normalisation(const fmpz_poly_t poly); static inline fmpz_t _fmpz_poly_get_coeff_ptr(const fmpz_poly_t poly, const unsigned long n) { return poly->coeffs+n*(poly->limbs+1); } static inline fmpz_t _fmpz_poly_lead(const fmpz_poly_t poly) { if (poly->length == 0) return NULL; return poly->coeffs+(poly->length-1)*(poly->limbs+1); } static inline fmpz_t fmpz_poly_lead(const fmpz_poly_t poly) { if (poly->length == 0) return NULL; return poly->coeffs+(poly->length-1)*(poly->limbs+1); } /* Set "output" to the given coefficient and return the sign Assumes length of output is poly->limbs limbs long. 
*/ static inline long _fmpz_poly_get_coeff(mp_limb_t * output, const fmpz_poly_t poly, const unsigned long n) { F_mpn_clear(output, poly->limbs); if (poly->coeffs[n*(poly->limbs+1)] != 0L) { F_mpn_copy(output, poly->coeffs+n*(poly->limbs+1)+1, ABS(poly->coeffs[n*(poly->limbs+1)])); } return poly->coeffs[n*(poly->limbs+1)]; } static inline unsigned long _fmpz_poly_get_coeff_ui(fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) return 0L; if (poly->coeffs[n*(poly->limbs+1)] == 0L) return 0L; else return poly->coeffs[n*(poly->limbs+1)+1]; } static inline long _fmpz_poly_get_coeff_si(fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) return 0L; if (poly->coeffs[n*(poly->limbs+1)] == 0L) return 0L; if ((long) poly->coeffs[n*(poly->limbs+1)] > 0L) return poly->coeffs[n*(poly->limbs+1)+1]; else return -poly->coeffs[n*(poly->limbs+1)+1]; } void _fmpz_poly_get_coeff_mpz(mpz_t x, const fmpz_poly_t poly, const unsigned long n); void _fmpz_poly_get_coeff_mpz_read_only(mpz_t x, const fmpz_poly_t poly, const unsigned long n); static inline void _fmpz_poly_set_coeff_mpz(fmpz_poly_t poly, const unsigned long n, const mpz_t x) { if (poly->limbs == 0) return; if (n+1 > poly->length) { for (long i = poly->length; i + 1 < n; i++) { poly->coeffs[i*(poly->limbs+1)] = 0; } poly->length = n+1; } mpz_to_fmpz(poly->coeffs + n*(poly->limbs+1), x); _fmpz_poly_normalise(poly); } void _fmpz_poly_set_coeff_fmpz(fmpz_poly_t poly, const unsigned long n, fmpz_t x); void _fmpz_poly_get_coeff_fmpz(fmpz_t x, const fmpz_poly_t poly, const unsigned long n); /* Set a coefficient to the given value having "size" limbs. 
Assumes that the poly->limbs is at least "size" and that n < poly->length */ static inline void _fmpz_poly_set_coeff(fmpz_poly_t poly, const unsigned long n, const mp_limb_t * x, const long sign, const unsigned long size) { FLINT_ASSERT(poly->limbs >= size); F_mpn_copy(poly->coeffs+n*(poly->limbs+1)+1, x, size); poly->coeffs[n*(poly->limbs+1)] = sign; if (poly->limbs > size) F_mpn_clear(poly->coeffs+n*(poly->limbs+1)+size+1, poly->limbs-size); _fmpz_poly_normalise(poly); } void _fmpz_poly_set_coeff_ui(fmpz_poly_t poly, const unsigned long n, const unsigned long x); void _fmpz_poly_set_coeff_si(fmpz_poly_t poly, const unsigned long n, const long x); static inline long _fmpz_poly_degree(const fmpz_poly_t poly) { return poly->length - 1; } static inline unsigned long _fmpz_poly_length(const fmpz_poly_t poly) { return poly->length; } static inline unsigned long _fmpz_poly_limbs(const fmpz_poly_t poly) { return poly->limbs; } void _fmpz_poly_set(fmpz_poly_t output, const fmpz_poly_t input); /* Zero the polynomial by setting the length to zero. Does not set the actual limbs to zero. 
*/ static inline void _fmpz_poly_zero(fmpz_poly_t output) { output->length = 0; } /* Zero first n coefficients of poly, regardless of what length is */ void _fmpz_poly_zero_coeffs(fmpz_poly_t poly, const unsigned long n); static inline void _fmpz_poly_attach(fmpz_poly_t output, const fmpz_poly_t input) { output->length = input->length; output->limbs = input->limbs; output->coeffs = input->coeffs; } static inline void fmpz_poly_attach(fmpz_poly_t output, const fmpz_poly_t input) { _fmpz_poly_attach(output, input); } /* Attach input shifted right by n to output */ static inline void _fmpz_poly_attach_shift(fmpz_poly_t output, const fmpz_poly_t input, unsigned long n) { if (input->length >= n) output->length = input->length - n; else output->length = 0; output->limbs = input->limbs; output->coeffs = input->coeffs + n*(input->limbs+1); } static inline void fmpz_poly_attach_shift(fmpz_poly_t output, const fmpz_poly_t input, unsigned long n) { _fmpz_poly_attach_shift(output, input, n); } /* Attach input to first n coefficients of input */ static inline void _fmpz_poly_attach_truncate(fmpz_poly_t output, const fmpz_poly_t input, unsigned long n) { if (input->length < n) output->length = input->length; else output->length = n; output->limbs = input->limbs; output->coeffs = input->coeffs; _fmpz_poly_normalise(output); } static inline void fmpz_poly_attach_truncate(fmpz_poly_t output, const fmpz_poly_t input, unsigned long n) { _fmpz_poly_attach_truncate(output, input, n); } long _fmpz_poly_max_bits1(const fmpz_poly_t poly); long _fmpz_poly_max_bits(const fmpz_poly_t poly); unsigned long _fmpz_poly_max_limbs(const fmpz_poly_t poly); int _fmpz_poly_equal(const fmpz_poly_t input1, const fmpz_poly_t input2); void _fmpz_poly_neg(fmpz_poly_t output, const fmpz_poly_t input); void _fmpz_poly_truncate(fmpz_poly_t poly, const unsigned long trunc); void _fmpz_poly_reverse(fmpz_poly_t output, const fmpz_poly_t input, const unsigned long length); void _fmpz_poly_left_shift(fmpz_poly_t 
output, const fmpz_poly_t input, const unsigned long n); void _fmpz_poly_right_shift(fmpz_poly_t output, const fmpz_poly_t input, const unsigned long n); void _fmpz_poly_add(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2); void _fmpz_poly_sub(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2); void _fmpz_poly_scalar_mul_fmpz(fmpz_poly_t output, const fmpz_poly_t poly, const fmpz_t x); void _fmpz_poly_scalar_mul_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x); void _fmpz_poly_scalar_mul_si(fmpz_poly_t output, const fmpz_poly_t poly, const long x); void _fmpz_poly_scalar_div_fmpz(fmpz_poly_t output, const fmpz_poly_t poly, const fmpz_t x); void _fmpz_poly_scalar_tdiv_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x); void _fmpz_poly_scalar_div_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x); void _fmpz_poly_scalar_div_si(fmpz_poly_t output, const fmpz_poly_t poly, const long x); void _fmpz_poly_scalar_tdiv_si(fmpz_poly_t output, const fmpz_poly_t poly, const long x); void _fmpz_poly_scalar_div_exact_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x); void _fmpz_poly_scalar_div_exact_si(fmpz_poly_t output, const fmpz_poly_t poly, const long x); void _fmpz_poly_mul_classical(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2); void _fmpz_poly_mul_classical_trunc(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void _fmpz_poly_mul_classical_trunc_left(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void __fmpz_poly_karamul_recursive(fmpz_poly_t res, const fmpz_poly_t a, const fmpz_poly_t b, fmpz_poly_t scratch, fmpz_poly_t scratchb, const unsigned long crossover); void _fmpz_poly_mul_karatsuba(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2); void _fmpz_poly_mul_karatsuba_trunc(fmpz_poly_t output, 
const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void _fmpz_poly_mul_karatsuba_trunc_left(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); /* Multiply two polynomials together using the Kronecker segmentation method. Currently assumes that the number of output bits per coefficient is <= 64 and is supplied by the parameter "bits" */ void _fmpz_poly_mul_KS(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2); void _fmpz_poly_mul_KS_trunc(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void _fmpz_poly_mul_SS(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2); void _fmpz_poly_mul_SS_trunc(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void _fmpz_poly_mul_trunc_n(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void _fmpz_poly_mul_trunc_left_n(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void _fmpz_poly_mul(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2); void _fmpz_poly_sqr(fmpz_poly_t output, const fmpz_poly_t input); void _fmpz_poly_sqr_naive(fmpz_poly_t output, const fmpz_poly_t input); void _fmpz_poly_sqr_karatsuba(fmpz_poly_t output, const fmpz_poly_t input); void _fmpz_poly_content(fmpz_t content, const fmpz_poly_t a); /*============================================================================ Functions in fmpz_poly_* layer ===============================================================================*/ void fmpz_poly_init(fmpz_poly_t poly); void fmpz_poly_init2(fmpz_poly_t poly, const unsigned long alloc, const unsigned long limbs); void fmpz_poly_realloc(fmpz_poly_t poly, const unsigned long alloc); void fmpz_poly_fit_length(fmpz_poly_t poly, const unsigned long alloc); void 
fmpz_poly_resize_limbs(fmpz_poly_t poly, const unsigned long limbs); static inline void fmpz_poly_fit_limbs(fmpz_poly_t poly, const unsigned long limbs) { if ((long) limbs > (long) poly->limbs) fmpz_poly_resize_limbs(poly, limbs); } void fmpz_poly_clear(fmpz_poly_t poly); void fmpz_poly_check(const fmpz_poly_t poly); void fmpz_poly_check_normalisation(const fmpz_poly_t poly); // ------------------------------------------------------ // String conversions and I/O int fmpz_poly_from_string(fmpz_poly_t poly, const char* s); char* fmpz_poly_to_string(const fmpz_poly_t poly); void fmpz_poly_print(const fmpz_poly_t poly); void fmpz_poly_fprint(const fmpz_poly_t poly, FILE* f); int fmpz_poly_fread(fmpz_poly_t poly, FILE* f); char* fmpz_poly_to_string_pretty(const fmpz_poly_t poly, const char * x); void fmpz_poly_fprint_pretty(const fmpz_poly_t poly, FILE* f, const char * x); void fmpz_poly_print_pretty(const fmpz_poly_t poly, const char * x); static inline int fmpz_poly_read(fmpz_poly_t poly) { return fmpz_poly_fread(poly, stdin); } static inline unsigned long fmpz_poly_limbs(const fmpz_poly_t poly) { return poly->limbs; } static inline long fmpz_poly_degree(const fmpz_poly_t poly) { return poly->length - 1; } static inline unsigned long fmpz_poly_length(const fmpz_poly_t poly) { return poly->length; } static inline long fmpz_poly_max_bits1(const fmpz_poly_t poly) { return _fmpz_poly_max_bits1(poly); } static inline long fmpz_poly_max_bits(const fmpz_poly_t poly) { return _fmpz_poly_max_bits(poly); } static inline long fmpz_poly_max_limbs(const fmpz_poly_t poly) { return _fmpz_poly_max_limbs(poly); } static inline void fmpz_poly_truncate(fmpz_poly_t poly, const unsigned long length) { FLINT_ASSERT(poly->length >= length); poly->length = length; _fmpz_poly_normalise(poly); } static inline void fmpz_poly_swap(fmpz_poly_t x, fmpz_poly_t y) { if (x == y) return; fmpz_t temp_p; mp_limb_t temp_l; temp_l = x->alloc; x->alloc = y->alloc; y->alloc = temp_l; temp_p = x->coeffs; 
x->coeffs = y->coeffs; y->coeffs = temp_p; temp_l = x->length; x->length = y->length; y->length = temp_l; temp_l = x->limbs; x->limbs = y->limbs; y->limbs = temp_l; } static inline fmpz_t fmpz_poly_get_coeff_ptr(const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) { return NULL; } return poly->coeffs+n*(poly->limbs+1); } static inline long fmpz_poly_get_coeff(mp_limb_t * output, const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) { F_mpn_clear(output, poly->limbs); return 0; } return _fmpz_poly_get_coeff(output, poly, n); } static inline unsigned long fmpz_poly_get_coeff_ui(const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) { return 0; } if (poly->coeffs[n*(poly->limbs+1)] == 0) return 0; else return poly->coeffs[n*(poly->limbs+1)+1]; } static inline long fmpz_poly_get_coeff_si(const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) { return 0; } if (poly->coeffs[n*(poly->limbs+1)] == 0) return 0; if (poly->coeffs[n*(poly->limbs+1)] == 1L) return poly->coeffs[n*(poly->limbs+1)+1]; else return -poly->coeffs[n*(poly->limbs+1)+1]; } void fmpz_poly_get_coeff_mpz(mpz_t x, const fmpz_poly_t poly, const unsigned long n); void fmpz_poly_get_coeff_mpz_read_only(mpz_t x, const fmpz_poly_t poly, const unsigned long n); static inline void fmpz_poly_set_coeff_fmpz(fmpz_poly_t poly, const unsigned long n, fmpz_t x) { fmpz_poly_fit_length(poly, n+1); fmpz_poly_fit_limbs(poly, fmpz_size(x)); if (n+1 > poly->length) { for (long i = poly->length; i + 1 < n; i++) { poly->coeffs[i*(poly->limbs+1)] = 0L; } poly->length = n+1; } _fmpz_poly_set_coeff_fmpz(poly, n, x); _fmpz_poly_normalise(poly); } static inline void fmpz_poly_get_coeff_fmpz(fmpz_t x, const fmpz_poly_t poly, const unsigned long n) { if (n >= poly->length) { x[0] = 0; return; } _fmpz_poly_get_coeff_fmpz(x, poly, n); } static inline void fmpz_poly_set_coeff(fmpz_poly_t poly, const unsigned long n, const mp_limb_t * x, const long sign, const unsigned 
long size) { fmpz_poly_fit_length(poly, n+1); fmpz_poly_fit_limbs(poly, size); if (n+1 > poly->length) { for (long i = poly->length; i + 1 < n; i++) { poly->coeffs[i*(poly->limbs+1)] = 0L; } poly->length = n+1; } _fmpz_poly_set_coeff(poly, n, x, sign, size); _fmpz_poly_normalise(poly); } static inline void fmpz_poly_set_coeff_si(fmpz_poly_t poly, const unsigned long n, const long x) { fmpz_poly_fit_length(poly, n+1); fmpz_poly_fit_limbs(poly, 1); if (n+1 > poly->length) { for (long i = poly->length; i + 1 < n; i++) { poly->coeffs[i*(poly->limbs+1)] = 0L; } poly->length = n+1; } _fmpz_poly_set_coeff_si(poly, n, x); _fmpz_poly_normalise(poly); } static inline void fmpz_poly_set_coeff_ui(fmpz_poly_t poly, const unsigned long n, const unsigned long x) { fmpz_poly_fit_length(poly, n+1); fmpz_poly_fit_limbs(poly, 1); if (n+1 > poly->length) { for (long i = poly->length; i + 1 < n; i++) { poly->coeffs[i*(poly->limbs+1)] = 0L; } poly->length = n+1; } _fmpz_poly_set_coeff_ui(poly, n, x); _fmpz_poly_normalise(poly); } static inline void fmpz_poly_set_coeff_mpz(fmpz_poly_t poly, const unsigned long n, const mpz_t x) { fmpz_poly_fit_length(poly, n+1); fmpz_poly_fit_limbs(poly, mpz_size(x)); _fmpz_poly_set_coeff_mpz(poly, n, x); } static inline void fmpz_poly_set(fmpz_poly_t output, const fmpz_poly_t input) { fmpz_poly_fit_length(output, input->length); fmpz_poly_fit_limbs(output, input->limbs); _fmpz_poly_set(output, input); } static inline int fmpz_poly_equal(const fmpz_poly_t input1, const fmpz_poly_t input2) { return _fmpz_poly_equal(input1, input2); } static inline void fmpz_poly_zero(fmpz_poly_t output) { output->length = 0; } static inline void fmpz_poly_zero_coeffs(fmpz_poly_t poly, const unsigned long n) { fmpz_poly_fit_length(poly, n); if (n >= poly->length) { fmpz_poly_zero(poly); return; } _fmpz_poly_zero_coeffs(poly, n); } static inline void fmpz_poly_neg(fmpz_poly_t output, const fmpz_poly_t input) { fmpz_poly_fit_length(output, input->length); 
fmpz_poly_fit_limbs(output, input->limbs); _fmpz_poly_neg(output, input); } static inline void fmpz_poly_reverse(fmpz_poly_t output, const fmpz_poly_t input, const unsigned long length) { fmpz_poly_fit_length(output, length); fmpz_poly_fit_limbs(output, input->limbs); _fmpz_poly_reverse(output, input, length); } static inline void fmpz_poly_left_shift(fmpz_poly_t output, const fmpz_poly_t input, const unsigned long n) { if (input->length + n == 0) { fmpz_poly_zero(output); return; } fmpz_poly_fit_length(output, input->length + n); fmpz_poly_fit_limbs(output, input->limbs); _fmpz_poly_left_shift(output, input, n); } static inline void fmpz_poly_right_shift(fmpz_poly_t output, const fmpz_poly_t input, const unsigned long n) { if ((long)(input->length - n) <= 0L) { fmpz_poly_zero(output); return; } fmpz_poly_fit_length(output, input->length - n); fmpz_poly_fit_limbs(output, input->limbs); _fmpz_poly_right_shift(output, input, n); } void fmpz_poly_2norm(fmpz_t norm, fmpz_poly_t pol); void fmpz_poly_scalar_mul_fmpz(fmpz_poly_t output, const fmpz_poly_t input, const fmpz_t x); void fmpz_poly_scalar_mul_ui(fmpz_poly_t output, const fmpz_poly_t input, unsigned long x); void fmpz_poly_scalar_mul_si(fmpz_poly_t output, const fmpz_poly_t input, long x); void fmpz_poly_scalar_mul_mpz(fmpz_poly_t output, const fmpz_poly_t input, const mpz_t x); static inline void fmpz_poly_scalar_div_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { fmpz_poly_zero(output); return; } unsigned long limbs = fmpz_poly_max_limbs(poly); fmpz_poly_fit_length(output, poly->length); fmpz_poly_fit_limbs(output, limbs); _fmpz_poly_scalar_div_ui(output, poly, x); } static inline void fmpz_poly_scalar_div_si(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { fmpz_poly_zero(output); return; } unsigned long limbs = fmpz_poly_max_limbs(poly); fmpz_poly_fit_length(output, poly->length); fmpz_poly_fit_limbs(output, limbs); 
_fmpz_poly_scalar_div_si(output, poly, x); } static inline void fmpz_poly_scalar_tdiv_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { fmpz_poly_zero(output); return; } unsigned long limbs = fmpz_poly_max_limbs(poly); fmpz_poly_fit_length(output, poly->length); fmpz_poly_fit_limbs(output, limbs); _fmpz_poly_scalar_tdiv_ui(output, poly, x); } static inline void fmpz_poly_scalar_tdiv_si(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { fmpz_poly_zero(output); return; } unsigned long limbs = fmpz_poly_max_limbs(poly); fmpz_poly_fit_length(output, poly->length); fmpz_poly_fit_limbs(output, limbs); _fmpz_poly_scalar_tdiv_si(output, poly, x); } static inline void fmpz_poly_scalar_div_exact_ui(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { fmpz_poly_zero(output); return; } unsigned long limbs = fmpz_poly_max_limbs(poly); fmpz_poly_fit_length(output, poly->length); fmpz_poly_fit_limbs(output, limbs); _fmpz_poly_scalar_div_exact_ui(output, poly, x); } static inline void fmpz_poly_scalar_div_exact_si(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long x) { if (poly->length == 0) { fmpz_poly_zero(output); return; } unsigned long limbs = fmpz_poly_max_limbs(poly); fmpz_poly_fit_length(output, poly->length); fmpz_poly_fit_limbs(output, limbs); _fmpz_poly_scalar_div_exact_si(output, poly, x); } void fmpz_poly_scalar_div_fmpz(fmpz_poly_t output, const fmpz_poly_t input, const fmpz_t x); void fmpz_poly_scalar_div_mpz(fmpz_poly_t output, const fmpz_poly_t input, const mpz_t x); static inline void fmpz_poly_add(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2) { if (input1 == input2) { fmpz_poly_scalar_mul_ui(output, input1, 2UL); return; } unsigned long bits1 = FLINT_ABS(_fmpz_poly_max_bits(input1)); unsigned long bits2 = FLINT_ABS(_fmpz_poly_max_bits(input2)); fmpz_poly_fit_length(output, 
FLINT_MAX(input1->length, input2->length)); fmpz_poly_fit_limbs(output, FLINT_MAX(bits1, bits2)/FLINT_BITS + 1); _fmpz_poly_add(output, input1, input2); } static inline void fmpz_poly_sub(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2) { if (input1 == input2) { fmpz_poly_zero(output); return; } unsigned long bits1 = FLINT_ABS(_fmpz_poly_max_bits(input1)); unsigned long bits2 = FLINT_ABS(_fmpz_poly_max_bits(input2)); fmpz_poly_fit_length(output, FLINT_MAX(input1->length, input2->length)); fmpz_poly_fit_limbs(output, FLINT_MAX(bits1, bits2)/FLINT_BITS + 1); _fmpz_poly_sub(output, input1, input2); } void fmpz_poly_mul(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2); void fmpz_poly_mul_trunc_n(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void fmpz_poly_mul_trunc_left_n(fmpz_poly_t output, const fmpz_poly_t input1, const fmpz_poly_t input2, const unsigned long trunc); void fmpz_poly_div_classical(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_divrem_classical(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_div_divconquer_recursive(fmpz_poly_t Q, fmpz_poly_t DQ, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_divrem_divconquer(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_div_divconquer(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_div_mulders(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_newton_invert_basecase(fmpz_poly_t Q_inv, const fmpz_poly_t Q, const unsigned long n); void fmpz_poly_newton_invert(fmpz_poly_t Q_inv, const fmpz_poly_t Q, const unsigned long n); void fmpz_poly_div_series(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B, const unsigned long n); void fmpz_poly_div_newton(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B); static inline void fmpz_poly_div(fmpz_poly_t Q, 
const fmpz_poly_t A, const fmpz_poly_t B) { if (A == B) { fmpz_poly_fit_length(Q, 1); fmpz_poly_fit_limbs(Q, 1); fmpz_poly_zero(Q); fmpz_poly_set_coeff_ui(Q, 0, 1UL); return; } fmpz_poly_t Ain, Bin; if (A == Q) { _fmpz_poly_stack_init(Ain, A->length, A->limbs); _fmpz_poly_set(Ain, A); } else _fmpz_poly_attach(Ain, A); if (B == Q) { _fmpz_poly_stack_init(Bin, B->length, B->limbs); _fmpz_poly_set(Bin, B); } else _fmpz_poly_attach(Bin, B); fmpz_poly_div_mulders(Q, Ain, Bin); if (A == Q) _fmpz_poly_stack_clear(Ain); if (B == Q) _fmpz_poly_stack_clear(Bin); } static inline void fmpz_poly_divrem(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B) { if (A == B) { fmpz_poly_fit_length(Q, 1); fmpz_poly_fit_limbs(Q, 1); fmpz_poly_zero(Q); fmpz_poly_zero(R); fmpz_poly_set_coeff_ui(Q, 0, 1UL); return; } fmpz_poly_t Ain, Bin; if ((A == R) || (A == Q)) { _fmpz_poly_stack_init(Ain, A->length, A->limbs); _fmpz_poly_set(Ain, A); } else _fmpz_poly_attach(Ain, A); if ((B == R) || (B == Q)) { _fmpz_poly_stack_init(Bin, B->length, B->limbs); _fmpz_poly_set(Bin, B); } else _fmpz_poly_attach(Bin, B); fmpz_poly_divrem_divconquer(Q, R, Ain, Bin); if ((A == R) || (A == Q)) _fmpz_poly_stack_clear(Ain); if ((B == R) || (B == Q)) _fmpz_poly_stack_clear(Bin); } /* Returns 1 and the quotient if B divides A else returns 0 */ static inline int fmpz_poly_divides(fmpz_poly_t Q, fmpz_poly_t A, fmpz_poly_t B) { fmpz_poly_t R; int divides = 0; fmpz_poly_init(R); fmpz_poly_divrem(Q, R, A, B); if (R->length == 0) divides = 1; fmpz_poly_clear(R); return divides; } void fmpz_poly_power(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long exp); void fmpz_poly_power_trunc_n(fmpz_poly_t output, const fmpz_poly_t poly, const unsigned long exp, const unsigned long n); void fmpz_poly_pseudo_divrem_cohen(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_pseudo_divrem_shoup(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B); 
void fmpz_poly_pseudo_divrem_basecase(fmpz_poly_t Q, fmpz_poly_t R, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_pseudo_div_basecase(fmpz_poly_t Q, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B); void fmpz_poly_pseudo_divrem_recursive(fmpz_poly_t Q, fmpz_poly_t R, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B); static inline void fmpz_poly_pseudo_divrem(fmpz_poly_t Q, fmpz_poly_t R, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B) { if (A == B) { fmpz_poly_fit_length(Q, 1); fmpz_poly_fit_limbs(Q, 1); fmpz_poly_zero(Q); fmpz_poly_zero(R); d = 0; fmpz_poly_set_coeff_ui(Q, 0, 1UL); return; } fmpz_poly_t Ain, Bin; if ((A == R) || (A == Q)) { _fmpz_poly_stack_init(Ain, A->length, A->limbs); _fmpz_poly_set(Ain, A); } else _fmpz_poly_attach(Ain, A); if ((B == R) || (B == Q)) { _fmpz_poly_stack_init(Bin, B->length, B->limbs); _fmpz_poly_set(Bin, B); } else _fmpz_poly_attach(Bin, B); fmpz_poly_pseudo_divrem_recursive(Q, R, d, Ain, Bin); if ((A == R) || (A == Q)) _fmpz_poly_stack_clear(Ain); if ((B == R) || (B == Q)) _fmpz_poly_stack_clear(Bin); } void fmpz_poly_pseudo_div_recursive(fmpz_poly_t Q, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B); static inline void fmpz_poly_pseudo_div(fmpz_poly_t Q, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B) { if (A == B) { fmpz_poly_fit_length(Q, 1); fmpz_poly_fit_limbs(Q, 1); fmpz_poly_zero(Q); d = 0; fmpz_poly_set_coeff_ui(Q, 0, 1UL); return; } fmpz_poly_t Ain, Bin; if (A == Q) { _fmpz_poly_stack_init(Ain, A->length, A->limbs); _fmpz_poly_set(Ain, A); } else _fmpz_poly_attach(Ain, A); if (B == Q) { _fmpz_poly_stack_init(Bin, B->length, B->limbs); _fmpz_poly_set(Bin, B); } else _fmpz_poly_attach(Bin, B); fmpz_poly_pseudo_div_recursive(Q, d, Ain, Bin); if (A == Q) _fmpz_poly_stack_clear(Ain); if (B == Q) _fmpz_poly_stack_clear(Bin); } void fmpz_poly_content(fmpz_t c, fmpz_poly_t poly); static inline void 
_fmpz_poly_primitive_part(fmpz_poly_t prim, fmpz_poly_t poly) { if (poly->length == 0) { _fmpz_poly_zero(prim); return; } fmpz_t c = fmpz_init(poly->limbs); _fmpz_poly_content(c, poly); _fmpz_poly_scalar_div_fmpz(prim, poly, c); fmpz_clear(c); } static inline void fmpz_poly_primitive_part(fmpz_poly_t prim, fmpz_poly_t poly) { if (poly->length == 0) { fmpz_poly_zero(prim); return; } fmpz_t c = fmpz_init(poly->limbs); fmpz_poly_content(c, poly); fmpz_poly_scalar_div_fmpz(prim, poly, c); fmpz_clear(c); } void fmpz_poly_gcd_subresultant(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2); void fmpz_poly_gcd_modular(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2); void fmpz_poly_gcd(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2); void fmpz_poly_invmod_modular(fmpz_t d, fmpz_poly_t H, fmpz_poly_t poly1, fmpz_poly_t poly2); static inline void fmpz_poly_invmod(fmpz_t d, fmpz_poly_t H, fmpz_poly_t poly1, fmpz_poly_t poly2) { fmpz_poly_invmod_modular(d, H, poly1, poly2); } unsigned long fmpz_poly_resultant_bound(fmpz_poly_t a, fmpz_poly_t b); void fmpz_poly_resultant(fmpz_t res, fmpz_poly_t a, fmpz_poly_t b); void fmpz_poly_xgcd_modular(fmpz_t r, fmpz_poly_t s, fmpz_poly_t t, fmpz_poly_t a, fmpz_poly_t b); static inline void fmpz_poly_xgcd(fmpz_t r, fmpz_poly_t s, fmpz_poly_t t, fmpz_poly_t a, fmpz_poly_t b) { fmpz_poly_xgcd_modular(r, s, t, a, b); } // *************** end of file #ifdef __cplusplus } #endif #endif flint-1.011/mpz_extras.c0000644017361200017500000004222511025357254015070 0ustar tabbotttabbott/*============================================================================ Copyright (C) 2007, William Hart This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ #include #include #include #include #include "mpz_extras.h" #include "flint.h" #include "mpn_extras.h" #include "F_mpn_mul-tuning.h" #include "memory-manager.h" #include "longlong_wrapper.h" #include "longlong.h" #define DEBUG2 1 #define DEBUG 0 /* Memory manager to allocate a single mpz_t. It returns a pointer to the mpz_t. mpz_t's should be released in the order they were allocated. */ #define RESALLOC 100 //allocate this many mpz_t's at once to save on overheads mpz_t** reservoir; // Array of pointers to mpz_t's in the reservoir unsigned long rescount=0; //Next available mpz_t in reservoir unsigned long currentalloc=0; //total number of mpz_t's in reservoir mpz_t* F_mpz_alloc(void) { static int initialised = 0; static mpz_t** tempres; mpz_t* alloc_d; //allocate another block of mpz_t's if none are currently allocated, or the reservoir is depleted if (rescount==currentalloc) // need more limb_memp_t's { if (!initialised) { reservoir = (mpz_t**)malloc(RESALLOC*sizeof(mpz_t*)); //allocate space for the array of pointers reservoir[0] = (mpz_t*)malloc(RESALLOC*sizeof(mpz_t)); //allocate space for the mpz_t's for (unsigned long i=0; i= p) p2 %= p; #if UDIV_NEEDS_NORMALIZATION count_lead_zeros(norm, p); udiv_qrnnd(q, r, (p2<>(FLINT_BITS-norm)), p1<= 0) mpz_sub(res, res, m); mpz_clear(x); mpz_clear(s); } /* Compute a^exp mod m using Montgomery reduction Requires that m is odd and positive and that exp is positive */ void F_mpz_expmod_mont(mpz_t res, mpz_t a, mpz_t exp, mpz_t m) { 
unsigned long n; unsigned long bits = mpz_sizeinbase(exp, 2); mpz_t aRED; mpz_t powRED; mpz_t R; mpz_t temp; int flag = 0; mpz_init(aRED); mpz_init(powRED); mpz_init(R); mpz_init(temp); n = F_mpz_mont_red(aRED, a, m); mpz_set_ui(temp, 1); mpz_mul_2exp(temp, temp, n); mpz_invert(R, m, temp); mpz_sub(R, temp, R); if (mpz_cmp(R, temp) == 0) mpz_sub(R, R, temp); mpz_set(powRED, aRED); #ifdef DEBUG gmp_printf("powRED = %Zd\n", powRED); #endif for (unsigned long i = 0; i < bits - 1; i++) { if (mpz_tstbit(exp, i)) { if (flag) F_mpz_mont_mul(res, res, powRED, m, R, n); else { mpz_set(res, powRED); flag = 1; } } F_mpz_mont_mul(powRED, powRED, powRED, m, R, n); #ifdef DEBUG gmp_printf("powRED = %Zd\n", powRED); #endif } if (flag) F_mpz_mont_mul(res, res, powRED, m, R, n); else mpz_set(res, powRED); mpz_set_ui(temp, 1); F_mpz_mont_mul(res, res, temp, m, R, n); mpz_clear(temp); mpz_clear(R); mpz_clear(powRED); mpz_clear(aRED); } void F_mpz_divrem_BZ(mpz_t Q, mpz_t R, mpz_t A, mpz_t B) { unsigned long n = mpz_size(B); unsigned long m = mpz_size(A) - n; if ((long) m < 0) { mpz_set_ui(Q, 0); mpz_set(R, A); return; } if (m < 64) { mpz_fdiv_qr(Q, R, A, B); return; } unsigned long k = m/2; mpz_t * B0 = F_mpz_alloc(); mpz_t * B1 = F_mpz_alloc(); mpz_t * A0 = F_mpz_alloc(); mpz_t * A1 = F_mpz_alloc(); mpz_t * Q0 = F_mpz_alloc(); mpz_t * Q1 = F_mpz_alloc(); mpz_t * R0 = F_mpz_alloc(); mpz_t * R1 = F_mpz_alloc(); mpz_t * temp = F_mpz_alloc(); mpz_t * temp2 = F_mpz_alloc(); mpz_t * temp3 = F_mpz_alloc(); mpz_fdiv_q_2exp(*B1, B, FLINT_BITS*k); mpz_fdiv_q_2exp(*A1, A, FLINT_BITS*2*k); F_mpz_divrem_BZ(*Q1, *R1, *A1, *B1); mpz_fdiv_r_2exp(*B0, B, FLINT_BITS*k); mpz_fdiv_r_2exp(*A0, A, FLINT_BITS*2*k); mpz_mul_2exp(*temp, *R1, FLINT_BITS*2*k); mpz_add(*temp, *temp, *A0); mpz_mul_2exp(*temp2, *Q1, FLINT_BITS*k); mpz_mul(*temp2, *temp2, *B0); mpz_sub(*temp, *temp, *temp2); mpz_mul_2exp(*temp2, B, FLINT_BITS*k); while (mpz_cmp_ui(*temp, 0) < 0) { mpz_sub_ui(*Q1, *Q1, 1); mpz_add(*temp, *temp, 
*temp2); } mpz_fdiv_q_2exp(*temp2, *temp, FLINT_BITS*k); F_mpz_divrem_BZ(*Q0, *R0, *temp2, *B1); mpz_fdiv_r_2exp(*temp2, *temp, FLINT_BITS*k); mpz_mul_2exp(R, *R0, FLINT_BITS*k); mpz_add(R, R, *temp2); mpz_submul(R, *Q0, *B0); while (mpz_cmp_ui(R, 0) < 0) { mpz_sub_ui(*Q0, *Q0, 1); mpz_add(R, R, B); } mpz_mul_2exp(Q, *Q1, FLINT_BITS*k); mpz_add(Q, Q, *Q0); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); } void F_mpz_rem_BZ(mpz_t R, mpz_t A, mpz_t B) { unsigned long n = mpz_size(B); unsigned long m = mpz_size(A) - n; if ((long) m < 0) { mpz_set(R, A); return; } if (m < 64) { mpz_fdiv_r(R, A, B); return; } unsigned long k = m/2; mpz_t * B0 = F_mpz_alloc(); mpz_t * B1 = F_mpz_alloc(); mpz_t * A0 = F_mpz_alloc(); mpz_t * A1 = F_mpz_alloc(); mpz_t * Q0 = F_mpz_alloc(); mpz_t * Q1 = F_mpz_alloc(); mpz_t * R0 = F_mpz_alloc(); mpz_t * R1 = F_mpz_alloc(); mpz_t * temp = F_mpz_alloc(); mpz_t * temp2 = F_mpz_alloc(); mpz_t * temp3 = F_mpz_alloc(); mpz_fdiv_q_2exp(*B1, B, FLINT_BITS*k); mpz_fdiv_q_2exp(*A1, A, FLINT_BITS*2*k); F_mpz_divrem_BZ(*Q1, *R1, *A1, *B1); mpz_fdiv_r_2exp(*B0, B, FLINT_BITS*k); mpz_fdiv_r_2exp(*A0, A, FLINT_BITS*2*k); mpz_mul_2exp(*temp, *R1, FLINT_BITS*2*k); mpz_add(*temp, *temp, *A0); mpz_mul_2exp(*temp2, *Q1, FLINT_BITS*k); mpz_mul(*temp2, *temp2, *B0); mpz_sub(*temp, *temp, *temp2); mpz_mul_2exp(*temp2, B, FLINT_BITS*k); while (mpz_cmp_ui(*temp, 0) < 0) { mpz_sub_ui(*Q1, *Q1, 1); mpz_add(*temp, *temp, *temp2); } mpz_fdiv_q_2exp(*temp2, *temp, FLINT_BITS*k); F_mpz_divrem_BZ(*Q0, *R0, *temp2, *B1); mpz_fdiv_r_2exp(*temp2, *temp, FLINT_BITS*k); mpz_mul_2exp(R, *R0, FLINT_BITS*k); mpz_add(R, R, *temp2); mpz_submul(R, *Q0, *B0); while (mpz_cmp_ui(R, 0) < 0) { mpz_add(R, R, B); } F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release(); 
F_mpz_release(); F_mpz_release(); F_mpz_release(); } void F_mpz_mulmod_BZ(mpz_t res, mpz_t a, mpz_t b, mpz_t m) { mpz_t * temp = F_mpz_alloc(); mpz_mul(*temp, a, b); F_mpz_rem_BZ(res, *temp, m); F_mpz_release(); } void F_mpz_expmod_BZ(mpz_t res, mpz_t a, mpz_t exp, mpz_t m) { unsigned long n; unsigned long bits = mpz_sizeinbase(exp, 2); mpz_t aRED; mpz_t powRED; mpz_t temp; int flag = 0; mpz_init(aRED); mpz_init(powRED); mpz_init(temp); mpz_set(powRED, a); #if DEBUG gmp_printf("powRED = %Zd\n", powRED); #endif for (unsigned long i = 0; i < bits - 1; i++) { if (mpz_tstbit(exp, i)) { if (flag) F_mpz_mulmod_BZ(res, res, powRED, m); else { mpz_set(res, powRED); flag = 1; } } F_mpz_mulmod_BZ(powRED, powRED, powRED, m); #if DEBUG gmp_printf("powRED = %Zd\n", powRED); #endif } if (flag) F_mpz_mulmod_BZ(res, res, powRED, m); else mpz_set(res, powRED); mpz_clear(temp); mpz_clear(powRED); mpz_clear(aRED); } /* Large integer multiplication code */ void __F_mpz_mul(mpz_t res, mpz_t a, mpz_t b, unsigned long twk) { unsigned long sa = mpz_size(a); unsigned long sb = mpz_size(b); if (sa+sb > FLINT_FFT_LIMBS_CROSSOVER) { unsigned long s1 = (FLINT_BIT_COUNT(a->_mp_d[sa-1]) + FLINT_BIT_COUNT(b->_mp_d[sb-1]) <= FLINT_BITS); mp_limb_t* output = (mp_limb_t*) flint_stack_alloc(sa + sb); __F_mpn_mul(output, a->_mp_d, sa, b->_mp_d, sb, twk); mpz_import(res, sa+sb-s1, -1, sizeof(mp_limb_t), 0, 0, output); if (mpz_sgn(res) != mpz_sgn(a)*mpz_sgn(b)) mpz_neg(res,res); flint_stack_release(); } else mpz_mul(res, a, b); } void F_mpz_mul(mpz_t res, mpz_t a, mpz_t b) { unsigned long sa = mpz_size(a); unsigned long sb = mpz_size(b); if (sa+sb > FLINT_FFT_LIMBS_CROSSOVER) { unsigned long s1 = (FLINT_BIT_COUNT(a->_mp_d[sa-1]) + FLINT_BIT_COUNT(b->_mp_d[sb-1]) <= FLINT_BITS); mp_limb_t* output = (mp_limb_t*) flint_stack_alloc(sa + sb); F_mpn_mul(output, a->_mp_d, sa, b->_mp_d, sb); mpz_import(res, sa+sb-s1, -1, sizeof(mp_limb_t), 0, 0, output); if (mpz_sgn(res) != mpz_sgn(a)*mpz_sgn(b)) 
mpz_neg(res,res); flint_stack_release(); } else mpz_mul(res, a, b); } flint-1.011/flint_env0000644017361200017500000000265311025357254014440 0ustar tabbotttabbott#!/bin/sh # (C) 2007, Robert Bradshaw, William Hart, William Stein, Michael Abshoff if [ "`uname`" = "Linux" -a "`uname -m`" = "x86_64" ]; then FLINT_TUNE="-mtune=opteron -march=opteron -funroll-loops " elif [ "`uname`" = "Darwin" -a "`uname -m`" = "Power Macintosh" ]; then FLINT_TUNE=" -funroll-loops " elif [ "`uname -p`" = "powerpc" ]; then FLINT_TUNE="-m64 -mcpu=970 -mtune=970 -mpowerpc64 -falign-loops=16 -falign-functions=16 -falign-labels=16 -falign-jumps=16" elif [ "`uname -m`" = "ia64" ]; then # -funroll-loops crashes the build on itanium under GCC-4.2.1, as reported by # Kate Minola. FLINT_TUNE=" " else FLINT_TUNE="-funroll-loops " fi if [ "`uname`" = "Darwin" ]; then FLINT_LIB="libflint.dylib" else FLINT_LIB="libflint.so" fi export FLINT_TUNE export FLINT_LIB if [ -z "$FLINT_GMP_INCLUDE_DIR" ] then FLINT_GMP_INCLUDE_DIR="/usr/local/include" fi export FLINT_GMP_INCLUDE_DIR if [ -z "$FLINT_GMP_LIB_DIR" ] then FLINT_GMP_LIB_DIR="/usr/local/lib" fi export FLINT_GMP_LIB_DIR if [ -z "$FLINT_LINK_OPTIONS" ] then FLINT_LINK_OPTIONS="" fi export FLINT_LINK_OPTION if [ -z "${LD_LIBRARY_PATH}" ] then LD_LIBRARY_PATH=$FLINT_GMP_LIB_DIR fi export LD_LIBRARY_PATH if [ -z "$FLINT_NTL_INCLUDE_DIR" ] then FLINT_NTL_INCLUDE_DIR="/usr/local/include" fi export FLINT_NTL_INCLUDE_DIR if [ -z "$FLINT_NTL_LIB_DIR" ] then FLINT_NTL_LIB_DIR="/usr/local/lib" fi export FLINT_NTL_LIB_DIR flint-1.011/BPTJCubes.c0000644017361200017500000001740211025357254014414 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** BPTJ_cubes.c: Finds solutions to x^3 + y^3 + z^3 = k Based on the algorithm of Beck, Pine, Tarrant and Jensen Simultaneously searches for solutions for k = k1, k2, k3 Searches from T = START to STOP Copyright (C) 2007, William Hart *****************************************************************************/ #include #include #include #include #include "long_extras.h" #define START 1000000 #define STOP 200000000 #define k1 1965 #define k2 1986 #define k3 1991 #define NUMPRIMES 4000 #define TABLESIZE 1800000 // Must be a multiple of CACHEBLOCK #define CACHEBLOCK 60000 #define RABIN 6 mpz_t temp, temp2, D; gmp_randstate_t randstate; static inline int test_root(unsigned long root, unsigned long T, unsigned long k) { long d; if (root > T/2) { mpz_set_ui(D, root); mpz_pow_ui(D, D, 3); mpz_sub_ui(D, D, k); } else { mpz_set_ui(D, T-root); mpz_pow_ui(D, D, 3); mpz_add_ui(D, D, k); } mpz_mul_2exp(D, D, 2); mpz_divexact_ui(D, D, T); mpz_set_ui(temp2, T); mpz_submul_ui(D, temp2, T); mpz_mul_ui(D, D, 3); if (mpz_sgn(D) >= 0) { mpz_sqrtrem(D, temp2, D); if (!mpz_sgn(temp2)) { if (!mpz_fdiv_r_ui(temp, D, 3)) { d = mpz_fdiv_r_ui(temp, D, 6); if ((((3*T)%6) == d) || (((3*T)%6) == 6-d)) return 1; } } } return 0; } int main() { gmp_randinit_default(randstate); FILE * file1 = fopen("output.log","w"); unsigned long T = z_nextprime(START); double Tinv; unsigned long cuberoot1; unsigned long root1, root2, root3; 
unsigned long s = 0, t, p; unsigned char * current; unsigned char * table = (unsigned char *) malloc(TABLESIZE); unsigned long * mod = (unsigned long *) malloc(NUMPRIMES*sizeof(unsigned long)); unsigned long * prime = (unsigned long *) malloc(NUMPRIMES*sizeof(unsigned long)); s = 3; for (unsigned long i = 0; i < NUMPRIMES; i++) { prime[i] = s; s = z_nextprime(s); } for (unsigned long i = 0; i < NUMPRIMES; i++) { s = (T%prime[i]); if (s == 0) s = prime[i]; mod[i] = prime[i]-s; } mpz_init(temp); mpz_init(temp2); mpz_init(D); unsigned long Ttab = 0; while (T < STOP) { memset(table, 0, TABLESIZE); for (unsigned long offset = 0; offset < TABLESIZE; offset+=CACHEBLOCK) { for (unsigned long i = 0; i < NUMPRIMES; i++) { s = mod[i]; p = prime[i]; current = table + offset; for ( ; s < CACHEBLOCK; s += p) current[s] = 1; s -= CACHEBLOCK; mod[i] = s; } } mpz_set_ui(temp, T); while (!mpz_probab_prime_p(temp,3)) { T += 2; Ttab += 2; while (table[Ttab]) { T += 2; Ttab += 2; } mpz_set_ui(temp, T); } Tinv = z_precompute_inverse(T); while (Ttab < TABLESIZE) { root1 = z_cuberootmod(&cuberoot1, k1, T); if (root1) { if (cuberoot1 != 1) { root2 = z_mulmod_precomp(root1, cuberoot1, Tinv, T); root3 = z_mulmod_precomp(root2, cuberoot1, Tinv, T); } if (test_root(root1, T, k1)) { fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k1, T, root1); fflush(file1); abort(); } if (cuberoot1 != 1) { if (test_root(root2, T, k1)) { fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k1, T, root2); fflush(file1); abort(); } if (test_root(root3, T, k1)) { fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k1, T, root3); fflush(file1); abort(); } } } root1 = z_cuberootmod(&cuberoot1, k2, T); if (root1) { if (cuberoot1 != 1) { root2 = z_mulmod_precomp(root1, cuberoot1, Tinv, T); root3 = z_mulmod_precomp(root2, cuberoot1, Tinv, T); } if (test_root(root1, T, k2)) { fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k2, T, root1); fflush(file1); abort(); } if (cuberoot1 != 1) { if (test_root(root2, T, k2)) { 
fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k2, T, root2); fflush(file1); abort(); } if (test_root(root3, T, k2)) { fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k2, T, root3); fflush(file1); abort(); } } } root1 = z_cuberootmod(&cuberoot1, k3, T); if (root1) { if (cuberoot1 != 1) { root2 = z_mulmod_precomp(root1, cuberoot1, Tinv, T); root3 = z_mulmod_precomp(root2, cuberoot1, Tinv, T); } if (test_root(root1, T, k3)) { fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k3, T, root1); fflush(file1); abort(); } if (cuberoot1 != 1) { if (test_root(root2, T, k3)) { fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k3, T, root2); fflush(file1); abort(); } if (test_root(root3, T, k3)) { fprintf(file1,"k = %ld, T = %ld, root = %ld\n", k3, T, root3); fflush(file1); abort(); } } } do { do { T += 2; Ttab += 2; } while (table[Ttab] && (Ttab < TABLESIZE)); Tinv = z_precompute_inverse(T); } while (!z_isprime_precomp(T, Tinv) && (Ttab < TABLESIZE)); t++; if ((t%1000000UL) == 0) { fprintf(file1,"Checkpoint T = %ld\n", T); fflush(file1); } } Ttab -= TABLESIZE; } mpz_clear(temp); mpz_clear(temp2); mpz_clear(D); gmp_randclear(randstate); return 0; } flint-1.011/makefile0000644017361200017500000003314511025357254014231 0ustar tabbotttabbottLIBDIR=$(PREFIX)/lib INCLUDEDIR=$(PREFIX)/include DOCDIR=$(PREFIX)/doc ifndef FLINT_CC FLINT_CC = gcc endif ifeq ($(MAKECMDGOALS),library) CC = $(FLINT_CC) -fPIC -std=c99 else CC = $(FLINT_CC) -std=c99 endif ifndef FLINT_PY FLINT_PY = python endif ifndef FLINT_CPP FLINT_CPP = g++ endif CPP = $(FLINT_CPP) LIBS = -L$(FLINT_GMP_LIB_DIR) $(FLINT_LINK_OPTIONS) -lgmp -lpthread -lm LIBS2 = -L$(FLINT_GMP_LIB_DIR) -L$(FLINT_NTL_LIB_DIR) $(FLINT_LINK_OPTIONS) -lgmp -lpthread -lntl -lm ifndef FLINT_NTL_INCLUDE_DIR INCS = -I$(FLINT_GMP_INCLUDE_DIR) else INCS = -I$(FLINT_GMP_INCLUDE_DIR) -I$(FLINT_NTL_INCLUDE_DIR) endif CFLAGS = $(INCS) $(FLINT_TUNE) -O3 RM = rm -f HEADERS = \ mpz_extras.h \ F_mpn_mul-tuning.h \ ZmodF.h \ ZmodF_mul-tuning.h \ ZmodF_mul.h \ 
ZmodF_poly.h \
flint.h \
fmpz.h \
fmpz_poly.h \
longlong.h \
longlong_wrapper.h \
memory-manager.h \
mpn_extras.h \
mpz_poly-tuning.h \
mpz_poly.h \
profiler-main.h \
profiler.h \
test-support.h \
long_extras.h \
zmod_poly.h

####### library object files

FLINTOBJ = \
mpn_extras.o \
mpz_extras.o \
memory-manager.o \
ZmodF.o \
ZmodF_mul.o \
ZmodF_mul-tuning.o \
fmpz.o \
fmpz_poly.o \
mpz_poly-tuning.o \
mpz_poly.o \
ZmodF_poly.o \
long_extras.o \
zmod_poly.o

####### aggregate targets

# None of these names a file that the rule creates (and a QS/ directory
# exists in the source tree), so they are declared phony.
.PHONY: QS tune test profile examples all library

QS: mpQS tinyQS

tune: ZmodF_mul-tune mpz_poly-tune

test: mpn_extras-test fmpz_poly-test fmpz-test ZmodF-test ZmodF_poly-test mpz_poly-test ZmodF_mul-test long_extras-test zmod_poly-test

profile: ZmodF_poly-profile kara-profile fmpz_poly-profile mpz_poly-profile ZmodF_mul-profile

examples: delta_qexp BPTJCubes bernoulli_zmod F_mpz_mul-timing expmod

all: QS tune test profile examples

library: $(FLINT_LIB)

libflint.dylib: $(FLINTOBJ)
	$(CC) -single_module -fPIC -dynamiclib -o libflint.dylib $(FLINTOBJ) $(LIBS)

libflint.dll: $(FLINTOBJ)
	$(CC) -fPIC -shared -o libflint.dll $(FLINTOBJ) $(LIBS)

libflint.so: $(FLINTOBJ)
	$(CC) -fPIC -shared -o libflint.so $(FLINTOBJ) $(LIBS)

mpn_extras.o: mpn_extras.c $(HEADERS)
	$(CC) $(CFLAGS) -c mpn_extras.c -o mpn_extras.o

mpz_extras.o: mpz_extras.c $(HEADERS)
	$(CC) $(CFLAGS) -c mpz_extras.c -o mpz_extras.o

memory-manager.o: memory-manager.c $(HEADERS)
	$(CC) $(CFLAGS) -c memory-manager.c -o memory-manager.o

ZmodF.o: ZmodF.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF.c -o ZmodF.o

ZmodF_mul.o: ZmodF_mul.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF_mul.c -o ZmodF_mul.o

ZmodF_mul-tuning.o: ZmodF_mul-tuning.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF_mul-tuning.c -o ZmodF_mul-tuning.o

fmpz.o: fmpz.c $(HEADERS)
	$(CC) $(CFLAGS) -c fmpz.c -o fmpz.o

fmpz_poly.o: fmpz_poly.c $(HEADERS)
	$(CC) $(CFLAGS) -c fmpz_poly.c -o fmpz_poly.o

mpz_poly.o: mpz_poly.c $(HEADERS)
	$(CC) $(CFLAGS) -c mpz_poly.c -o mpz_poly.o

mpz_poly-tuning.o: mpz_poly-tuning.c $(HEADERS)
	$(CC) $(CFLAGS) -c mpz_poly-tuning.c -o mpz_poly-tuning.o

ZmodF_poly.o: ZmodF_poly.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF_poly.c -o ZmodF_poly.o

long_extras.o: long_extras.c long_extras.h
	$(CC) $(CFLAGS) -c long_extras.c -o long_extras.o

zmod_poly.o: zmod_poly.c $(HEADERS)
	$(CC) $(CFLAGS) -c zmod_poly.c -o zmod_poly.o

NTL-interface.o: NTL-interface.cpp $(HEADERS)
	$(CPP) $(CFLAGS) -c NTL-interface.cpp -o NTL-interface.o

####### test program object files

test-support.o: test-support.c $(HEADERS)
	$(CC) $(CFLAGS) -c test-support.c -o test-support.o

fmpz_poly-test.o: fmpz_poly-test.c $(HEADERS)
	$(CC) $(CFLAGS) -c fmpz_poly-test.c -o fmpz_poly-test.o

fmpz-test.o: fmpz-test.c $(HEADERS)
	$(CC) $(CFLAGS) -c fmpz-test.c -o fmpz-test.o

ZmodF-test.o: ZmodF-test.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF-test.c -o ZmodF-test.o

ZmodF_poly-test.o: ZmodF_poly-test.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF_poly-test.c -o ZmodF_poly-test.o

mpz_poly-test.o: mpz_poly-test.c $(HEADERS)
	$(CC) $(CFLAGS) -c mpz_poly-test.c -o mpz_poly-test.o

mpn_extras-test.o: mpn_extras-test.c $(HEADERS)
	$(CC) $(CFLAGS) -c mpn_extras-test.c -o mpn_extras-test.o

ZmodF_mul-test.o: ZmodF_mul-test.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF_mul-test.c -o ZmodF_mul-test.o

long_extras-test.o: long_extras-test.c
	$(CC) $(CFLAGS) -c long_extras-test.c -o long_extras-test.o

zmod_poly-test.o: zmod_poly-test.c
	$(CC) $(CFLAGS) -c zmod_poly-test.c -o zmod_poly-test.o

NTL-interface-test.o: NTL-interface-test.cpp
	$(CPP) $(CFLAGS) -c NTL-interface-test.cpp -o NTL-interface-test.o

####### test program targets

mpn_extras-test: mpn_extras-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) mpn_extras-test.o test-support.o -o mpn_extras-test $(FLINTOBJ) $(LIBS)

fmpz_poly-test: fmpz_poly-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) fmpz_poly-test.o test-support.o -o fmpz_poly-test $(FLINTOBJ) $(LIBS)

fmpz-test: fmpz-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) fmpz-test.o test-support.o -o fmpz-test $(FLINTOBJ) $(LIBS)

ZmodF-test: ZmodF-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) ZmodF-test.o test-support.o -o ZmodF-test $(FLINTOBJ) $(LIBS)

ZmodF_poly-test: ZmodF_poly-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) ZmodF_poly-test.o test-support.o -o ZmodF_poly-test $(FLINTOBJ) $(LIBS)

mpz_poly-test: mpz_poly-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) mpz_poly-test.o test-support.o -o mpz_poly-test $(FLINTOBJ) $(LIBS)

ZmodF_mul-test: ZmodF_mul-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) ZmodF_mul-test.o test-support.o -o ZmodF_mul-test $(FLINTOBJ) $(LIBS)

long_extras-test: long_extras.o long_extras-test.o test-support.o memory-manager.o
	$(CC) $(CFLAGS) long_extras.o long_extras-test.o test-support.o memory-manager.o -o long_extras-test $(LIBS)

zmod_poly-test: zmod_poly-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) zmod_poly-test.o test-support.o -o zmod_poly-test $(FLINTOBJ) $(LIBS)

NTL-interface-test: NTL-interface.o NTL-interface-test.o test-support.o $(FLINTOBJ) $(HEADERS)
	$(CPP) $(CFLAGS) NTL-interface-test.o NTL-interface.o test-support.o $(FLINTOBJ) -o NTL-interface-test $(LIBS2)

####### tuning program object files

ZmodF_mul-tune.o: ZmodF_mul-tune.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF_mul-tune.c -o ZmodF_mul-tune.o

mpz_poly-tune.o: mpz_poly-tune.c $(HEADERS)
	$(CC) $(CFLAGS) -c mpz_poly-tune.c -o mpz_poly-tune.o

####### tuning program targets

ZmodF_mul-tune: ZmodF_mul-tune.o test-support.o profiler.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) ZmodF_mul-tune.o test-support.o profiler.o -o ZmodF_mul-tune $(FLINTOBJ) $(LIBS)

mpz_poly-tune: mpz_poly-tune.o test-support.o profiler.o $(FLINTOBJ) $(HEADERS)
	$(CC) $(CFLAGS) mpz_poly-tune.o test-support.o profiler.o -o mpz_poly-tune $(FLINTOBJ) $(LIBS)

####### profiling object files

# Each *-profile-tables.c is generated from the matching *-profile.c by
# make-profile-tables.py, compiled, and then removed again.

profiler.o: profiler.c $(HEADERS)
	$(CC) $(CFLAGS) -c profiler.c -o profiler.o

profiler-main.o: profiler-main.c $(HEADERS)
	$(CC) $(CFLAGS) -c profiler-main.c -o profiler-main.o

fmpz_poly-profile-tables.o: fmpz_poly-profile.c $(HEADERS)
	$(FLINT_PY) make-profile-tables.py fmpz_poly
	$(CC) $(CFLAGS) -c fmpz_poly-profile-tables.c -o fmpz_poly-profile-tables.o
	rm fmpz_poly-profile-tables.c

fmpz_poly-profile.o: fmpz_poly-profile.c $(HEADERS)
	$(CC) $(CFLAGS) -c fmpz_poly-profile.c -o fmpz_poly-profile.o

mpz_poly-profile-tables.o: mpz_poly-profile.c $(HEADERS)
	$(FLINT_PY) make-profile-tables.py mpz_poly
	$(CC) $(CFLAGS) -c mpz_poly-profile-tables.c -o mpz_poly-profile-tables.o
	rm mpz_poly-profile-tables.c

mpz_poly-profile.o: mpz_poly-profile.c $(HEADERS)
	$(CC) $(CFLAGS) -c mpz_poly-profile.c -o mpz_poly-profile.o

ZmodF_poly-profile-tables.o: ZmodF_poly-profile.c $(HEADERS)
	$(FLINT_PY) make-profile-tables.py ZmodF_poly
	$(CC) $(CFLAGS) -c ZmodF_poly-profile-tables.c -o ZmodF_poly-profile-tables.o
	rm ZmodF_poly-profile-tables.c

ZmodF_poly-profile.o: ZmodF_poly-profile.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF_poly-profile.c -o ZmodF_poly-profile.o

ZmodF_mul-profile-tables.o: ZmodF_mul-profile.c $(HEADERS)
	$(FLINT_PY) make-profile-tables.py ZmodF_mul
	$(CC) $(CFLAGS) -c ZmodF_mul-profile-tables.c -o ZmodF_mul-profile-tables.o
	rm ZmodF_mul-profile-tables.c

ZmodF_mul-profile.o: ZmodF_mul-profile.c $(HEADERS)
	$(CC) $(CFLAGS) -c ZmodF_mul-profile.c -o ZmodF_mul-profile.o

NTL-profile-tables.o: NTL-profile.c $(HEADERS)
	$(FLINT_PY) make-profile-tables.py NTL
	$(CPP) $(CFLAGS) -c NTL-profile-tables.c -o NTL-profile-tables.o

zmod_poly-profile-tables.o: zmod_poly-profile.c $(HEADERS)
	$(FLINT_PY) make-profile-tables.py zmod_poly
	$(CC) $(CFLAGS) -c zmod_poly-profile-tables.c -o zmod_poly-profile-tables.o
	rm zmod_poly-profile-tables.c

zmod_poly-profile.o: zmod_poly-profile.c $(HEADERS)
	$(CC) $(CFLAGS) -c zmod_poly-profile.c -o zmod_poly-profile.o

bernoulli-profile-tables.o: bernoulli-profile.c $(HEADERS)
	$(FLINT_PY) make-profile-tables.py bernoulli
	$(CC) $(CFLAGS) -c bernoulli-profile-tables.c -o bernoulli-profile-tables.o
	rm bernoulli-profile-tables.c

bernoulli-profile.o: bernoulli-profile.c $(HEADERS)
	$(CC) $(CFLAGS) -c bernoulli-profile.c -o bernoulli-profile.o

####### profiling program targets

PROFOBJ = $(FLINTOBJ) profiler.o profiler-main.o

fmpz_poly-profile: fmpz_poly-profile.o fmpz_poly-profile-tables.o test-support.o $(PROFOBJ)
	$(CC) $(CFLAGS) -o fmpz_poly-profile fmpz_poly-profile.o fmpz_poly-profile-tables.o test-support.o $(PROFOBJ) $(LIBS)

mpz_poly-profile: mpz_poly-profile.o mpz_poly-profile-tables.o test-support.o $(PROFOBJ)
	$(CC) $(CFLAGS) -o mpz_poly-profile mpz_poly-profile.o mpz_poly-profile-tables.o test-support.o $(PROFOBJ) $(LIBS)

ZmodF_mul-profile: ZmodF_mul-profile.o ZmodF_mul-profile-tables.o $(PROFOBJ)
	$(CC) $(CFLAGS) -o ZmodF_mul-profile ZmodF_mul-profile.o ZmodF_mul-profile-tables.o $(PROFOBJ) $(LIBS)

ZmodF_poly-profile: ZmodF_poly-profile.o ZmodF_poly-profile-tables.o $(PROFOBJ)
	$(CC) $(CFLAGS) -o ZmodF_poly-profile ZmodF_poly-profile.o ZmodF_poly-profile-tables.o $(PROFOBJ) $(LIBS)

kara-profile: kara-profile.c profiler.o test-support.o $(FLINTOBJ)
	$(CC) $(CFLAGS) -o kara-profile kara-profile.c profiler.o test-support.o $(FLINTOBJ) $(LIBS)

# Links against NTL, so it needs the NTL library path and -lntl from LIBS2
# (this rule used to reference $(LIB), which is not defined anywhere).
NTL-profile: NTL-profile.c test-support.o NTL-profile-tables.o $(PROFOBJ)
	$(CPP) $(CFLAGS) -o NTL-profile NTL-profile.c NTL-profile-tables.o test-support.o $(PROFOBJ) $(LIBS2)

# zmod_poly.o is already part of $(FLINTOBJ) (via $(PROFOBJ)); naming it a
# second time on the link line produces duplicate-symbol errors.
zmod_poly-profile: zmod_poly-profile.o zmod_poly-profile-tables.o $(PROFOBJ)
	$(CC) $(CFLAGS) -o zmod_poly-profile zmod_poly-profile.o zmod_poly-profile-tables.o $(PROFOBJ) $(LIBS)

bernoulli-profile: bernoulli-profile.o bernoulli-profile-tables.o $(PROFOBJ)
	$(CC) $(CFLAGS) -o bernoulli-profile bernoulli-profile.o bernoulli-profile-tables.o $(PROFOBJ) $(LIBS)

####### example programs

delta_qexp.o: delta_qexp.c $(HEADERS)
	$(CC) $(CFLAGS) -c delta_qexp.c -o delta_qexp.o

delta_qexp: delta_qexp.o $(FLINTOBJ)
	$(CC) $(CFLAGS) -o delta_qexp delta_qexp.o $(FLINTOBJ) $(LIBS)

expmod: expmod.c $(FLINTOBJ)
	$(CC) $(CFLAGS) -o expmod expmod.c $(FLINTOBJ) $(LIBS)

# BPTJCubes.c is compiled directly by this rule, so it must be a prerequisite
# or edits to it never trigger a rebuild.
BPTJCubes: BPTJCubes.c long_extras.o memory-manager.o
	$(CC) $(CFLAGS) -o BPTJCubes BPTJCubes.c memory-manager.o long_extras.o $(LIBS)

bernoulli.o: bernoulli.c $(HEADERS)
	$(CC) $(CFLAGS) -c bernoulli.c -o bernoulli.o

bernoulli: bernoulli.o long_extras.o $(FLINTOBJ)
	$(CC) $(CFLAGS) -o bernoulli bernoulli.o $(FLINTOBJ) $(LIBS)

bernoulli_fmpz.o: bernoulli_fmpz.c $(HEADERS)
	$(CC) $(CFLAGS) -c bernoulli_fmpz.c -o bernoulli_fmpz.o

bernoulli_fmpz: bernoulli_fmpz.o $(FLINTOBJ)
	$(CC) $(CFLAGS) -o bernoulli_fmpz bernoulli_fmpz.o $(FLINTOBJ) $(LIBS)

bernoulli_zmod.o: bernoulli_zmod.c $(HEADERS)
	$(CC) $(CFLAGS) -c bernoulli_zmod.c -o bernoulli_zmod.o

bernoulli_zmod: bernoulli_zmod.o $(FLINTOBJ)
	$(CC) $(CFLAGS) -o bernoulli_zmod bernoulli_zmod.o $(FLINTOBJ) $(LIBS)

####### Quadratic sieve

poly.o: QS/poly.c QS/poly.h
	$(CC) $(CFLAGS) -c QS/poly.c -o poly.o

factor_base.o: QS/factor_base.c QS/factor_base.h
	$(CC) $(CFLAGS) -c QS/factor_base.c -o factor_base.o

sieve.o: QS/sieve.c QS/sieve.h
	$(CC) $(CFLAGS) -c QS/sieve.c -o sieve.o

linear_algebra.o: QS/linear_algebra.c QS/linear_algebra.h
	$(CC) $(CFLAGS) -c QS/linear_algebra.c -o linear_algebra.o

block_lanczos.o: QS/block_lanczos.c QS/block_lanczos.h
	$(CC) $(CFLAGS) -c QS/block_lanczos.c -o block_lanczos.o

tinyQS: QS/tinyQS.c QS/tinyQS.h factor_base.o poly.o sieve.o linear_algebra.o block_lanczos.o $(FLINTOBJ)
	$(CC) $(CFLAGS) -o tinyQS QS/tinyQS.c factor_base.o poly.o sieve.o linear_algebra.o block_lanczos.o $(FLINTOBJ) $(LIBS)

mp_sieve.o: QS/mp_sieve.c QS/mp_sieve.h
	$(CC) $(CFLAGS) -c QS/mp_sieve.c -o mp_sieve.o

mp_linear_algebra.o: QS/mp_linear_algebra.c QS/mp_linear_algebra.h
	$(CC) $(CFLAGS) -c QS/mp_linear_algebra.c -o mp_linear_algebra.o

mp_poly.o: QS/mp_poly.c QS/mp_poly.h
	$(CC) $(CFLAGS) -c QS/mp_poly.c -o mp_poly.o

mp_lprels.o: QS/mp_lprels.c QS/mp_lprels.h
	$(CC) $(CFLAGS) -c QS/mp_lprels.c -o mp_lprels.o

mp_factor_base.o: QS/mp_factor_base.c QS/mp_factor_base.h
	$(CC) $(CFLAGS) -c QS/mp_factor_base.c -o mp_factor_base.o

mpQS: QS/mpQS.c QS/mpQS.h mp_factor_base.o mp_poly.o mp_sieve.o mp_linear_algebra.o block_lanczos.o mp_lprels.o $(FLINTOBJ)
	$(CC) $(CFLAGS) -o mpQS QS/mpQS.c mp_factor_base.o mp_poly.o mp_sieve.o mp_linear_algebra.o block_lanczos.o mp_lprels.o $(FLINTOBJ) $(LIBS)

####### Integer multiplication timing

ZMULOBJ = zmod_poly.o memory-manager.o fmpz.o ZmodF_mul-tuning.o mpz_poly.o mpz_poly-tuning.o fmpz_poly.o ZmodF_poly.o mpz_extras.o profiler.o ZmodF_mul.o ZmodF.o mpn_extras.o F_mpz_mul-timing.o long_extras.o

# NOTE(review): the binary is written as "Zmul", not "F_mpz_mul-timing", so
# this target is never up to date and relinks on every `make examples`.
# The output name is left alone in case something depends on it -- confirm.
F_mpz_mul-timing: $(ZMULOBJ)
	$(CC) $(ZMULOBJ) -o Zmul $(LIBS)
flint-1.011/memory-manager.h0000644017361200017500000000347511025357254015625 0ustar tabbotttabbott
/*============================================================================

    This file is part of FLINT.

    FLINT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    FLINT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** memory-manager.h: FLINT-wide memory management Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #ifndef FLINT_MANAGER_H #define FLINT_MANAGER_H #ifdef __cplusplus extern "C" { #endif void* flint_stack_alloc(unsigned long length); void* flint_stack_alloc_bytes(unsigned long bytes); void* flint_stack_alloc_small(unsigned long length); void flint_stack_release(); void flint_stack_release_small(); void flint_stack_cleanup(); void* flint_heap_alloc(unsigned long limbs); void* flint_heap_alloc_bytes(unsigned long bytes); void* flint_heap_realloc(void* block, unsigned long limbs); void* flint_heap_realloc_bytes(void* block, unsigned long limbs); void flint_heap_free(void* block); #ifdef __cplusplus } #endif #endif flint-1.011/ZmodF_poly.c0000644017361200017500000013775011025357254014766 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License
    along with FLINT; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

===============================================================================*/
/****************************************************************************

ZmodF_poly.c

Polynomials over Z/pZ, where p = the Fermat number B^n + 1, where
B = 2^FLINT_BITS. Routines for truncated Schoenhage-Strassen FFTs
and convolutions.

Copyright (C) 2007, William Hart and David Harvey

*****************************************************************************/

#include "flint.h"
#include "memory-manager.h"
#include "ZmodF_poly.h"
#include "ZmodF_mul.h"
#include "fmpz_poly.h"
#include "mpn_extras.h"
#include "fmpz.h"

/****************************************************************************

   Memory Management Routines
   
****************************************************************************/

/*
   Initialises poly on the heap with 2^depth coefficient buffers followed by
   scratch_count scratch buffers.

   One block (poly->storage) holds all the buffers, n+1 entries each; a
   second block (poly->coeffs) holds one pointer per buffer, and
   poly->scratch aliases the tail of that pointer array.  poly->length is set
   to 0.  Release with ZmodF_poly_clear().

   NOTE(review): "1 << depth" is an int shift, unlike the 1UL shifts used by
   the FFT code further down.
*/
void ZmodF_poly_init(ZmodF_poly_t poly, unsigned long depth, unsigned long n,
                     unsigned long scratch_count)
{
   poly->n = n;
   poly->depth = depth;
   poly->scratch_count = scratch_count;
   poly->length = 0;
   
   unsigned long bufs = (1 << depth) + scratch_count;
   
   poly->storage = (mp_limb_t*) flint_heap_alloc(bufs * (n+1));

   // put scratch array immediately after coeffs array
   poly->coeffs = (ZmodF_t*) flint_heap_alloc_bytes(bufs*sizeof(ZmodF_t));
   poly->scratch = poly->coeffs + (1 << depth);
   
   // buffers are laid out back to back, n+1 entries apart
   poly->coeffs[0] = poly->storage;
   for (unsigned long i = 1; i < bufs; i++)
      poly->coeffs[i] = poly->coeffs[i-1] + (n+1);
}

/*
   Frees the two heap blocks allocated by ZmodF_poly_init().
*/
void ZmodF_poly_clear(ZmodF_poly_t poly)
{
   flint_heap_free(poly->coeffs);
   flint_heap_free(poly->storage);
}

/*
   Same layout as ZmodF_poly_init(), but both blocks come from the FLINT
   stack allocator.  Must be paired with ZmodF_poly_stack_clear().
*/
void ZmodF_poly_stack_init(ZmodF_poly_t poly, unsigned long depth, unsigned long n,
                     unsigned long scratch_count)
{
   poly->n = n;
   poly->depth = depth;
   poly->scratch_count = scratch_count;
   poly->length = 0;
   
   unsigned long bufs = (1 << depth) + scratch_count;
   
   poly->storage = (mp_limb_t*) flint_stack_alloc(bufs * (n+1));

   // put scratch array immediately after coeffs array
   poly->coeffs = (ZmodF_t*) flint_stack_alloc_bytes(bufs*sizeof(ZmodF_t));
   poly->scratch = poly->coeffs + (1 << depth);
   
   poly->coeffs[0] = poly->storage;
   for (unsigned long i = 1; i < bufs; i++)
      poly->coeffs[i] = poly->coeffs[i-1] + (n+1);
}

/*
   Releases the two stack allocations made by ZmodF_poly_stack_init(), one
   flint_stack_release() call per allocation.  poly itself is not read.
*/
void ZmodF_poly_stack_clear(ZmodF_poly_t poly)
{
   flint_stack_release();
   flint_stack_release();
}

/****************************************************************************

   Basic Arithmetic Routines
   
****************************************************************************/

/*
   Copies the first y->length coefficients of y into x and sets x->length to
   y->length.  x and y must have the same depth and n (asserted).
*/
void ZmodF_poly_set(ZmodF_poly_t x, ZmodF_poly_t y)
{
   FLINT_ASSERT(x->depth == y->depth);
   FLINT_ASSERT(x->n == y->n);

   for (unsigned long i = 0; i < y->length; i++)
      ZmodF_set(x->coeffs[i], y->coeffs[i], x->n);

   x->length = y->length;
}

/*
   Sets res->coeffs[i] = x->coeffs[i] * y->coeffs[i] for i < x->length, using
   a ZmodF_mul_info_t set up for coefficient length x->n, and sets
   res->length to x->length.

   When x and y are the same object the multiplier is initialised with its
   third argument non-zero (presumably selecting squaring -- confirm in
   ZmodF_mul.h) and only x is prefetched.  The coefficients eight positions
   ahead are prefetched before each product.
*/
void ZmodF_poly_pointwise_mul(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y)
{
   FLINT_ASSERT(x->depth == y->depth);
   FLINT_ASSERT(x->depth == res->depth);
   FLINT_ASSERT(x->n == y->n);
   FLINT_ASSERT(x->n == res->n);
   FLINT_ASSERT(x->length == y->length);

   unsigned long j;

   ZmodF_mul_info_t info;
   ZmodF_mul_info_init(info, x->n, x == y);
   
   if (x != y)
      for (unsigned long i = 0; i < x->length; i++)
      {
         if (i+8 < x->length)
         {
            for (j = 0; j < x->n; j += 8) FLINT_PREFETCH(x->coeffs[i+8], j);
            for (j = 0; j < y->n; j += 8) FLINT_PREFETCH(y->coeffs[i+8], j);
         }
         ZmodF_mul_info_mul(info, res->coeffs[i], x->coeffs[i], y->coeffs[i]);
      }
   else
      for (unsigned long i = 0; i < x->length; i++)
      {
         if (i+8 < x->length)
         {
            for (j = 0; j < x->n; j += 8) FLINT_PREFETCH(x->coeffs[i+8], j);
         }
         ZmodF_mul_info_mul(info, res->coeffs[i], x->coeffs[i], x->coeffs[i]);
      }

   ZmodF_mul_info_clear(info);

   res->length = x->length;
}

/*
   Coefficient-wise sum: res->coeffs[i] = x->coeffs[i] + y->coeffs[i] for
   i < x->length; res->length is set to x->length.  All three polynomials
   must share depth and n, and x and y must have equal length (asserted).
*/
void ZmodF_poly_add(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y)
{
   FLINT_ASSERT(x->depth == y->depth);
   FLINT_ASSERT(x->depth == res->depth);
   FLINT_ASSERT(x->n == y->n);
   FLINT_ASSERT(x->n == res->n);
   FLINT_ASSERT(x->length == y->length);
   
   for (unsigned long i = 0; i < x->length; i++)
      ZmodF_add(res->coeffs[i], x->coeffs[i], y->coeffs[i], x->n);

   res->length = x->length;
}

/*
   Coefficient-wise difference: res->coeffs[i] = x->coeffs[i] - y->coeffs[i]
   for i < x->length; res->length is set to x->length.  Same preconditions as
   ZmodF_poly_add().
*/
void ZmodF_poly_sub(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y)
{
   FLINT_ASSERT(x->depth == y->depth);
   FLINT_ASSERT(x->depth == res->depth);
   FLINT_ASSERT(x->n == y->n);
   FLINT_ASSERT(x->n == res->n);
   FLINT_ASSERT(x->length == y->length);
   
   for (unsigned long i = 0; i < x->length; i++)
      ZmodF_sub(res->coeffs[i], x->coeffs[i], y->coeffs[i], x->n);

   res->length = x->length;
}

/*
   Applies ZmodF_normalise() to each of the first poly->length coefficients.
*/
void ZmodF_poly_normalise(ZmodF_poly_t poly)
{
   for (unsigned long i = 0; i < poly->length; i++)
      ZmodF_normalise(poly->coeffs[i], poly->n);
}

/*
   Divides each of the first poly->length coefficients by 2^depth in place
   (via ZmodF_short_div_2exp).  Does nothing when depth is 0.
*/
void ZmodF_poly_rescale(ZmodF_poly_t poly)
{
   if (poly->depth == 0)
      return;

   for (unsigned long i = 0; i < poly->length; i++)
      ZmodF_short_div_2exp(poly->coeffs[i], poly->coeffs[i],
                           poly->depth, poly->n);
}

/*
   As ZmodF_poly_rescale(), but only for the coefficients with index in
   [start, min(n, poly->length)).  Note that the parameter n here is an end
   index, not the coefficient length poly->n.
*/
void ZmodF_poly_rescale_range(ZmodF_poly_t poly, unsigned long start, unsigned long n)
{
   if (poly->depth == 0)
      return;

   unsigned long length = FLINT_MIN(n, poly->length);
   
   for (unsigned long i = start; i < length; i++)
      ZmodF_short_div_2exp(poly->coeffs[i], poly->coeffs[i],
                           poly->depth, poly->n);
}

/****************************************************************************

   Forward fourier transforms (internal code)

****************************************************************************/

/*
   Iterative (layer by layer) forward truncated FFT on 2^depth buffers.

   The parameter names and assertions match _ZmodF_poly_FFT() below, whose
   header comment gives their meaning: x/skip describe the buffers, nonzero
   is how many inputs are assumed nonzero, length is how many outputs are
   wanted, twist is a power of sqrt2, n is the coefficient length and scratch
   is a spare buffer.

   The body has three phases:
     1. an optional first layer using sqrt2 rotations, taken only when root
        or twist is odd;
     2. layers in which some inputs are still known to be zero
        (nonzero < 2*half), using partial butterflies where possible;
     3. the remaining layers, with a loop order chosen by layer and a
        limbshift-only variant once all rotations are whole limbs.

   NOTE(review): in phase 1, length_quantised and length_remainder are
   computed but never used.
*/
void _ZmodF_poly_FFT_iterative(
            ZmodF_t* x, unsigned long depth,
            unsigned long skip, unsigned long nonzero, unsigned long length,
            unsigned long twist, unsigned long n, ZmodF_t* scratch)
{
   FLINT_ASSERT((4*n*FLINT_BITS) % (1 << depth) == 0);
   FLINT_ASSERT(skip >= 1);
   FLINT_ASSERT(n >= 1);
   FLINT_ASSERT(nonzero >= 1 && nonzero <= (1 << depth));
   FLINT_ASSERT(length >= 1 && length <= (1 << depth));
   FLINT_ASSERT(depth >= 1);

   unsigned long i, s, start;
   ZmodF_t* y, * z;

   // root is the (2^depth)-th root of unity for the current layer,
   // measured as a power of sqrt2
   unsigned long root = (4*n*FLINT_BITS) >> depth;
   FLINT_ASSERT(twist < root);

   // half = half the current block length
   unsigned long half = 1UL << (depth - 1);
   unsigned long half_skip = half * skip;

   unsigned long layer;

   // =========================================================================
   // Special case for first layer, if root and/or twist involve sqrt2
   // rotations.

   if ((root | twist) & 1)
   {
      // Let length = multiple of block size plus a remainder.
      unsigned long length_quantised = length & (-2*half);
      unsigned long length_remainder = length - length_quantised;

      if (length <= half)
      {
         // Only need first output for each butterfly,
         // i.e. (a, b) -> (a + b, ?)
         if (nonzero > half)
            for (i = 0, y = x; i < nonzero - half; i++, y += skip)
               ZmodF_add(y[0], y[0], y[half_skip], n);
      }
      else
      {
         // Need both outputs for each butterfly.
         if (nonzero <= half)
         {
            // The second half of each butterfly input are zeroes, so we just
            // computing (a, 0) -> (a, ra), where r is the appropriate root
            // of unity.
            for (i = 0, s = twist, y = x; i < nonzero;
                 i++, s += root, y += skip)
            {
               ZmodF_mul_sqrt2exp(y[half_skip], y[0], s, n);
            }
         }
         else
         {
            // If nonzero > half, then we need some full butterflies...
            for (i = 0, s = twist, y = x; i < nonzero - half;
                 i++, s += root, y += skip)
            {
               ZmodF_forward_butterfly_sqrt2exp(y, y + half_skip,
                                                scratch, s, n);
            }
            // and also some partial butterflies (a, 0) -> (a, ra).
            for (; i < half; i++, s += root, y += skip)
               ZmodF_mul_sqrt2exp(y[half_skip], y[0], s, n);
         }
      }

      // Here we switch to measuring roots as powers of 2, but we also need
      // to update to the next layer's roots, and these two actions cancel
      // each other out :-)

      // Update block length.
      half >>= 1;
      half_skip >>= 1;
      if (nonzero > 2*half)
         nonzero = 2*half;

      layer = 1;
   }
   else
   {
      // no special case for first layer
      layer = 0;

      // switch to measuring roots as powers of 2
      root >>= 1;
      twist >>= 1;
   }

   // =========================================================================
   // This section handles the layers where there are still zero coefficients
   // to take advantage of. In most cases, this will only happen for the
   // first layer or two, so we don't bother with specialised limbshift-only
   // code for these layers.

   // Note: from here on there are no sqrt2 rotations, and we measure all
   // roots as powers of 2.

   for (; (layer < depth) && (nonzero < 2*half); layer++)
   {
      // Let length = multiple of block size plus a remainder.
      unsigned long length_quantised = length & (-2*half);
      unsigned long length_remainder = length - length_quantised;

      if (length_remainder > half)
      {
         // If length overhangs by more than half the block, then we need to
         // perform full butterflies on the last block (i.e. the last block
         // doesn't get any special treatment).
         length_quantised += 2*half;
      }
      else if (length_remainder)
      {
         // If length overhangs the block by at most half the block size,
         // then we only need to compute the first output of each butterfly
         // for this block, i.e. (a, b) -> (a + b)
         if (nonzero > half)
         {
            y = x + skip * length_quantised;
            for (i = 0; i < nonzero - half; i++, y += skip)
               ZmodF_add(y[0], y[0], y[half_skip], n);
         }
      }

      if (nonzero <= half)
      {
         // If nonzero <= half, then the second half of each butterfly input
         // are zeroes, so we just computing (a, 0) -> (a, ra), where r is the
         // appropriate root of unity.
         for (start = 0, y = x; start < length_quantised;
              start += 2*half, y += 2*half_skip)
         {
            for (i = 0, s = twist, z = y; i < nonzero;
                 i++, s += root, z += skip)
            {
               ZmodF_mul_2exp(z[half_skip], z[0], s, n);
            }
         }
      }
      else
      {
         for (start = 0, y = x; start < length_quantised;
              start += 2*half, y += 2*half_skip)
         {
            // If nonzero > half, then we need some full butterflies...
            for (i = 0, s = twist, z = y; i < nonzero - half;
                 i++, s += root, z += skip)
            {
               ZmodF_forward_butterfly_2exp(z, z + half_skip, scratch, s, n);
            }
            // and also some partial butterflies (a, 0) -> (a, ra).
            for (; i < half; i++, s += root, z += skip)
               ZmodF_mul_2exp(z[half_skip], z[0], s, n);
         }
      }

      // Update roots of unity
      twist <<= 1;
      root <<= 1;

      // Update block length.
      half >>= 1;
      half_skip >>= 1;

      if (nonzero > 2*half)
         // no more zero coefficients to take advantage of:
         nonzero = 2*half;
   }

   // =========================================================================
   // Now we may assume there are no more zero coefficients.

   for (; layer < depth; layer++)
   {
      // Let length = multiple of block size plus a remainder.
      unsigned long length_quantised = length & (-2*half);
      unsigned long length_remainder = length - length_quantised;

      if (length_remainder > half)
      {
         // If length overhangs by more than half the block, then we need to
         // perform full butterflies on the last block (i.e. the last block
         // doesn't get any special treatment).
         length_quantised += 2*half;
      }
      else if (length_remainder)
      {
         // If length overhangs the block by at most half the block size,
         // then we only need to compute the first output of each butterfly
         // for this block, i.e. (a, b) -> (a + b)
         y = x + skip * length_quantised;
         for (i = 0; i < half; i++, y += skip)
            ZmodF_add(y[0], y[0], y[half_skip], n);
      }

      // To keep the inner loops long, we have two versions of the next loop.
      if (layer < depth/2)
      {
         // Version 1: only a few relatively long blocks.

         for (start = 0, y = x; start < length_quantised;
              start += 2*half, y += 2*half_skip)
         {
            for (i = 0, s = twist, z = y; i < half; i++, s += root, z += skip)
               ZmodF_forward_butterfly_2exp(z, z + half_skip, scratch, s, n);
         }
      }
      else
      {
         // Version 2: lots of short blocks.

         // Two sub-versions, depending on whether the rotations are all by
         // a whole number of limbs.
         if ((root | twist) & (FLINT_BITS - 1))
         {
            // Version 2a: rotations still involve bitshifts.
            for (i = 0, s = twist, y = x; i < half; i++, s += root, y += skip)
               for (start = 0, z = y; start < length_quantised;
                    start += 2*half, z += 2*half_skip)
               {
                  ZmodF_forward_butterfly_2exp(z, z + half_skip,
                                               scratch, s, n);
               }
         }
         else
         {
            // Version 2b: rotations involve only limbshifts.
            unsigned long root_limbs = root >> FLINT_LG_BITS_PER_LIMB;

            if (twist == 0)
            {
               // special case, since ZmodF_forward_butterfly_Bexp doesn't
               // allow zero rotation count
               for (start = 0, z = x; start < length_quantised;
                    start += 2*half, z += 2*half_skip)
               {
                  ZmodF_simple_butterfly(z, z + half_skip, scratch, n);
               }
               i = 1;
               y = x + skip;
               s = root_limbs;
            }
            else
            {
               i = 0;
               y = x;
               s = twist >> FLINT_LG_BITS_PER_LIMB;
            }

            for (; i < half; i++, s += root_limbs, y += skip)
               for (start = 0, z = y; start < length_quantised;
                    start += 2*half, z += 2*half_skip)
               {
                  ZmodF_forward_butterfly_Bexp(z, z + half_skip,
                                               scratch, s, n);
               }
         }
      }

      // Update roots of unity
      twist <<= 1;
      root <<= 1;

      // Update block length.
      half >>= 1;
      half_skip >>= 1;
   }
}

/*
   Factors FFT of length 2^depth into length 2^rows_depth and length 2^cols_depth transforms

   The 2^depth buffers are treated as a matrix with 2^rows_depth rows and
   2^cols_depth columns, stored row by row (element (r, c) lives at
   x + skip*(r*cols + c)).  Each column is transformed first, with stride
   skip << cols_depth and twist advanced by root per column; then each row
   that is needed for the first `length` outputs is transformed with twist
   << rows_depth.  Only the leading length_cols outputs of a final partial
   row are requested.

   The remaining parameters have the same meaning as for _ZmodF_poly_FFT().
*/
void _ZmodF_poly_FFT_factor(
            ZmodF_t* x, unsigned long rows_depth, unsigned long cols_depth,
            unsigned long skip, unsigned long nonzero, unsigned long length,
            unsigned long twist, unsigned long n, ZmodF_t* scratch)
{
   FLINT_ASSERT(skip >= 1);
   FLINT_ASSERT(n >= 1);
   FLINT_ASSERT(rows_depth >= 1);
   FLINT_ASSERT(cols_depth >= 1);

   unsigned long depth = rows_depth + cols_depth;
   FLINT_ASSERT((4*n*FLINT_BITS) % (1 << depth) == 0);
   FLINT_ASSERT(nonzero >= 1 && nonzero <= (1 << depth));
   FLINT_ASSERT(length >= 1 && length <= (1 << depth));

   // root is the (2^depth)-th root unity, measured as a power of sqrt2
   unsigned long root = (4*n*FLINT_BITS) >> depth;
   FLINT_ASSERT(twist < root);

   unsigned long rows = 1UL << rows_depth;
   unsigned long cols = 1UL << cols_depth;

   // number of complete rows requested, and size of the trailing partial row
   unsigned long length_rows = length >> cols_depth;
   unsigned long length_cols = length & (cols-1);
   unsigned long length_whole_rows = length_cols ?
                                     (length_rows + 1) : length_rows;
   // same split for the nonzero inputs
   unsigned long nonzero_rows = nonzero >> cols_depth;
   unsigned long nonzero_cols = nonzero & (cols-1);

   unsigned long i, j;
   ZmodF_t* y;

   // column transforms
   // (the first nonzero_cols columns have one more nonzero entry)
   for (i = 0, y = x, j = twist; i < nonzero_cols; i++, y += skip, j += root)
      _ZmodF_poly_FFT(y, rows_depth, skip << cols_depth, nonzero_rows + 1,
                      length_whole_rows, j, n, scratch);

   if (nonzero_rows)
   {
      for (; i < cols; i++, y += skip, j += root)
         _ZmodF_poly_FFT(y, rows_depth, skip << cols_depth, nonzero_rows,
                         length_whole_rows, j, n, scratch);
      // after the column transforms every column may hold nonzero data
      nonzero_cols = cols;
   }

   // row transforms
   for (i = 0, y = x; i < length_rows; i++, y += (skip << cols_depth))
      _ZmodF_poly_FFT(y, cols_depth, skip, nonzero_cols, cols,
                      twist << rows_depth, n, scratch);

   if (length_cols)
      // The relevant portion of the last row:
      _ZmodF_poly_FFT(y, cols_depth, skip, nonzero_cols, length_cols,
                      twist << rows_depth, n, scratch);
}

/*
This is an internal function. It's just a temporary implementation so that
we can get started on higher level code. It is not optimised particularly
well yet.

x = array of buffers to operate on
skip = distance between buffers
depth = log2(number of buffers)
nonzero = number of buffers assumed to be nonzero
length = number of fourier coefficients requested
twist = twisting power of sqrt2
n = coefficient length
scratch = a scratch buffer
*/
void _ZmodF_poly_FFT(ZmodF_t* x, unsigned long depth, unsigned long skip,
                    unsigned long nonzero, unsigned long length,
                    unsigned long twist, unsigned long n,
                    ZmodF_t* scratch)
{
   FLINT_ASSERT((4*n*FLINT_BITS) % (1 << depth) == 0);
   FLINT_ASSERT(skip >= 1);
   FLINT_ASSERT(n >= 1);
   FLINT_ASSERT(nonzero >= 1 && nonzero <= (1 << depth));
   FLINT_ASSERT(length >= 1 && length <= (1 << depth));
   FLINT_ASSERT(depth >= 1);

   // If the data fits in L1 (2^depth coefficients of length n+1, plus a
   // scratch buffer), then use the iterative transform. Otherwise factor the
   // FFT into two chunks.
if (depth == 1 || ((1 << depth) + 1) * (n+1) <= ZMODFPOLY_FFT_FACTOR_THRESHOLD) { _ZmodF_poly_FFT_iterative(x, depth, skip, nonzero, length, twist, n, scratch); } else { unsigned long rows_depth = depth >> 1; unsigned long cols_depth = depth - rows_depth; _ZmodF_poly_FFT_factor(x, rows_depth, cols_depth, skip, nonzero, length, twist, n, scratch); } } /**************************************************************************** Inverse fourier transforms (internal code) ****************************************************************************/ /* This one is for when there is no truncation. */ void _ZmodF_poly_IFFT_iterative( ZmodF_t* x, unsigned long depth, unsigned long skip, unsigned long twist, unsigned long n, ZmodF_t* scratch) { FLINT_ASSERT((4*n*FLINT_BITS) % (1 << depth) == 0); FLINT_ASSERT(skip >= 1); FLINT_ASSERT(n >= 1); FLINT_ASSERT(depth >= 1); // root is the (2^(layer+1))-th root unity for each layer, // measured as a power of sqrt2 long root = 2*n*FLINT_BITS; twist <<= (depth - 1); FLINT_ASSERT(twist < root); unsigned long half = 1; unsigned long half_skip = skip; unsigned long size = 1UL << depth; unsigned long layer, start, i, s; ZmodF_t* y, * z; // First group of layers; lots of small blocks. 
for (layer = 0; layer < depth/2; layer++) { // no sqrt2 should be involved here FLINT_ASSERT(!((twist | root) & 1)); // change roots to be measured as powers of 2 // (also this updates for the next layer in advance) root >>= 1; twist >>= 1; if ((root | twist) & (FLINT_BITS-1)) { // This version allows bitshifts for (i = 0, y = x, s = twist; i < half; i++, s += root, y += skip) for (start = 0, z = y; start < size; start += 2*half, z += 2*half_skip) { ZmodF_inverse_butterfly_2exp(z, z + half_skip, scratch, s, n); } } else { // This version is limbshifts only unsigned long root_limbs = root >> FLINT_LG_BITS_PER_LIMB; if (twist == 0) { // special case since ZmodF_inverse_butterfly_Bexp doesn't allow // zero rotation count for (start = 0, z = x; start < size; start += 2*half, z += 2*half_skip) { ZmodF_simple_butterfly(z, z + half_skip, scratch, n); } i = 1; s = root_limbs; y = x + skip; } else { i = 0; s = twist >> FLINT_LG_BITS_PER_LIMB; y = x; } for (; i < half; i++, s += root_limbs, y += skip) for (start = 0, z = y; start < size; start += 2*half, z += 2*half_skip) { ZmodF_inverse_butterfly_Bexp(z, z + half_skip, scratch, s, n); } } half <<= 1; half_skip <<= 1; } // Second group of layers; just a few large blocks. for (; layer < depth; layer++) { if ((root | twist) & 1) { // sqrt2 is involved. This had better be the last layer. FLINT_ASSERT(layer == depth - 1); for (i = 0, z = x, s = twist; i < half; i++, s += root, z += skip) ZmodF_inverse_butterfly_sqrt2exp(z, z + half_skip, scratch, s, n); return; } else { // Only bitshifts. // change roots to be measured as powers of 2 // (also this updates for the next layer in advance) twist >>= 1; root >>= 1; for (start = 0, y = x; start < size; start += 2*half, y += 2*half_skip) { for (i = 0, z = y, s = twist; i < half; i++, s += root, z += skip) ZmodF_inverse_butterfly_2exp(z, z + half_skip, scratch, s, n); } } half <<= 1; half_skip <<= 1; } } /* This one's for working in L1 when truncation is involved. 
It splits into two halves. */ void _ZmodF_poly_IFFT_recursive( ZmodF_t* x, unsigned long depth, unsigned long skip, unsigned long nonzero, unsigned long length, int extra, unsigned long twist, unsigned long n, ZmodF_t* scratch) { FLINT_ASSERT((4*n*FLINT_BITS) % (1 << depth) == 0); FLINT_ASSERT(skip >= 1); FLINT_ASSERT(n >= 1); FLINT_ASSERT(nonzero >= 1 && nonzero <= (1UL << depth)); FLINT_ASSERT(length <= nonzero); FLINT_ASSERT((length == 0 && extra) || (length == (1UL << depth) && !extra) || (length > 0 && length < (1UL << depth))); FLINT_ASSERT(depth >= 1); long size = 1UL << depth; if (length == size) { // no truncation necessary _ZmodF_poly_IFFT_iterative(x, depth, skip, twist, n, scratch); return; } // root is the (2^depth)-th root unity, measured as a power of sqrt2 long root = (4*n*FLINT_BITS) >> depth; FLINT_ASSERT(twist < root); long cols = size >> 1; long half = skip << (depth - 1); // symbols in the following diagrams: // A = fully untransformed coefficient // a = fully untransformed coefficient (implied zero) // B = intermediate coefficient // b = intermediate coefficient (implied zero) // C = fully transformed coefficient // c = fully transformed coefficient (implied zero) // ? = garbage that we don't care about // * = the extra C coefficient, or "?" 
if no extra coefficient requested // the horizontal transforms convert between B and C // the vertical butterflies convert between A and B if ((length < cols) || (length == cols && !extra)) { // The input could look like one of the following: // CCCCAAAA CCCCAAAA CCCCAAaa CCCCaaaa // AAAAAAaa or AAaaaaaa or aaaaaaaa or aaaaaaaa long i, last_zero_forward_butterfly, last_zero_cross_butterfly; if (nonzero <= cols) { i = nonzero - 1; last_zero_forward_butterfly = length; last_zero_cross_butterfly = 0; } else { i = cols - 1; if (nonzero > length + cols) { last_zero_forward_butterfly = nonzero - cols; last_zero_cross_butterfly = length; } else { last_zero_forward_butterfly = length; last_zero_cross_butterfly = nonzero - cols; } } ZmodF_t* y = x + skip*i; // First some forward butterflies ("Aa" => "B?") to make them look like: // CCCCAABB CCCCBBBB CCCCBBaa CCCCaaaa // AAAAAA?? or AAaa???? or aaaa??aa or aaaaaaaa for (; i >= last_zero_forward_butterfly; i--, y -= skip) { // (2*a0, ?) -> (a0, ?) = (b0, ?) ZmodF_short_div_2exp(y[0], y[0], 1, n); } // Then some forward butterflies ("AA" => "B?") to make them look like: // CCCCBBBB CCCCBBBB CCCCBBaa CCCCaaaa // AAAA???? or AAaa???? or aaaa??aa or aaaaaaaa for (; i >= (long)length; i--, y -= skip) { // (2*a0, 2*a1) -> (a0 + a1, ?) = (b0, ?) ZmodF_add(y[0], y[0], y[half], n); ZmodF_short_div_2exp(y[0], y[0], 1, n); } // Transform the first row to make them look like: // BBBB*??? BBBB*??? BBBB*??? BBBB*??? // AAAA???? or AAaa???? or aaaa??aa or aaaaaaaa if (depth > 1) _ZmodF_poly_IFFT_recursive(x, depth - 1, skip, (nonzero < cols) ? nonzero : cols, length, extra, twist << 1, n, scratch); // Cross butterflies ("Ba" => "A?") to make them look like: // BBBB*??? BBAA*??? AAAA*??? AAAA*??? // AAAA???? or AA?????? or ??????aa or ????aaaa for (; i >= last_zero_cross_butterfly; i--, y -= skip) { // (b0, ?) -> (2*b0, ?) = (2*a0, ?) ZmodF_add(y[0], y[0], y[0], n); } // Cross butterflies ("BA" => "A?") to make them look like: // AAAA*??? 
AAAA*??? AAAA*??? AAAA*??? // ???????? or ???????? or ??????aa or ????aaaa for (; i >= 0; i--, y -= skip) { // (b0, 2*a1) -> (2*b0 - 2*a1, ?) = (2*a0, ?) ZmodF_add(y[0], y[0], y[0], n); ZmodF_sub(y[0], y[0], y[half], n); } } else { // The input looks like one of these: // CCCCCCCC CCCCCCCC // AAAAaaaa (extra == 1) or CCCAAAaa // Transform first row (no truncation necessary) to make them look like: // BBBBBBBB BBBBBBBB // AAAAaaaa (extra == 1) or CCCAAAaa if (depth > 1) _ZmodF_poly_IFFT_iterative(x, depth - 1, skip, twist << 1, n, scratch); long i = cols - 1; unsigned long s = twist + root*i; ZmodF_t* y = x + skip*i; long last_zero_cross_butterfly = nonzero - cols; long last_cross_butterfly = length - cols; // Cross butterflies ("Ba" => "AB") to make them look like: // BBBBAAAA BBBBBBAA // AAAABBBB (extra == 1) or CCCAAABB for (; i >= last_zero_cross_butterfly; i--, s -= root, y -= skip) { // (b0, ?) -> (2*b0, w*b0) = (2*a0, b1) ZmodF_mul_sqrt2exp(y[half], y[0], s, n); ZmodF_add(y[0], y[0], y[0], n); } // Cross butterflies ("BA" => "AB") to make them look like: // AAAAAAAA BBBAAAAA // BBBBBBBB (extra == 1) or CCCBBBBB for (; i >= last_cross_butterfly; i--, s -= root, y -= skip) { // (b0, 2*a1) -> (2*(b0-a1), w*(b0-2*a1)) = (2*a0, b1) ZmodF_sub(scratch[0], y[0], y[half], n); ZmodF_add(y[0], y[0], scratch[0], n); ZmodF_mul_sqrt2exp(y[half], scratch[0], s, n); } // Transform second row to make them look like: // AAAAAAAA BBBAAAAA // *??????? (extra == 1) or BBB*???? if (depth > 1) _ZmodF_poly_IFFT_recursive(x + skip*cols, depth - 1, skip, cols, length - cols, extra, twist << 1, n, scratch); // Inverse butterflies ("BB" => "AA") to make them look like: // AAAAAAAA AAAAAAAA // *??????? (extra == 1) or AAA*???? for (; i >= 0; i--, s -= root, y -= skip) { // (b0, b1) -> (b0 + w*b1, b0 - w*b1) = (2*a0, 2*a1) ZmodF_inverse_butterfly_sqrt2exp(y, y + half, scratch, s, n); } } } /* This is an internal function. 
It's just a temporary implementation so that we can get started on higher level code. It is not optimised particularly well yet. x = array of buffers to operate on skip = distance between buffers depth = log2(number of buffers) nonzero = number of *output* buffers assumed to be nonzero length = number of untransformed coefficients requested extra = indicates whether an extra *forward* coefficient should be computed twist = twisting power of sqrt2 n = coefficient length scratch = a scratch buffer */ void _ZmodF_poly_IFFT_factor( ZmodF_t* x, unsigned long rows_depth, unsigned long cols_depth, unsigned long skip, unsigned long nonzero, unsigned long length, int extra, unsigned long twist, unsigned long n, ZmodF_t* scratch) { FLINT_ASSERT(skip >= 1); FLINT_ASSERT(n >= 1); FLINT_ASSERT(rows_depth >= 1); FLINT_ASSERT(cols_depth >= 1); unsigned long depth = rows_depth + cols_depth; FLINT_ASSERT((4*n*FLINT_BITS) % (1 << depth) == 0); FLINT_ASSERT(nonzero >= 1 && nonzero <= (1UL << depth)); FLINT_ASSERT(length <= nonzero); FLINT_ASSERT((length == 0 && extra) || (length == (1UL << depth) && !extra) || (length > 0 && length < (1UL << depth))); // root is the (2^depth)-th root unity, measured as a power of sqrt2 unsigned long root = (4*n*FLINT_BITS) >> depth; FLINT_ASSERT(twist < root); unsigned long rows = 1UL << rows_depth; unsigned long cols = 1UL << cols_depth; unsigned long length_rows = length >> cols_depth; unsigned long length_cols = length & (cols-1); unsigned long nonzero_rows = nonzero >> cols_depth; unsigned long nonzero_cols = nonzero & (cols-1); unsigned long i, j; ZmodF_t* y; // row transforms for the rows where we have all fourier coefficients for (i = 0, y = x; i < length_rows; i++, y += (skip << cols_depth)) _ZmodF_poly_IFFT(y, cols_depth, skip, cols, cols, 0, twist << rows_depth, n, scratch); // column transforms where we have enough information for (i = length_cols, y = x + (skip * length_cols), j = twist + (root*length_cols); i < nonzero_cols; i++, y += 
skip, j += root) { _ZmodF_poly_IFFT(y, rows_depth, skip << cols_depth, nonzero_rows + 1, length_rows, length_cols ? 1 : extra, j, n, scratch); } if (nonzero_rows) for (; i < cols; i++, y += skip, j += root) _ZmodF_poly_IFFT(y, rows_depth, skip << cols_depth, nonzero_rows, length_rows, length_cols ? 1 : extra, j, n, scratch); if (length_cols) { // a single switcheroo row transform _ZmodF_poly_IFFT(x + length_rows * (skip << cols_depth), cols_depth, skip, (nonzero_rows ? cols : nonzero_cols), length_cols, extra, twist << rows_depth, n, scratch); // remaining column transforms for (i = 0, y = x, j = twist; i < length_cols && i < nonzero_cols; i++, y += skip, j += root) { _ZmodF_poly_IFFT(y, rows_depth, skip << cols_depth, nonzero_rows + 1, length_rows + 1, 0, j, n, scratch); } if (nonzero_rows) { for (; i < length_cols; i++, y += skip, j += root) _ZmodF_poly_IFFT(y, rows_depth, skip << cols_depth, nonzero_rows, length_rows + 1, 0, j, n, scratch); } } else if (extra) { // need one extra trivial fourier coefficient x += length_rows * (skip << cols_depth); for (i = 1, y = x + skip; i < (nonzero_rows ? cols : nonzero_cols); i++, y += skip) { ZmodF_add(x[0], x[0], y[0], n); } ZmodF_short_div_2exp(x[0], x[0], cols_depth, n); } } /* This is an internal function. It's just a temporary implementation so that we can get started on higher level code. It is not optimised particularly well yet. 
x = array of buffers to operate on skip = distance between buffers depth = log2(number of buffers) nonzero = number of *output* buffers assumed to be nonzero length = number of untransformed coefficients requested extra = indicates whether an extra *forward* coefficient should be computed twist = twisting power of sqrt2 n = coefficient length scratch = a scratch buffer */ void _ZmodF_poly_IFFT(ZmodF_t* x, unsigned long depth, unsigned long skip, unsigned long nonzero, unsigned long length, int extra, unsigned long twist, unsigned long n, ZmodF_t* scratch) { FLINT_ASSERT((4*n*FLINT_BITS) % (1 << depth) == 0); FLINT_ASSERT(skip >= 1); FLINT_ASSERT(n >= 1); FLINT_ASSERT(nonzero >= 1 && nonzero <= (1UL << depth)); FLINT_ASSERT(length <= nonzero); FLINT_ASSERT((length == 0 && extra) || (length == (1UL << depth) && !extra) || (length > 0 && length < (1UL << depth))); FLINT_ASSERT(depth >= 1); // If the data fits in L1 (2^depth coefficients of length n+1, plus a // scratch buffer), then use the iterative transform. Otherwise factor the // FFT into two chunks. if (depth == 1 || ((1 << depth) + 1) * (n+1) <= ZMODFPOLY_FFT_FACTOR_THRESHOLD) { _ZmodF_poly_IFFT_recursive(x, depth, skip, nonzero, length, extra, twist, n, scratch); } else { unsigned long rows_depth = depth >> 1; unsigned long cols_depth = depth - rows_depth; _ZmodF_poly_IFFT_factor(x, rows_depth, cols_depth, skip, nonzero, length, extra, twist, n, scratch); } } /**************************************************************************** Forward "dual" fourier transforms (internal code) (twists are applied *before* the transform instead of afterwards, so these are used for e.g. 
negacyclic transforms) ****************************************************************************/ /* Let M = 2^depth 2^root = Mth root of unity input is assumed to be mod x^M - a^M, where a = 2^twist assumes twist nonzero */ void _ZmodF_poly_FFT_dual_recursive( ZmodF_t* x, unsigned long depth, unsigned long twist, unsigned long root, unsigned long n, ZmodF_t* scratch) { FLINT_ASSERT(twist); FLINT_ASSERT(twist < root); // ========================================================================= // special cases for length <= 4 if (depth == 2) { // length == 4 // ---------------------------------------------------------------------- // Do the outer layer of two butterflies first. This is basically an // unrolled version of the length >= 8 case below. unsigned long bits = (2*twist) & (FLINT_BITS-1); unsigned long limbs = n - (twist >> (FLINT_LG_BITS_PER_LIMB-1)); if (bits) { // each butterfly needs a bitshift bits = FLINT_BITS - bits; if (--limbs) { ZmodF_short_div_2exp(*scratch, x[2], bits, n); ZmodF_div_Bexp_add(x[2], x[0], *scratch, limbs, n); ZmodF_div_Bexp_sub(x[0], x[0], *scratch, limbs, n); ZmodF_short_div_2exp(*scratch, x[3], bits, n); ZmodF_div_Bexp_add(x[3], x[1], *scratch, limbs, n); ZmodF_div_Bexp_sub(x[1], x[1], *scratch, limbs, n); } else { ZmodF_short_div_2exp(*scratch, x[2], bits, n); ZmodF_add(x[2], x[0], *scratch, n); ZmodF_sub(x[0], x[0], *scratch, n); ZmodF_short_div_2exp(*scratch, x[3], bits, n); ZmodF_add(x[3], x[1], *scratch, n); ZmodF_sub(x[1], x[1], *scratch, n); } } else { // no bitshifts needed ZmodF_div_Bexp_add(*scratch, x[0], x[2], limbs, n); ZmodF_swap(scratch, x+2); ZmodF_div_Bexp_sub(x[0], x[0], *scratch, limbs, n); ZmodF_div_Bexp_add(*scratch, x[1], x[3], limbs, n); ZmodF_swap(scratch, x+3); ZmodF_div_Bexp_sub(x[1], x[1], *scratch, limbs, n); } // ---------------------------------------------------------------------- // Now do the bottom layer, two "blocks" of one butterfly each. 
twist = n*FLINT_BITS - twist; ZmodF_inverse_butterfly_2exp(x, x+1, scratch, twist, n); ZmodF_swap(x, x+1); ZmodF_inverse_butterfly_2exp(x+2, x+3, scratch, twist - root, n); ZmodF_swap(x+2, x+3); return; } if (depth <= 1) { // length == 1 or 2 if (depth == 1) { ZmodF_inverse_butterfly_2exp(x, x+1, scratch, n*FLINT_BITS - twist, n); ZmodF_swap(x, x+1); } return; } // ========================================================================= // general case for length >= 8 // butterflies (a, b) -> (a + w*b, a - w*b), where w = 2^(amount). unsigned long half = 1 << (depth - 1); ZmodF_t* y = x + half; unsigned long amount = twist << (depth - 1); unsigned long bits = amount & (FLINT_BITS-1); unsigned long limbs = n - (amount >> FLINT_LG_BITS_PER_LIMB); if (bits) { // each butterfly needs a bitshift bits = FLINT_BITS - bits; if (--limbs) { for (unsigned long i = 0; i < half; i++) { ZmodF_short_div_2exp(*scratch, y[i], bits, n); ZmodF_div_Bexp_add(y[i], x[i], *scratch, limbs, n); ZmodF_div_Bexp_sub(x[i], x[i], *scratch, limbs, n); } } else { for (unsigned long i = 0; i < half; i++) { ZmodF_short_div_2exp(*scratch, y[i], bits, n); ZmodF_add(y[i], x[i], *scratch, n); ZmodF_sub(x[i], x[i], *scratch, n); } } } else { // all butterflies are limbshifts only for (unsigned long i = 0; i < half; i++) { ZmodF_div_Bexp_add(*scratch, x[i], y[i], limbs, n); ZmodF_swap(scratch, y+i); ZmodF_div_Bexp_sub(x[i], x[i], *scratch, limbs, n); } } // ========================================================================= // recurse into two halves _ZmodF_poly_FFT_dual_recursive(x, depth-1, twist, root << 1, n, scratch); _ZmodF_poly_FFT_dual_recursive(x + half, depth-1, twist + root, root << 1, n, scratch); } void _ZmodF_poly_IFFT_dual_recursive( ZmodF_t* x, unsigned long depth, unsigned long twist, unsigned long root, unsigned long n, ZmodF_t* scratch) { // ========================================================================= // special cases for length <= 4 if (depth == 2) { // 
---------------------------------------------------------------------- // Do the inner layer of two "blocks" of one butterfly each. unsigned long temp = n*FLINT_BITS - twist; ZmodF_forward_butterfly_2exp(x+3, x+2, scratch, temp - root, n); ZmodF_swap(x+2, x+3); ZmodF_forward_butterfly_2exp(x+1, x, scratch, temp, n); ZmodF_swap(x, x+1); // ---------------------------------------------------------------------- // Now do the outer layer of two butterflies. This is basically an // unrolled version of the length >= 8 case below. unsigned long amount = 2*twist; unsigned long bits = amount & (FLINT_BITS-1); unsigned long limbs = n - (amount >> FLINT_LG_BITS_PER_LIMB); if (bits) { // each butterfly needs a bitshift if (limbs != n) { ZmodF_sub_mul_Bexp(*scratch, x[2], x[0], limbs, n); ZmodF_add(x[0], x[0], x[2], n); ZmodF_short_div_2exp(x[2], *scratch, bits, n); ZmodF_sub_mul_Bexp(*scratch, x[3], x[1], limbs, n); ZmodF_add(x[1], x[1], x[3], n); ZmodF_short_div_2exp(x[3], *scratch, bits, n); } else { ZmodF_sub(*scratch, x[0], x[2], n); ZmodF_add(x[0], x[0], x[2], n); ZmodF_short_div_2exp(x[2], *scratch, bits, n); ZmodF_sub(*scratch, x[1], x[3], n); ZmodF_add(x[1], x[1], x[3], n); ZmodF_short_div_2exp(x[3], *scratch, bits, n); } } else { // no bitshifts required ZmodF_sub_mul_Bexp(*scratch, x[2], x[0], limbs, n); ZmodF_add(x[0], x[0], x[2], n); ZmodF_swap(x+2, scratch); ZmodF_sub_mul_Bexp(*scratch, x[3], x[1], limbs, n); ZmodF_add(x[1], x[1], x[3], n); ZmodF_swap(x+3, scratch); } return; } if (depth <= 1) { if (depth == 1) { ZmodF_forward_butterfly_2exp(x+1, x, scratch, twist, n); ZmodF_swap(x, x+1); } return; } unsigned long half = 1 << (depth - 1); // ========================================================================= // recurse into two halves _ZmodF_poly_IFFT_dual_recursive(x, depth-1, twist, root << 1, n, scratch); _ZmodF_poly_IFFT_dual_recursive(x + half, depth-1, twist + root, root << 1, n, scratch); // 
========================================================================= // general case for length >= 8 // butterflies (a, b) -> (a + b, w*(a - b)), where w = 2^(-amount). ZmodF_t* y = x + half; unsigned long amount = twist << (depth - 1); unsigned long bits = amount & (FLINT_BITS-1); unsigned long limbs = n - (amount >> FLINT_LG_BITS_PER_LIMB); if (bits) { // each butterfly needs a bitshift if (limbs != n) { for (unsigned long i = 0; i < half; i++) { ZmodF_sub_mul_Bexp(*scratch, y[i], x[i], limbs, n); ZmodF_add(x[i], x[i], y[i], n); ZmodF_short_div_2exp(y[i], *scratch, bits, n); } } else { for (unsigned long i = 0; i < half; i++) { ZmodF_sub(*scratch, x[i], y[i], n); ZmodF_add(x[i], x[i], y[i], n); ZmodF_short_div_2exp(y[i], *scratch, bits, n); } } } else { // all butterflies are limbshifts only for (unsigned long i = 0; i < half; i++) { ZmodF_sub_mul_Bexp(*scratch, y[i], x[i], limbs, n); ZmodF_add(x[i], x[i], y[i], n); ZmodF_swap(y+i, scratch); } } } /**************************************************************************** Fourier Transform Routines ****************************************************************************/ void ZmodF_poly_FFT(ZmodF_poly_t poly, unsigned long length) { FLINT_ASSERT(length <= (1UL << poly->depth)); // check the right roots of unity are available FLINT_ASSERT((4 * poly->n * FLINT_BITS) % (1 << poly->depth) == 0); FLINT_ASSERT(poly->scratch_count >= 1); if (length != 0) { if (poly->length == 0) { // input is zero, so output is zero too for (unsigned long i = 0; i < length; i++) ZmodF_zero(poly->coeffs[i], poly->n); } else { if (poly->depth >= 1) _ZmodF_poly_FFT(poly->coeffs, poly->depth, 1, poly->length, length, 0, poly->n, poly->scratch); } } poly->length = length; } void ZmodF_poly_IFFT(ZmodF_poly_t poly) { // check the right roots of unity are available FLINT_ASSERT((4 * poly->n * FLINT_BITS) % (1 << poly->depth) == 0); FLINT_ASSERT(poly->scratch_count >= 1); if (poly->length && poly->depth) _ZmodF_poly_IFFT(poly->coeffs, 
poly->depth, 1, poly->length, poly->length, 0, 0, poly->n, poly->scratch); } // res may alias x or y // x and y may alias each other void ZmodF_poly_convolution(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y) { FLINT_ASSERT(x->depth == y->depth); FLINT_ASSERT(x->depth == res->depth); FLINT_ASSERT(x->n == y->n); FLINT_ASSERT(x->n == res->n); unsigned long length = x->length + y->length - 1; unsigned long size = 1UL << res->depth; if (length > size) length = size; ZmodF_poly_FFT(x, length); if (x != y) // take care of aliasing ZmodF_poly_FFT(y, length); ZmodF_poly_pointwise_mul(res, x, y); ZmodF_poly_IFFT(res); ZmodF_poly_rescale(res); } // res may alias x or y // x and y may alias each other // only computes the coefficients in the range // [start, n) the rest are rubbish void ZmodF_poly_convolution_range(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y, unsigned long start, unsigned long n) { FLINT_ASSERT(x->depth == y->depth); FLINT_ASSERT(x->depth == res->depth); FLINT_ASSERT(x->n == y->n); FLINT_ASSERT(x->n == res->n); unsigned long length = x->length + y->length - 1; unsigned long size = 1UL << res->depth; if (length > size) length = size; ZmodF_poly_FFT(x, length); if (x != y) // take care of aliasing ZmodF_poly_FFT(y, length); ZmodF_poly_pointwise_mul(res, x, y); ZmodF_poly_IFFT(res); ZmodF_poly_rescale_range(res, start, n); } /**************************************************************************** Negacyclic Fourier Transform Routines ****************************************************************************/ /* ignores length of poly */ void ZmodF_poly_negacyclic_FFT(ZmodF_poly_t poly) { // check the right roots of unity are available FLINT_ASSERT((2 * poly->n * FLINT_BITS) % (1 << poly->depth) == 0); FLINT_ASSERT(poly->scratch_count >= 1); unsigned long twist = (poly->n * FLINT_BITS) >> poly->depth; _ZmodF_poly_FFT_dual_recursive(poly->coeffs, poly->depth, twist, 2*twist, poly->n, poly->scratch); poly->length = 1 << poly->depth; } void 
ZmodF_poly_negacyclic_IFFT(ZmodF_poly_t poly) { // check the right roots of unity are available FLINT_ASSERT((2 * poly->n * FLINT_BITS) % (1 << poly->depth) == 0); FLINT_ASSERT(poly->scratch_count >= 1); unsigned long twist = (poly->n * FLINT_BITS) >> poly->depth; _ZmodF_poly_IFFT_dual_recursive(poly->coeffs, poly->depth, twist, 2*twist, poly->n, poly->scratch); poly->length = 1 << poly->depth; } void ZmodF_poly_negacyclic_convolution(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y) { FLINT_ASSERT(x->depth == y->depth); FLINT_ASSERT(x->depth == res->depth); FLINT_ASSERT(x->n == y->n); FLINT_ASSERT(x->n == res->n); unsigned long size = 1UL << res->depth; ZmodF_poly_negacyclic_FFT(x); if (x != y) // take care of aliasing ZmodF_poly_negacyclic_FFT(y); ZmodF_poly_pointwise_mul(res, x, y); ZmodF_poly_negacyclic_IFFT(res); ZmodF_poly_rescale(res); res->length = size; } // end of file **************************************************************** flint-1.011/ZmodF.h0000644017361200017500000003056611025357254013725 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** ZmodF.h Copyright (C) 2007, David Harvey Routines for arithmetic on elements of Z/pZ where p = B^n + 1, B = 2^FLINT_BITS. These are currently used only in the ZmodF_poly module, which supplies the Schoenhage-Strassen FFT code. ******************************************************************************/ #ifndef FLINT_ZMODF_H #define FLINT_ZMODF_H #ifdef __cplusplus extern "C" { #endif #include #include #include "flint.h" /* Add the given *signed* limb to the buffer [x, x+count), much like mpn_add_1 and mpn_sub_1 (except it's always inplace). PRECONDITIONS: count >= 1 NOTE: The branch predictability of this function is optimised for the case that abs(limb) is relatively small and that the first limb of x is randomly distributed, which should be the normal usage in the FFT routines. */ static inline void signed_add_1(mp_limb_t* x, unsigned long count, mp_limb_signed_t limb) { FLINT_ASSERT(count >= 1); // If the high bit of x[0] doesn't change when we add "limb" to it, // then there's no possibility of overflow. mp_limb_t temp = x[0] + limb; if ((mp_limb_signed_t)(temp ^ x[0]) >= 0) // the likely case x[0] = temp; else { // the unlikely case; here we need to branch based on the sign of // the limb being added if (limb >= 0) mpn_add_1(x, x, count, limb); else mpn_sub_1(x, x, count, -limb); } } /* A ZmodF_t is stored as a *signed* value in two's complement format, using n+1 limbs. The value is not normalised into any particular range, so the top limb may pick up overflow bits. Of course the arithmetic functions in this module may implicitly reduce mod p whenever they like. 
More precisely, suppose that the first n limbs are x[0], ..., x[n-1] (unsigned) and the last limb is x[n] (signed). Then the value being represented is x[0] + x[1]*B + ... + x[n-1]*B^(n-1) - x[n] (mod p). */ typedef mp_limb_t* ZmodF_t; /* ============================================================================ Normalisations and simple data movement ============================================================================ */ static inline void ZmodF_swap(ZmodF_t* a, ZmodF_t* b) { ZmodF_t temp = *a; *a = *b; *b = temp; } /* Normalises a into the range [0, p). (Note that the top limb will be set if and only if a = -1 mod p.) */ void ZmodF_normalise(ZmodF_t a, unsigned long n); /* Adjusts a mod p so that the top limb is in the interval [0, 2]. This in general will be faster then ZmodF_normalise(); in particular the branching is much more predictable. */ static inline void ZmodF_fast_reduce(ZmodF_t a, unsigned long n) { mp_limb_t hi = a[n]; a[n] = 1; signed_add_1(a, n+1, 1-hi); } /* a := 0 */ static inline void ZmodF_zero(ZmodF_t a, unsigned long n) { long i = n; do a[i] = 0; while (--i >= 0); } /* b := a */ static inline void ZmodF_set(ZmodF_t b, ZmodF_t a, unsigned long n) { long i = n; do b[i] = a[i]; while (--i >= 0); } /* ============================================================================ Basic arithmetic ============================================================================ */ /* b := -a PRECONDITIONS: a and b may alias each other */ static inline void ZmodF_neg(ZmodF_t b, ZmodF_t a, unsigned long n) { b[n] = ~a[n] - 1; // -1 is to make up mod p for 2's complement negation long i = n-1; do b[i] = ~a[i]; while (--i >= 0); } /* res := a + b PRECONDITIONS: Any combination of aliasing among res, a, b is allowed. */ static inline void ZmodF_add(ZmodF_t res, ZmodF_t a, ZmodF_t b, unsigned long n) { mpn_add_n(res, a, b, n+1); } /* res := a - b PRECONDITIONS: Any combination of aliasing among res, a, b is allowed. 
*/ static inline void ZmodF_sub(ZmodF_t res, ZmodF_t a, ZmodF_t b, unsigned long n) { mpn_sub_n(res, a, b, n+1); } /* b := 2^(-s) a PRECONDITIONS: 0 < s < FLINT_BITS b may alias a */ static inline void ZmodF_short_div_2exp(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n) { FLINT_ASSERT(s > 0 && s < FLINT_BITS); // quick adjustment mod p to ensure a is non-negative ZmodF_fast_reduce(a, n); // do the rotation, and push the overflow back to the top limb mp_limb_t overflow = mpn_rshift(b, a, n+1, s); mpn_sub_1(b+n-1, b+n-1, 2, overflow); } /* b := B^s a PRECONDITIONS: 0 < s < n b must not alias a */ static inline void ZmodF_mul_Bexp(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n) { FLINT_ASSERT(s > 0); FLINT_ASSERT(s < n); FLINT_ASSERT(b != a); // let a = ex*B^n + hi*B^(n-s) + lo, // where 0 <= lo < B^(n-s) and 0 <= hi < B^s and abs(ex) < B/2. // Then the output should be -ex*B^s + lo*B^s - hi (mod p). long i; // Put B^s - hi - 1 into b i = s-1; do b[i] = ~a[n-s+i]; while (--i >= 0); // Put lo*B^s into b i = n-s-1; do b[i+s] = a[i]; while (--i >= 0); // Put -B^n into b (to compensate mod p for -1 added in first loop) b[n] = (mp_limb_t)(-1L); // Add (-ex-1)*B^s to b signed_add_1(b+s, n-s+1, -a[n]-1); } /* c := a - 2^(-Bs) b PRECONDITIONS: 0 < s < n b must not alias c a may alias b or c */ static inline void ZmodF_div_Bexp_sub(ZmodF_t c, ZmodF_t a, ZmodF_t b, unsigned long s, unsigned long n) { FLINT_ASSERT(s > 0); FLINT_ASSERT(s < n); FLINT_ASSERT(b != c); // add low limbs of b to high limbs of a c[n] = a[n] + mpn_add_n(c+n-s, b, a+n-s, s); // subtract high limbs of b from low limbs of a mp_limb_t overflow = b[n] + mpn_sub_n(c, a, b+s, n-s); // propagate overflow signed_add_1(c+n-s, s+1, -overflow); } /* c := a + 2^(-Bs) b PRECONDITIONS: 0 < s < n b must not alias c a may alias b or c */ static inline void ZmodF_div_Bexp_add(ZmodF_t c, ZmodF_t a, ZmodF_t b, unsigned long s, unsigned long n) { FLINT_ASSERT(s > 0); FLINT_ASSERT(s < n); FLINT_ASSERT(b != c); 
// subtract low limbs of b from high limbs of a c[n] = a[n] - mpn_sub_n(c+n-s, a+n-s, b, s); // add high limbs of b to low limbs of a mp_limb_t overflow = b[n] + mpn_add_n(c, a, b+s, n-s); // propagate overflow signed_add_1(c+n-s, s+1, overflow); } /* c := B^s (a - b) PRECONDITIONS: c must not alias a or b 0 < s < n */ static inline void ZmodF_sub_mul_Bexp(ZmodF_t c, ZmodF_t a, ZmodF_t b, unsigned long s, unsigned long n) { FLINT_ASSERT(s > 0); FLINT_ASSERT(s < n); FLINT_ASSERT(c != a); FLINT_ASSERT(c != b); // get low limbs of a - b into high limbs of c c[n] = -mpn_sub_n(c+s, a, b, n-s); // get high limbs of b - a into low limbs of c mp_limb_t overflow = b[n] - a[n] - mpn_sub_n(c, b+n-s, a+n-s, s); // propagate overflow signed_add_1(c+s, n+1-s, overflow); } /* b := B^s (1 - B^(n/2)) a PRECONDITIONS: 0 <= s < 2n n must be odd b must not alias a */ void ZmodF_mul_pseudosqrt2_n_odd(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n); /* b := B^s (1 - B^(n/2)) a PRECONDITIONS: 0 <= s < 2n n must be even b must not alias a */ void ZmodF_mul_pseudosqrt2_n_even(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n); /* b := 2^s a PRECONDITIONS: 0 <= s < n*FLINT_BITS b may not alias a */ void ZmodF_mul_2exp(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n); /* b := 2^(s/2) a PRECONDITIONS: 0 <= s < 2*n*FLINT_BITS */ void ZmodF_mul_sqrt2exp(ZmodF_t b, ZmodF_t a, unsigned long s, unsigned long n); /* c := 2^s (a - b) PRECONDITIONS: c must not alias a or b 0 <= s < n*FLINT_BITS */ void ZmodF_sub_mul_2exp(ZmodF_t c, ZmodF_t a, ZmodF_t b, unsigned long s, unsigned long n); /* ============================================================================ Butterflies ============================================================================ */ /* a := a + b b := B^s (a - b) z := destroyed PRECONDITIONS: a, b, z may not alias each other 0 < s < n NOTE: a, b, z may get permuted */ static inline void ZmodF_forward_butterfly_Bexp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, 
unsigned long s, unsigned long n) { FLINT_ASSERT(s > 0); FLINT_ASSERT(s < n); FLINT_ASSERT(*a != *b); FLINT_ASSERT(*a != *z); FLINT_ASSERT(*z != *b); ZmodF_sub_mul_Bexp(*z, *a, *b, s, n); ZmodF_add(*a, *a, *b, n); ZmodF_swap(b, z); } /* a := a + b b := 2^s (a - b) z := destroyed PRECONDITIONS: a, b, z may not alias each other 0 <= s < n*FLINT_BITS NOTE: a, b, z may get permuted */ void ZmodF_forward_butterfly_2exp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n); /* a := a + b b := 2^(s/2) (a - b) z := destroyed PRECONDITIONS: a, b, z may not alias each other 0 <= s < 4*FLINT_BITS NOTE: a, b, z may get permuted */ void ZmodF_forward_butterfly_sqrt2exp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n); /* a := a + B^(-s) b b := a - B^(-s) b z := destroyed PRECONDITIONS: a, b, z may not alias each other 0 < s < n NOTE: a, b, z may get permuted */ static inline void ZmodF_inverse_butterfly_Bexp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n) { FLINT_ASSERT(s > 0); FLINT_ASSERT(s < n); FLINT_ASSERT(*a != *b); FLINT_ASSERT(*a != *z); FLINT_ASSERT(*z != *b); ZmodF_div_Bexp_sub(*z, *a, *b, s, n); ZmodF_div_Bexp_add(*a, *a, *b, s, n); ZmodF_swap(z, b); } /* a := a + 2^(-s) b b := a - 2^(-s) b z := destroyed PRECONDITIONS: a, b, z may not alias each other 0 <= s < n*FLINT_BITS NOTE: a, b, z may get permuted */ void ZmodF_inverse_butterfly_2exp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n); /* a := a + 2^(-s/2) b b := a - 2^(-s/2) b z := destroyed PRECONDITIONS: a, b, z may not alias each other 0 <= s < 2*n*FLINT_BITS NOTE: a, b, z may get permuted */ void ZmodF_inverse_butterfly_sqrt2exp(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long s, unsigned long n); /* a := a + b b := a - b z := destroyed PRECONDITIONS: a, b, z may not alias each other NOTE: a, b, z may get permuted */ static inline void ZmodF_simple_butterfly(ZmodF_t* a, ZmodF_t* b, ZmodF_t* z, unsigned long n) { FLINT_ASSERT(*a 
!= *b); FLINT_ASSERT(*a != *z); FLINT_ASSERT(*z != *b); ZmodF_add(*z, *a, *b, n); ZmodF_sub(*b, *a, *b, n); ZmodF_swap(z, a); } /* ============================================================================ Miscellaneous ============================================================================ */ /* b := a / 3 PRECONDITIONS: n < 2^(FLINT_BITS/2) NOTE: a and b may alias each other a may get modified mod p */ void ZmodF_divby3(ZmodF_t b, ZmodF_t a, unsigned long n); #ifdef __cplusplus } #endif #endif // end of file **************************************************************** flint-1.011/bernoulli.c0000644017361200017500000001552511025357254014672 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** bernoulli.c: Finds Bernoulli numbers B_{2k} Based on the implementation in SAGE written by David Harvey Uses mpz_polys for the calculations. See bernoulli_fmpz.c and bernoulli_zmod.c for use of other polys. 
Copyright (C) 2007, David Howden *****************************************************************************/ #include #include #include #include #include "flint.h" #include "mpz_poly.h" #include "long_extras.h" #define TRUE 1; #define FALSE 0; /* Computes the bernoulli numbers B_0, B_2, ..., B_{p-3} for prime p Requires that res be allocated for (p-1)/2 unsigned longs which will hold the result. If returns 0, then the factoring of p has failed, otherwise will always return 1. */ int bernoulli_mod_p_mpz(unsigned long *res, unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long g, g_inv, g_sqr, g_sqr_inv; double p_inv = z_precompute_inverse(p); g = z_primitive_root(p); if(!g) { return FALSE; } g_inv = z_invert(g, p); g_sqr = z_mulmod_precomp(g, g, p, p_inv); g_sqr_inv = z_mulmod_precomp(g_inv, g_inv, p, p_inv); unsigned long poly_size = (p-1)/2; int is_odd = poly_size % 2; unsigned long g_power, g_power_inv; g_power = g_inv; g_power_inv = 1; // constant is (g-1)/2 mod p unsigned long constant; if(g % 2) { constant = (g-1)/2; } else { constant = (g+p-1)/2; } // fudge holds g^{i^2}, fudge_inv holds g^{-i^2} unsigned long fudge, fudge_inv; fudge = fudge_inv = 1; // compute the polynomials F(X) and G(X) mpz_poly_t F, G; mpz_poly_init2(F, poly_size); mpz_poly_init2(G, poly_size); unsigned long i, temp, h; for(i = 0; i < poly_size; i++) { // compute h(g^i)/g^i (h(x) is as in latex notes) temp = g * g_power; h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv); g_power = z_mod_precomp(temp, p, p_inv); g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv); // store coefficient g^{i^2} h(g^i)/g^i mpz_poly_set_coeff_ui(G, i, z_mulmod_precomp(h, fudge, p, p_inv)); mpz_poly_set_coeff_ui(F, i, fudge_inv); // update fudge and fudge_inv fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv); fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, 
p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv); } mpz_poly_set_coeff_ui(F, 0, 0); // step 2: multiply the polynomials... mpz_poly_t product; mpz_poly_init(product); mpz_poly_mul(product, G, F); // step 3: assemble the result... unsigned long g_sqr_power, value; g_sqr_power = g_sqr; fudge = g; res[0] = 1; mpz_t value_coeff; mpz_init(value_coeff); unsigned long value_coeff_ui; for(i = 1; i < poly_size; i++) { mpz_poly_get_coeff(value_coeff, product, i + poly_size); value = mpz_fdiv_ui(value_coeff, p); value = z_mod_precomp(mpz_poly_get_coeff_ui(product, i + poly_size), p, p_inv); mpz_poly_get_coeff(value_coeff, product, i); if(is_odd) { value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv); } else { value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv); } value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); res[i] = value; g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); } mpz_clear(value_coeff); mpz_poly_clear(F); mpz_poly_clear(G); mpz_poly_clear(product); return TRUE; } /* Verifies that the ouput of bernoulli_mod_p above is correct. Takes the result from bernoulli_mod_p (res - an array of (p-1)/2 unsigned longs), and the prime p. Returns 0 if res is incorrect, 1 if res is correct. 
*/ int verify_bernoulli_mod_p(unsigned long *res, unsigned long p) { unsigned long N, i, product, sum, value, element; double p_inv; N = (p-1)/2; product = 1; sum = 0; p_inv = z_precompute_inverse(p); for(i = 0; i < N; i++) { element = res[i]; value = z_mulmod_precomp(z_mulmod_precomp(product, 2*i+1, p, p_inv), element, p, p_inv); sum = z_mod_precomp(sum + value, p, p_inv); product = z_mulmod_precomp(product, 4, p, p_inv); } if(z_mod_precomp(sum + 2, p, p_inv)) { return FALSE; } return TRUE; } /* Test function for bernoulli_mod_p Calculates bernoulli_mod_p for the prime p and verifies the result. Returs 0 if incorrect, and 1 if correct. */ int test_bernoulli_mod_p(unsigned long p) { unsigned long *res = (unsigned long*) flint_stack_alloc((p-1)/2); if(!bernoulli_mod_p_mpz(res, p)) { printf("Could not factor p = %d\n", p); flint_stack_release(); return FALSE; } int result = verify_bernoulli_mod_p(res, p); flint_stack_release(); return result; } int main (int argc, char const *argv[]) { unsigned long p = 2; unsigned long tests = 1000; unsigned long fail = 0; for(unsigned long i = 0; i < tests; i++) { p = z_nextprime(p); if(!test_bernoulli_mod_p(p)) { printf("Fails on p = %d\n", p); fail++; } else { printf("Works on p = %d\n", p); } } printf("\nResults: %d OK, %d FAILED.\n", tests - fail, fail); return 0; } flint-1.011/test-support.h0000644017361200017500000000431711025357254015372 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** test-support.h: Support code for test modules Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #ifndef FLINT_TEST_SUPPORT_H #define FLINT_TEST_SUPPORT_H #ifdef __cplusplus extern "C" { #endif #include // set up and clean up the test module void test_support_init(); void test_support_cleanup(); // a single GMP random state object that test modules may use extern gmp_randstate_t randstate; // returns random unsigned long in [0, max) unsigned long random_ulong(unsigned long max); unsigned long random_ulong2(unsigned long max); // returns random limb mp_limb_t random_limb(); // writes "limbs" random limbs to dest, using mpz_rrandomb void random_limbs(mp_limb_t* dest, unsigned long limbs); // writes "limbs" random limbs to dest, uniform distribution void urandom_limbs(mp_limb_t* dest, unsigned long limbs); // converts mpz to mpn, writes "limbs" limbs (zero-padded), ignores sign of src void mpz_to_mpn(mp_limb_t* dest, unsigned long limbs, mpz_t src); // convert mpn to mpz, reads exactly "limbs" limbs void mpn_to_mpz(mpz_t dest, mp_limb_t* src, unsigned long limbs); #define TEST_MPZ_COUNT 5 #ifdef __cplusplus } #endif #endif // *************** end of file flint-1.011/pari-profiles/0000755017361200017500000000000011025357255015300 5ustar tabbotttabbottflint-1.011/pari-profiles/pari-profile.p0000644017361200017500000001136111025357252020051 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. 
FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /***************************************************************** pari-profile.p - code for timing PARI polynomial multiplication in Z[x] over various lengths and bitsizes. Based on Magma profiling code by David Harvey. Copyright (C) 2007, Tomasz Lechowski Some corrections (C) 2007, Bill Allombert *****************************************************************/ target_name="PolyMul"; target_description="PARI polynomial multiplication in Z[x] over various lengths and bitsizes, NON-NEGATIVE coefficients only"; Max=16000000; ratio=1.2; \p4; DURATION_THRESHOLD=200000; DURATION_TARGET=300000; sampler(Length, bits, count)={ countmod=4; if(count>1000,countmod=100); if(count>100,countmod=10); gettime(); for(i=1,count, if(((i-1)%countmod)==0, a=vector(Length); b=vector(Length); for(j=1,Length, a[j]=random(2^(bits)); b[j]=random(2^(bits))); a=Pol(a); b=Pol(b)); c=a*b); time1=gettime(); for(i=1,count, if(((i-1)%countmod)==0, a=vector(Length); b=vector(Length); for(j=1,Length, a[j]=random(2^(bits)); b[j]=random(2^(bits))); a=Pol(a); b=Pol(b)); ); time2=gettime(); return((time1-time2)/1000); }; format_sci(x)={ L=floor(log(x)/log(10)); x=x/(10^L); s=Str(x); if(L<0, s=concat(s,"e-"), s=concat(s,"e+") ); s=concat(s,Str(floor(abs(L / 10)))); 
s=concat(s,Str((abs(L))%10)); return(s); }; prof2d_sample(x, y)={ good_count=0; num_trials=4; last_time=sampler(x,y,num_trials)*1000000.0; max_time=0; min_time=0; true=1; while(true, per_trial=last_time/num_trials; if(last_time>DURATION_THRESHOLD, if(good_count>0, max_time=max(max_time,per_trial); min_time=min(min_time, per_trial), max_time=per_trial; min_time=per_trial); good_count=good_count+1; if(good_count==5, print(Str(x," ", y," ",format_sci(min_time)," ",format_sci(max_time))); write(Pariprof,Str(x," ", y," ",format_sci(min_time)," ",format_sci(max_time))); return; ) ); if(last_time<0.0001, last_time=0.0001 ); adjust_ratio= 1.0*DURATION_TARGET/last_time; if(adjust_ratio>1.25, adjust_ratio=1.25 ); if(adjust_ratio<0.75, adjust_ratio=0.75 ); num_trials=ceil(adjust_ratio*num_trials); if(num_trials==0, num_trials=1 ); last_time=sampler(x,y,num_trials)*1000000.0; ) }; print_header()={ print("FLINT profile output"); write(Pariprof,"FLINT profile output"); print(""); write(Pariprof," "); print("TIMESTAMP: "); write(Pariprof,"TIMESTAMP:"); print("MACHINE: "); write(Pariprof,"MACHINE:"); print(""); write(Pariprof," "); print("MODULE:PARI"); write(Pariprof,"MODULE:PARI"); print("TARGET:", target_name); write(Pariprof, "TARGET:", Str(target_name)); print(""); write(Pariprof," "); print("DESCRIPTION:"); write(Pariprof,"DESCRIPTION:"); print(target_description); write(Pariprof, Str(target_description)); print(""); write(Pariprof," "); print("============================================== begin data"); write(Pariprof,"============================================== begin data"); }; driver()={ max_iter=ceil(log(Max)/log(ratio)); last_length=0; for(i=0,max_iter, Length=floor((ratio)^i); if(Length!=last_length, last_length=Length; last_bits=0; for(j=0,max_iter, bits=floor(ratio^j); if(bits!=last_bits, last_bits=bits; if(Length*bits #include #include #include "flint.h" #include "long_extras.h" #include "test-support.h" #include "memory-manager.h" #define DEBUG 0 // prints debug 
information #define DEBUG2 1 #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? "ok\n" : "FAIL!\n"); /*int test_z_mulmod32_precomp() { uint32_t ninv; unsigned long n; unsigned long a, b, res1, res2, bits; int result = 1; mpz_t mpz_a, mpz_b, mpz_n, mpz_res; mpz_init(mpz_a); mpz_init(mpz_b); mpz_init(mpz_n); mpz_init(mpz_res); for (unsigned long count = 0; (count < 10000) && (result == 1); count++) { bits = z_randint(32)+1; n = random_ulong2((1UL< 65535) || (n2 == 1)); n = n1*n2; #if DEBUG printf("n1 = %ld, n2 = %ld\n", n1, n2); #endif result = !z_issquarefree(n); } for (unsigned long count = 0; (count < 500000) && (result == 1); count++) { n = 1; n1 = 1; n2 = random_ulong(999998)+2; do { n = n*n1; for (unsigned long i = 0; i < random_ulong(3)+1; i++) { n1 = z_nextprime(n1); } } while (n*n1 < n2); #if DEBUG printf("%ld\n", n); #endif result = z_issquarefree(n); } return result; } int test_z_factor_trial() { unsigned long n, prod, orig_n; factor_t factors; int i; int result = 1; for (unsigned long count = 0; (count < 100000) && (result == 1); count++) { orig_n = random_ulong(1000000); for (unsigned long j = 0; j < 10; j++) n = z_factor_trial(&factors, orig_n); prod = n; for (i = 0; i < factors.num; i++) { prod *= z_pow(factors.p[i], factors.exp[i]); } result = (prod == orig_n); #if DEBUG if (!result) { printf("n = %ld: [", orig_n); for (i = 0; i < factors.num - 1; i++) { printf("%ld, %ld; ", factors.p[i], factors.exp[i]); } printf("%ld, %ld", factors.p[i], factors.exp[i]); if (n != 1) printf("; %ld, 1]\n", n); else printf("]\n"); } #endif } return result; } int test_z_factor_SQUFOF() { unsigned long n, factor, bits; int result = 1; for (unsigned long count = 0; (count < 1000) && (result == 1); count++) { do { bits = z_randint(FLINT_BITS - 1)+1; n = random_ulong((1UL< #include #include #include #include "flint.h" #include "memory-manager.h" 
#include "test-support.h"
#include "zmod_poly.h"
#include "long_extras.h"

#define VARY_BITS 0
#define SPARSE 0
#define TESTFILE 0 // Set this to test polynomial reading and writing to a file in the current dir

#define DEBUG 0 // prints debug information
#define DEBUG2 1


/*
   Generate a random integer in the range [0, limit)
   If limit == 0, return the current value of the first generator unreduced

   Deterministic: the generator state lives in function-local statics, so the
   sequence depends only on the number of calls made so far.
*/
unsigned long randint(unsigned long limit)
{
#if FLINT_BITS == 32
   static uint64_t randval = 4035456057U;
   randval = ((uint64_t)randval*(uint64_t)1025416097U+(uint64_t)286824430U)%(uint64_t)4294967311U;

   if (limit == 0L) return (unsigned long) randval;

   return (unsigned long)randval%limit;
#else
   static unsigned long randval = 4035456057U;
   static unsigned long randval2 = 6748392731U;
   randval = ((unsigned long)randval*(unsigned long)1025416097U+(unsigned long)286824428U)%(unsigned long)4294967311U;
   randval2 = ((unsigned long)randval2*(unsigned long)1647637699U+(unsigned long)286824428U)%(unsigned long)4294967357U;

   if (limit == 0L) return (unsigned long) randval;

   // the two generators are combined into the low and high halves of a limb
   return (unsigned long)(randval+(randval2<<32))%limit;
#endif
}

/*
   Generate a random integer with up to the given number of bits [0, FLINT_BITS]
*/
unsigned long randbits(unsigned long bits)
{
   return randint(l_shift(1L, bits));
}

/*
   Return a random prime of (upto) the given number of bits [2, FLINT_BITS]
   Aborts if bits < 2.
*/
unsigned long randprime(unsigned long bits)
{
   unsigned long limit, rand;

   if (bits < 2)
   {
      printf("FLINT Exception: attempt to generate prime < 2!\n");
      abort();
   }

   if (bits == FLINT_BITS)
   {
      // reject starting points above a fixed bound near the top of the limb
      // range before calling z_nextprime
      do
      {
         rand = randbits(bits);
#if FLINT_BITS == 32
      } while (rand > 4294967290UL);
#else
      } while (rand > 18446744073709551556UL);
#endif
      rand = z_nextprime(rand);
   }
   else
   {
      // retry until the next prime still fits in the requested bit count
      do
      {
         rand = randbits(bits);
         rand = z_nextprime(rand);
      } while ((rand >> bits) > 0L);
   }

   return rand;
}

/*
   Generate a random zmod polynomial with the modulus n of the given length
   with normalised coefficients
*/
void randpoly(zmod_poly_t poly, long length, unsigned long n)
{
   if (length == 0)
   {
      zmod_poly_fit_length(poly, 1);
      poly->length = 0;
      return;
   }

   zmod_poly_fit_length(poly, length);

   // the cast makes the signed/unsigned comparison explicit
   for (unsigned long i = 0; i < (unsigned long) length; i++)
      poly->coeffs[i] = randint(n);
   poly->length = length;

   __zmod_poly_normalise(poly);
}

/*
   Runs test_<targetfunc>() and prints the outcome.  Expects int variables
   named success and all_success to be in scope at the point of use.
*/
#define RUN_TEST(targetfunc) \
   printf("Testing " #targetfunc "()... ");            \
   fflush(stdout);                                     \
   success = test_##targetfunc();                      \
   all_success = all_success && success;               \
   printf(success ? "ok\n" : "FAIL!\n");


/*
   Reversing twice with the same length must give back the original
   polynomial, both into a second polynomial and in place.
*/
int test_zmod_poly_reverse()
{
   zmod_poly_t poly, poly2;
   int result = 1;
   unsigned long bits, length, length2;

   // first pass: reverse into poly2, then reverse poly2 in place
   for (unsigned long count1 = 0; (count1 < 5000) && (result == 1) ; count1++)
   {
      bits = randint(FLINT_BITS-1)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(poly, modulus);
      zmod_poly_init(poly2, modulus);

      length = randint(100);
      length2 = length + randint(200);

#if DEBUG
      printf("length = %ld, length2 = %ld, bits = %ld\n", length, length2, bits);
#endif

      randpoly(poly, length, modulus);

      zmod_poly_reverse(poly2, poly, length2);
      zmod_poly_reverse(poly2, poly2, length2);

      result = zmod_poly_equal(poly2, poly);

      zmod_poly_clear(poly);
      zmod_poly_clear(poly2);
   }

   // second pass: both reversals in place, compared against a saved copy
   for (unsigned long count1 = 0; (count1 < 5000) && (result == 1) ; count1++)
   {
      bits = randint(FLINT_BITS-1)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(poly, modulus);
      zmod_poly_init(poly2, modulus);

      length = randint(100);
      length2 = length + randint(200);

#if DEBUG
      printf("length = %ld, length2 = %ld, bits = %ld\n", length, length2, bits);
#endif

      randpoly(poly, length, modulus);

      zmod_poly_set(poly2, poly);
      zmod_poly_reverse(poly, poly, length2);
      zmod_poly_reverse(poly, poly, length2);

      result = zmod_poly_equal(poly2, poly);

      zmod_poly_clear(poly);
      zmod_poly_clear(poly2);
   }

   return result;
}

/*
   (pol1 + pol2) - pol2 must equal pol1.
*/
int test_zmod_poly_addsub()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-1)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         unsigned long length2 = randint(100);

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_add(res, pol1, pol2);
         zmod_poly_sub(res, res, pol2);

         result &= zmod_poly_equal(res, pol1);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(res); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res);
   }

   return result;
}

/*
   pol1 - pol2 must equal (-pol2) + pol1.
*/
int test_zmod_poly_neg()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-1)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         unsigned long length2 = randint(100);

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_sub(res1, pol1, pol2);
         zmod_poly_neg(res2, pol2);
         zmod_poly_add(res2, res2, pol1);

         result &= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

/*
   A left shift followed by a right shift of the same amount must give
   back the original polynomial.
*/
int test_zmod_poly_shift()
{
   int result = 1;
   zmod_poly_t pol1, res;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-1)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(res, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         unsigned long shift = randint(100);

         randpoly(pol1, length1, modulus);

         zmod_poly_left_shift(res, pol1, shift);
         zmod_poly_right_shift(res, res, shift);

         result &= zmod_poly_equal(res, pol1);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(res); printf("\n\n");
            zmod_poly_print(pol1); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(res);
   }

   return result;
}

/*
   After swapping pol1 and pol2, pol2 - pol1 must equal the pol1 - pol2
   computed before the swap.
*/
int test_zmod_poly_swap()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-1)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         unsigned long length2 = randint(100);
         // the value is unused, but the draw is kept so that the random
         // stream seen by the later tests does not change
         unsigned long shift = randint(100);
         (void) shift;

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_sub(res1, pol1, pol2);
         zmod_poly_swap(pol1, pol2);
         zmod_poly_sub(res2, pol2, pol1);

         result &= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

/*
   A polynomial produced by zmod_poly_set must compare equal to its source.
*/
int test_zmod_poly_setequal()
{
   int result = 1;
   zmod_poly_t pol1, res;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-1)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(res, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);

         randpoly(pol1, length1, modulus);

         zmod_poly_set(res, pol1);

         result &= zmod_poly_equal(res, pol1);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(res); printf("\n\n");
            zmod_poly_print(pol1); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(res);
   }

   return result;
}

/*
   A coefficient that has been set must read back unchanged; setting a
   coefficient beyond the original length and then zeroing it must leave
   the polynomial equal to the original.
*/
int test_zmod_poly_getset_coeff()
{
   int result = 1;
   zmod_poly_t pol1, pol2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-1)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         unsigned long num = randint(200);
         unsigned long coeff = randint(modulus);

         randpoly(pol1, length1, modulus);
         zmod_poly_set(pol2, pol1);
         zmod_poly_set_coeff_ui(pol1, num, coeff);

         result &= (coeff == zmod_poly_get_coeff_ui(pol1, num));

         if (num + 1 > length1)
         {
            zmod_poly_set_coeff_ui(pol1, num, 0);
            result &= zmod_poly_equal(pol1, pol2);
         }

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
   }

   return result;
}

/*
   zmod_poly_mul_KS must agree with zmod_poly_mul_classical.

   NOTE(review): both loops run a single iteration and the KS product is
   computed ten times; this looks like leftover timing code - confirm the
   intended iteration counts.
*/
int test_zmod_poly_mul_classicalKS()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 1) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 1) && (result == 1); count2++)
      {
         unsigned long length1 = randint(400);
         unsigned long length2 = randint(400);

#if DEBUG
         printf("bits = %ld, length1 = %ld, length2 = %ld, modulus = %ld\n", bits, length1, length2, modulus);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_mul_classical(res1, pol1, pol2);
         for (unsigned long i = 0; i < 10; i++)
            zmod_poly_mul_KS(res2, pol1, pol2, 0);

         result &= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

/*
   zmod_poly_sqr_classical(pol1) must agree with zmod_poly_mul_KS(pol1, pol1).
*/
int test_zmod_poly_sqr_classicalKS()
{
   int result = 1;
   zmod_poly_t pol1, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 50) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 50) && (result == 1); count2++)
      {
         unsigned long length1 = randint(400);

#if DEBUG
         // only one operand here, so there is no length2 to report
         printf("bits = %ld, length1 = %ld, modulus = %ld\n", bits, length1, modulus);
#endif

         randpoly(pol1, length1, modulus);

         zmod_poly_sqr_classical(res1, pol1);
         zmod_poly_mul_KS(res2, pol1, pol1, 0);

         result &= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

/*
   zmod_poly_mul_classical_trunc must agree with a full classical product
   that is truncated afterwards.
*/
int test_zmod_poly_mul_classical_trunc()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 50) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 50) && (result == 1); count2++)
      {
         unsigned long length1 = randint(400);
         unsigned long length2 = randint(400);

         // trunc ranges up to twice the full product length, so truncation
         // beyond the end of the product is exercised as well
         unsigned long trunc;
         if (length1 + length2 > 1) trunc = randint(2*(length1 + length2 - 1));
         else trunc = 0;

#if DEBUG
         printf("bits = %ld, length1 = %ld, length2 = %ld, modulus = %ld\n", bits, length1, length2, modulus);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_mul_classical(res1, pol1, pol2);
         zmod_poly_truncate(res1, trunc);
         zmod_poly_mul_classical_trunc(res2, pol1, pol2, trunc);

         result &= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

/*
   zmod_poly_mul_KS_trunc must agree with a full KS product that is
   truncated afterwards.
*/
int test_zmod_poly_mul_KS_trunc()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(400);
         unsigned long length2 = randint(400);

         unsigned long trunc;
         if (length1 + length2 > 1) trunc = randint(2*(length1 + length2 - 1));
         else trunc = 0;

#if DEBUG
         printf("bits = %ld, length1 = %ld, length2 = %ld, modulus = %ld, trunc = %ld\n", bits, length1, length2, modulus, trunc);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_mul_KS(res1, pol1, pol2, 0);
         zmod_poly_truncate(res1, trunc);
         zmod_poly_mul_KS_trunc(res2, pol1, pol2, 0, trunc);

         result &= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

/*
   Exercises zmod_poly_mul_trunc_n_precomp alongside zmod_poly_mul_KS_trunc.

   NOTE(review): the comparison of the two results is commented out below,
   so this test currently only checks that the calls run; it always
   reports success.  Confirm whether the comparison can be re-enabled.
*/
int test_zmod_poly_mul_KS_trunc_precomp()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 30) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++)
      {
         unsigned long length1 = randint(2000)+2000;
         unsigned long length2 = randint(2000)+2000;

         unsigned long trunc;
         trunc = randint(length1 + length2 - 2000)+2000;

#if DEBUG
         printf("bits = %ld, length1 = %ld, length2 = %ld, modulus = %ld, trunc = %ld\n", bits, length1, length2, modulus, trunc);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_precomp_t pre;
         zmod_poly_mul_trunc_n_precomp_init(pre, pol2, 0, trunc);
         zmod_poly_mul_trunc_n_precomp(res1, pol1, pre, trunc);

         // the reference product is called with the longer operand first
         if (pol1->length > pol2->length)
            zmod_poly_mul_KS_trunc(res2, pol1, pol2, 0, trunc);
         else
            zmod_poly_mul_KS_trunc(res2, pol2, pol1, 0, trunc);

         zmod_poly_precomp_clear(pre);

         result = 1; //&= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

/*
   _zmod_poly_mul_KS_precomp with a precomputed second operand must agree
   with zmod_poly_mul_KS.
*/
int test_zmod_poly_mul_KS_precomp()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 30) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++)
      {
         unsigned long length1 = randint(2000)+2000;
         unsigned long length2 = randint(2000)+2000;

#if DEBUG
         // this test has no trunc, so only the four values in the format
         // string are passed
         printf("bits = %ld, length1 = %ld, length2 = %ld, modulus = %ld\n", bits, length1, length2, modulus);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_precomp_t pre;
         zmod_poly_mul_precomp_init(pre, pol2, 0, length1);
         _zmod_poly_mul_KS_precomp(res1, pol1, pre, 0);

         // the reference product is called with the longer operand first
         if (pol1->length > pol2->length)
            zmod_poly_mul_KS(res2, pol1, pol2, 0);
         else
            zmod_poly_mul_KS(res2, pol2, pol1, 0);

         zmod_poly_precomp_clear(pre);

         result &= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

#if USE_MIDDLE_PRODUCT int test_zmod_poly_mul_KS_middle() { int result = 1; zmod_poly_t pol1, pol2, res1, res2; unsigned long bits; for (unsigned long count1 = 0; (count1 < 30) && (result == 1); count1++) { bits = randint(FLINT_BITS-2)+2; unsigned long modulus; do {modulus = randbits(bits);} while (modulus < 2); zmod_poly_init(pol1, modulus); zmod_poly_init(pol2, modulus); zmod_poly_init(res1, modulus); zmod_poly_init(res2, modulus); for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++) { unsigned long length1 = randint(1000)+1000; unsigned long length2 = (length1+1)/2; unsigned long trunc = length1; #if DEBUG printf("bits = %ld, length1 = %ld, length2 = %ld, modulus = %ld, trunc = %ld\n", bits, length1, length2, modulus, trunc); #endif randpoly(pol1, length1, modulus); randpoly(pol2, length2, modulus); zmod_poly_mul_KS_trunc(res1, pol1, pol2, 0, trunc); for (unsigned long i = 0; i < (trunc-1)/2; i++) res1->coeffs[i] = 0L; zmod_poly_mul_KS_middle(res2, pol1, pol2, 0, 
trunc);

         result &= zmod_poly_equal(res1, res2);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}
#endif

/*
   Checks zmod_poly_mul_classical_trunc_left against the full classical
   product: every coefficient with index in [trunc, res1->length) must
   agree.  Coefficients below trunc are not compared.
   Returns 1 on success, 0 once a coefficient differs.
*/
int test_zmod_poly_mul_classical_trunc_left()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, res2;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 50) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randbits(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);

      for (unsigned long count2 = 0; (count2 < 50) && (result == 1); count2++)
      {
         unsigned long length1 = randint(400);
         unsigned long length2 = randint(400);
         unsigned long trunc;
         if (length1 + length2 > 1) trunc = randint(2*(length1 + length2 - 1));
         else trunc = 0;

#if DEBUG
         printf("bits = %ld, length1 = %ld, length2 = %ld, modulus = %ld, trunc = %ld\n", bits, length1, length2, modulus, trunc);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_mul_classical(res1, pol1, pol2);
         zmod_poly_mul_classical_trunc_left(res2, pol1, pol2, trunc);

         /* NOTE(review): res2 is indexed up to res1->length without looking
            at res2->length -- presumably the two always match; confirm. */
         for (unsigned long i = trunc; i < res1->length; i++)
            if (res1->coeffs[i] != res2->coeffs[i]) result = 0;

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
   }

   return result;
}

/*
   Checks zmod_poly_scalar_mul by multiplying a random polynomial by a
   nonzero scalar and then by z_invert(scalar, modulus); the result must
   equal the original.  The modulus comes from randprime.
*/
int test_zmod_poly_scalar_mul()
{
   int result = 1;
   zmod_poly_t pol1, res1;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 1000) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(res1, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         /* randint(modulus-1) + 1 keeps the scalar away from 0 */
         unsigned long scalar = randint(modulus-1) + 1;
         unsigned long scalar_inv = z_invert(scalar, modulus);

#if DEBUG
         printf("length1 = %ld, bits = %ld, modulus = %ld, scalar = %ld, scalar_inv = %ld\n", length1, bits, modulus, scalar, scalar_inv);
#endif

         randpoly(pol1, length1, modulus);

         zmod_poly_scalar_mul(res1, pol1, scalar);
         zmod_poly_scalar_mul(res1, res1, scalar_inv);

         result &= zmod_poly_equal(res1, pol1);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(res1);
   }

   return result;
}

/*
   Checks zmod_poly_divrem_classical: pol1*pol2 divided by a nonzero pol2
   must give back pol1 as quotient.  The remainder R is not examined.
*/
int test_zmod_poly_divrem_classical()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, Q, R;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 400) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(Q, modulus);
      zmod_poly_init(R, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         unsigned long length2 = randint(100);

#if DEBUG
         printf("length1 = %ld, length2 = %ld, bits = %ld, modulus = %ld\n", length1, length2, bits, modulus);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_mul(res1, pol1, pol2);
         if (pol2->length)
         {
            zmod_poly_divrem_classical(Q, R, res1, pol2);

            result &= zmod_poly_equal(Q, pol1);
         }

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(Q); printf("\n\n");
            zmod_poly_print(R); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
zmod_poly_clear(res1);
      zmod_poly_clear(Q);
      zmod_poly_clear(R);
   }

   return result;
}

/*
   Checks zmod_poly_div_classical: pol1*pol2 divided by a nonzero pol2
   must give back pol1.  Moduli come from randprime; lengths from
   randint(100).  Returns 1 on success, 0 on the first mismatch.
*/
int test_zmod_poly_div_classical()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, Q;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 400) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(Q, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         unsigned long length2 = randint(100);

#if DEBUG
         printf("length1 = %ld, length2 = %ld, bits = %ld, modulus = %ld\n", length1, length2, bits, modulus);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_mul(res1, pol1, pol2);
         /* division is skipped when the random divisor is zero */
         if (pol2->length)
         {
            zmod_poly_div_classical(Q, res1, pol2);

            result &= zmod_poly_equal(Q, pol1);
         }

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(Q); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(Q);
   }

   return result;
}

/*
   Checks zmod_poly_divrem_divconquer the same way, with lengths drawn from
   randint(500): the quotient of pol1*pol2 by a nonzero pol2 must equal
   pol1.  The remainder R is not examined.
*/
int test_zmod_poly_divrem_divconquer()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, Q, R;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(Q, modulus);
      zmod_poly_init(R, modulus);

      for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++)
      {
         unsigned long length1 = randint(500);
         unsigned long length2 = randint(500);

#if DEBUG
         printf("length1 = %ld, length2 = %ld, bits = %ld, modulus = %ld\n", length1, length2, bits, modulus);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_mul(res1, pol1, pol2);
         if (pol2->length)
         {
            zmod_poly_divrem_divconquer(Q, R, res1, pol2);

            result &= zmod_poly_equal(Q, pol1);
         }

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(Q); printf("\n\n");
            zmod_poly_print(R); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(Q);
      zmod_poly_clear(R);
   }

   return result;
}

/*
   Checks zmod_poly_div_divconquer: pol1*pol2 divided by a nonzero pol2
   must give back pol1 (lengths from randint(100)).
*/
int test_zmod_poly_div_divconquer()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, Q;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 400) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(Q, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100);
         unsigned long length2 = randint(100);

#if DEBUG
         printf("length1 = %ld, length2 = %ld, bits = %ld, modulus = %ld\n", length1, length2, bits, modulus);
#endif

         randpoly(pol1, length1, modulus);
         randpoly(pol2, length2, modulus);

         zmod_poly_mul(res1, pol1, pol2);
         if (pol2->length)
         {
            zmod_poly_div_divconquer(Q, res1, pol2);

            result &= zmod_poly_equal(Q, pol1);
         }

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(Q); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(Q);
   }

   return result;
}

/*
   Checks zmod_poly_newton_invert_basecase on a random polynomial whose
   leading coefficient is forced to 1: in poly * poly2 the top coefficient
   must be 1 and the n-1 coefficients below it must be 0.
*/
int test_zmod_poly_newton_invert_basecase()
{
   zmod_poly_t poly, poly2, poly3;
   int result = 1;
   unsigned long bits, length, n;

   for (unsigned long count1 = 0; (count1 < 20000) && (result == 1) ; count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus =
randprime(bits);} while (modulus < 2);

      zmod_poly_init(poly, modulus);
      zmod_poly_init(poly2, modulus);
      zmod_poly_init(poly3, modulus);

      length = random_ulong(64)+1;

#if DEBUG
      printf("length = %ld, bits = %ld\n", length, bits);
#endif

      do randpoly(poly, length, modulus);
      while (poly->length == 0);

      /* make the polynomial monic */
      zmod_poly_set_coeff_ui(poly, poly->length - 1, 1L);

      n = randint(poly->length) + 1;

      zmod_poly_newton_invert_basecase(poly2, poly, n);

      zmod_poly_mul(poly3, poly, poly2);

      /* the n-1 coefficients just below the leading one must vanish ... */
      for (unsigned long i = 0; i < n - 1; i++)
      {
         result &= (poly3->coeffs[i+poly3->length-n] == 0L);
      }
      /* ... and the leading coefficient must be 1 */
      result &= (poly3->coeffs[poly3->length-1] == 1L);

#if DEBUG
      if (!result)
      {
         zmod_poly_print(poly); printf("\n");
         zmod_poly_print(poly2); printf("\n");
         zmod_poly_print(poly3); printf("\n");
      }
#endif

      zmod_poly_clear(poly);
      zmod_poly_clear(poly2);
      zmod_poly_clear(poly3);
   }

   return result;
}

/*
   Checks zmod_poly_newton_invert: for a random polynomial with nonzero
   constant term, poly * poly2 truncated to length coefficients
   (zmod_poly_mul_trunc_n) must be the constant polynomial 1.
*/
int test_zmod_poly_newton_invert()
{
   zmod_poly_t poly, poly2, poly3;
   int result = 1;
   unsigned long bits, length;

   for (unsigned long count1 = 0; (count1 < 30) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(poly, modulus);
      zmod_poly_init(poly2, modulus);
      zmod_poly_init(poly3, modulus);

      length = random_ulong(5000)+1;

#if DEBUG
      printf("length = %ld, bits = %ld\n", length, bits);
#endif

      for (unsigned long count2 = 0; (count2 < 30) && (result == 1); count2++)
      {
         /* redraw until the constant term is nonzero */
         do randpoly(poly, length, modulus);
         while ((poly->length == 0) || (poly->coeffs[0] == 0L));

         zmod_poly_newton_invert(poly2, poly, length);

         zmod_poly_mul_trunc_n(poly3, poly, poly2, length);

         result &= (poly3->length == 1);
         /* NOTE(review): coeffs[0] is read even if the length check above
            failed -- confirm that coeffs is always readable here. */
         result &= (poly3->coeffs[0] == 1L);
      }

      zmod_poly_clear(poly);
      zmod_poly_clear(poly2);
      zmod_poly_clear(poly3);
   }

   return result;
}

/*
   Checks zmod_poly_div_series: with poly3 = poly2 / poly as power series
   to length terms, poly3 * poly truncated to length terms must equal poly2.
   One polynomial pair is tried per modulus.
*/
int test_zmod_poly_div_series()
{
   zmod_poly_t poly, poly2, poly3, poly4;
   int result = 1;
   unsigned long bits, length;

   for (unsigned long count1 = 0; (count1 < 3000) && (result == 1) ; count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(poly, modulus);
      zmod_poly_init(poly2, modulus);
      zmod_poly_init(poly3, modulus);
      zmod_poly_init(poly4, modulus);

      length = randint(200)+1;

#if DEBUG
      printf("length = %ld, bits = %ld\n", length, bits);
#endif

      /* the divisor needs a nonzero constant term */
      do randpoly(poly, length, modulus);
      while ((poly->length == 0) || (poly->coeffs[0] == 0L));

      randpoly(poly2, length, modulus);

      zmod_poly_div_series(poly3, poly2, poly, length);

      zmod_poly_mul_trunc_n(poly4, poly3, poly, length);

      /* plain assignment: the loop condition stops at the first failure,
         so no earlier failure can be overwritten */
      result = zmod_poly_equal(poly4, poly2);

#if DEBUG
      if (!result)
      {
         zmod_poly_print(poly); printf("\n");
         zmod_poly_print(poly2); printf("\n");
         zmod_poly_print(poly3); printf("\n");
         zmod_poly_print(poly4); printf("\n");
      }
#endif

      zmod_poly_clear(poly);
      zmod_poly_clear(poly2);
      zmod_poly_clear(poly3);
      zmod_poly_clear(poly4);
   }

   return result;
}

/*
   Checks zmod_poly_div_newton: the quotient Q of res1 by pol2 must equal
   pol1.

   NOTE(review): this copy of the function is damaged.  The text between
   "1L<" and "length) {" below is missing (the code that fills pol1, pol2
   and res1 is not visible), so the function does not parse as it stands.
   Restore it from the upstream FLINT sources before relying on it.
*/
int test_zmod_poly_div_newton()
{
   int result = 1;
   zmod_poly_t pol1, pol2, res1, Q;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(Q, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(200);
         unsigned long length2 = randint(200);

#if DEBUG
         printf("length1 = %ld, length2 = %ld, bits = %ld, modulus = %ld\n", length1, length2, bits, modulus);
#endif

         unsigned log_length = 0L;
         while ((1L<length)
         {
            zmod_poly_div_newton(Q, res1, pol2);

            result &= zmod_poly_equal(Q, pol1);
         }

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(Q); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(res1);
      zmod_poly_clear(Q);
   }

   return result;
}

/*
   Checks zmod_poly_gcd: two polynomials whose gcd has length 1 are both
   multiplied by a random pol3; the gcd of the products must then be
   divisible by pol3 and have the same length as pol3.
*/
int test_zmod_poly_gcd()
{
   int result = 1;
zmod_poly_t pol1, pol2, pol3, res1, res2, res3;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(pol3, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);
      zmod_poly_init(res3, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100)+1;
         unsigned long length2 = randint(100)+1;
         unsigned long length3 = randint(100);

#if DEBUG
         printf("length1 = %ld, length2 = %ld, bits = %ld, modulus = %ld\n", length1, length2, bits, modulus);
#endif

         /* redraw until the gcd of pol1 and pol2 has length 1 */
         do
         {
            randpoly(pol1, length1, modulus);
            randpoly(pol2, length2, modulus);
            zmod_poly_gcd(res1, pol1, pol2);
         } while (res1->length != 1);

         /* plant the common factor pol3 in both operands */
         randpoly(pol3, length3, modulus);
         zmod_poly_mul(pol1, pol1, pol3);
         zmod_poly_mul(pol2, pol2, pol3);

         zmod_poly_gcd(res1, pol1, pol2);
         /* res3 = remainder of the computed gcd on division by pol3 */
         if (pol3->length != 0) zmod_poly_divrem_newton(res2, res3, res1, pol3);
         else zmod_poly_zero(res3);

         result &= ((res3->length == 0) && (res1->length == pol3->length));

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(pol3);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
      zmod_poly_clear(res3);
   }

   return result;
}

/*
   Checks zmod_poly_gcd_invert: pol1 is first reduced modulo pol2 and
   redrawn until it is nonzero with gcd(pol1, pol2) of length 1.  Then
   res1 = zmod_poly_gcd_invert(pol1, pol2) is multiplied by pol1 and the
   remainder modulo pol2 must have length 1.

   NOTE(review): only the length of the remainder is tested, not that its
   value is 1; pol3 is initialised and cleared but otherwise unused.
*/
int test_zmod_poly_gcd_invert()
{
   int result = 1;
   zmod_poly_t pol1, pol2, pol3, res1, res2, res3, res4;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(pol3, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);
      zmod_poly_init(res3, modulus);
      zmod_poly_init(res4, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100)+1;
         unsigned long length2 = randint(100)+2;

#if DEBUG
         printf("length1 = %ld, length2 = %ld, bits = %ld, modulus = %ld\n", length1, length2, bits, modulus);
#endif

         do
         {
            randpoly(pol1, length1, modulus);
            randpoly(pol2, length2, modulus);
            /* replace pol1 by its remainder modulo pol2 */
            if (pol2->length != 0) zmod_poly_divrem_newton(res2, pol1, pol1, pol2);
            else zmod_poly_zero(pol1);
            zmod_poly_gcd(res1, pol1, pol2);
         } while ((res1->length != 1) || (pol1->length == 0));

         zmod_poly_gcd_invert(res1, pol1, pol2);
         zmod_poly_mul(res2, res1, pol1);
         zmod_poly_divrem_newton(res4, res3, res2, pol2);

         result &= (res3->length == 1);

#if DEBUG
         if (!result)
         {
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(res2); printf("\n\n");
            zmod_poly_print(res3); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(pol3);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
      zmod_poly_clear(res3);
      zmod_poly_clear(res4);
   }

   return result;
}

/*
   Checks zmod_poly_xgcd: as in the gcd test a common factor pol3 is
   planted, and in addition s*pol1 + t*pol2 must equal the returned gcd.
*/
int test_zmod_poly_xgcd()
{
   int result = 1;
   zmod_poly_t s, t, pol1, pol2, pol3, res1, res2, res3;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 100) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(s, modulus);
      zmod_poly_init(t, modulus);
      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(pol3, modulus);
      zmod_poly_init(res1, modulus);
      zmod_poly_init(res2, modulus);
      zmod_poly_init(res3, modulus);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
         unsigned long length1 = randint(100)+1;
         unsigned long length2 = randint(100)+1;
         unsigned long length3 = randint(100);

#if DEBUG
         printf("length1 = %ld, length2 = %ld, bits = %ld, modulus = %ld\n", length1, length2, bits, modulus);
#endif

         do
         {
randpoly(pol1, length1, modulus);
            randpoly(pol2, length2, modulus);
            zmod_poly_gcd(res1, pol1, pol2);
         } while (res1->length != 1);

         randpoly(pol3, length3, modulus);
         zmod_poly_mul(pol1, pol1, pol3);
         zmod_poly_mul(pol2, pol2, pol3);

         zmod_poly_xgcd(res1, s, t, pol1, pol2);
         if (pol3->length != 0) zmod_poly_divrem_newton(res2, res3, res1, pol3);
         else zmod_poly_zero(res3);

         /* s <- s*pol1 + t*pol2, which must reproduce the gcd */
         zmod_poly_mul(s, s, pol1);
         zmod_poly_mul(t, t, pol2);
         zmod_poly_add(s, s, t);

         result &= ((res3->length == 0) && (res1->length == pol3->length) && zmod_poly_equal(res1, s));

#if DEBUG
         if (!result)
         {
            zmod_poly_print(res1); printf("\n\n");
            zmod_poly_print(s); printf("\n\n");
         }
#endif
      }

      zmod_poly_clear(s);
      zmod_poly_clear(t);
      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
      zmod_poly_clear(pol3);
      zmod_poly_clear(res1);
      zmod_poly_clear(res2);
      zmod_poly_clear(res3);
   }

   return result;
}

/*
   Checks zmod_poly_resultant_euclidean on polynomials built from known
   roots: pol1 = prod (x - roots1[i]), pol2 = prod (x - roots2[j]), each
   with pairwise distinct roots.  The expected value is the product of
   (roots1[i] - roots2[j]) over all i, j, reduced modulo the prime modulus.
*/
int test_zmod_poly_resultant_euclidean()
{
   int result = 1;
   zmod_poly_t pol1, pol2, lin;
   unsigned long bits;

   for (unsigned long count1 = 0; (count1 < 500) && (result == 1); count1++)
   {
      bits = randint(FLINT_BITS-2)+2;
      unsigned long modulus;

      do {modulus = randprime(bits);} while (modulus < 2);

      zmod_poly_init(pol1, modulus);
      zmod_poly_init(pol2, modulus);
      zmod_poly_init(lin, modulus);

      /* number of roots of each polynomial, fixed for this modulus */
      unsigned long r1 = randint(FLINT_MIN(10, modulus));
      unsigned long r2 = randint(FLINT_MIN(10, modulus));

      unsigned long * roots1 = flint_stack_alloc(r1+1);
      unsigned long * roots2 = flint_stack_alloc(r2+1);

      for (unsigned long count2 = 0; (count2 < 100) && (result == 1); count2++)
      {
#if DEBUG
         printf("r1 = %ld, r2 = %ld, modulus = %ld\n", r1, r2, modulus);
#endif

         int exists;

         /* draw r1 distinct residues; i advances only on a fresh value */
         for (unsigned long i = 0; i < r1; )
         {
            exists = 0;
            unsigned long n = randint(modulus);
            for (unsigned long j = 0; j < i; j++)
               if (roots1[j] == n) exists = 1;
            if (!exists)
            {
               roots1[i] = n;
               i++;
            }
         }

         /* same for the r2 roots of the second polynomial */
         for (unsigned long i = 0; i < r2; )
         {
            exists = 0;
            unsigned long n = randint(modulus);
            for (unsigned long j = 0; j < i; j++)
               if (roots2[j] == n) exists = 1;
            if (!exists)
            {
               roots2[i] = n;
               i++;
            }
         }

         /* reset pol1 = pol2 = 1 and lin = x (constant term set below) */
         zmod_poly_set_coeff_ui(pol1, 0, 1);
         pol1->length = 1;
         zmod_poly_set_coeff_ui(pol2, 0, 1);
         pol2->length = 1;
         zmod_poly_set_coeff_ui(lin, 1, 1L);
         lin->length = 2;

         for (unsigned long i = 0; i < r1; i++)
         {
            /* lin = x - roots1[i] */
            zmod_poly_set_coeff_ui(lin, 0, z_submod(0, roots1[i], modulus));
            zmod_poly_mul(pol1, pol1, lin);
         }

         for (unsigned long i = 0; i < r2; i++)
         {
            /* lin = x - roots2[i] */
            zmod_poly_set_coeff_ui(lin, 0, z_submod(0, roots2[i], modulus));
            zmod_poly_mul(pol2, pol2, lin);
         }

         unsigned long res1, res2;

         /* expected resultant: product of all root differences */
         res1 = 1;
         for (unsigned long i = 0; i < r1; i++)
         {
            for (unsigned long j = 0; j < r2; j++)
            {
               res1 = z_mulmod2_precomp(res1, z_submod(roots1[i], roots2[j], modulus), modulus, pol1->p_inv);
            }
         }

         res2 = zmod_poly_resultant_euclidean(pol1, pol2);

         result = (res1 == res2);

#if DEBUG
         if (!result)
         {
            printf("res1 = %ld, res2 = %ld\n", res1, res2);
            zmod_poly_print(pol1); printf("\n\n");
            zmod_poly_print(pol2); printf("\n\n");
            for (unsigned long i = 0; i < r1; i++) printf("%ld, ", roots1[i]);
            printf("\n");
            for (unsigned long i = 0; i < r2; i++) printf("%ld, ", roots2[i]);
            printf("\n");
         }
#endif
      }

      /* one release per flint_stack_alloc above */
      flint_stack_release();
      flint_stack_release();

      zmod_poly_clear(lin);
      zmod_poly_clear(pol1);
      zmod_poly_clear(pol2);
   }

   return result;
}

/*
   Runs every zmod_poly test through the RUN_TEST macro.
*/
void zmod_poly_test_all()
{
   int success, all_success = 1;

#if TESTFILE
#endif
   RUN_TEST(zmod_poly_reverse);
   RUN_TEST(zmod_poly_addsub);
   RUN_TEST(zmod_poly_neg);
   RUN_TEST(zmod_poly_shift);
   RUN_TEST(zmod_poly_swap);
   RUN_TEST(zmod_poly_setequal);
   RUN_TEST(zmod_poly_getset_coeff);
   RUN_TEST(zmod_poly_mul_classicalKS);
   RUN_TEST(zmod_poly_sqr_classicalKS);
   RUN_TEST(zmod_poly_mul_classical_trunc);
   RUN_TEST(zmod_poly_mul_KS_trunc);
#if USE_MIDDLE_PRODUCT
   RUN_TEST(zmod_poly_mul_KS_middle);
#endif
   RUN_TEST(zmod_poly_mul_KS_precomp);
   RUN_TEST(zmod_poly_mul_KS_trunc_precomp);
   RUN_TEST(zmod_poly_mul_classical_trunc_left);
   RUN_TEST(zmod_poly_scalar_mul);
   RUN_TEST(zmod_poly_divrem_classical);
   RUN_TEST(zmod_poly_div_classical);
   RUN_TEST(zmod_poly_divrem_divconquer);
   RUN_TEST(zmod_poly_div_divconquer);
RUN_TEST(zmod_poly_newton_invert_basecase); RUN_TEST(zmod_poly_newton_invert); RUN_TEST(zmod_poly_div_series); RUN_TEST(zmod_poly_div_newton); RUN_TEST(zmod_poly_gcd); RUN_TEST(zmod_poly_gcd_invert); RUN_TEST(zmod_poly_xgcd); RUN_TEST(zmod_poly_resultant_euclidean); printf(all_success ? "\nAll tests passed\n" : "\nAt least one test FAILED!\n"); } int main() { test_support_init(); zmod_poly_test_all(); test_support_cleanup(); flint_stack_cleanup(); return 0; } flint-1.011/ZmodF_mul-tuning.c0000644017361200017500000000307711025357254016074 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* Tuning values for ZmodF_mul module Automatically generated by ZmodF_mul-tune program */ #include "ZmodF_mul-tuning.h" #include "ZmodF_mul.h" unsigned long ZmodF_mul_plain_threeway_threshold = 24; unsigned long ZmodF_mul_plain_fft_threshold = 265; unsigned long ZmodF_mul_threeway_fft_threshold = 471; unsigned long ZmodF_mul_fft_table[20] = {252, 561, 1183, 2555, 5368, 14973, 60163, 0}; unsigned long ZmodF_sqr_plain_threeway_threshold = 36; unsigned long ZmodF_sqr_plain_fft_threshold = 560; unsigned long ZmodF_sqr_threeway_fft_threshold = 330; unsigned long ZmodF_sqr_fft_table[20] = {153, 359, 796, 2144, 4314, 14977, 60163, 0}; // end of file ********************************* flint-1.011/ZmodF_mul-tuning.h0000644017361200017500000000355311025357254016100 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* ZmodF_mul-tuning.h Tuning values for ZmodF_mul.c (C) 2007 David Harvey NOTE: the tuning values in this file are for sage.math only. TODO: write an automatic tuning utility!! */ #ifndef FLINT_ZMODF_MUL_TUNING_H #define FLINT_ZMODF_MUL_TUNING_H #ifdef __cplusplus extern "C" { #endif // for ZmodF_mul_fft_table[] and ZmodF_sqr_fft_table[], // first value is crossover n from depth 3 to depth 4, // then crossover from depth 4 to depth 5, etc. extern unsigned long ZmodF_mul_plain_threeway_threshold; extern unsigned long ZmodF_mul_plain_fft_threshold; extern unsigned long ZmodF_mul_threeway_fft_threshold; extern unsigned long ZmodF_mul_fft_table[]; extern unsigned long ZmodF_sqr_plain_threeway_threshold; extern unsigned long ZmodF_sqr_plain_fft_threshold; extern unsigned long ZmodF_sqr_threeway_fft_threshold; extern unsigned long ZmodF_sqr_fft_table[]; #ifdef __cplusplus } #endif #endif // end of file **************************************************************** flint-1.011/bernoulli-profile.c0000644017361200017500000005722511025357254016333 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** zmod_poly-profile.c : Profiling code for zmod_poly Copyright (C) 2007, David Howden *****************************************************************************/ #include "profiler-main.h" #include "zmod_poly.h" #include "mpz_poly.h" #include "fmpz_poly.h" #include "long_extras.h" #include "flint.h" #include #include /* Computes the bernoulli numbers B_0, B_2, ..., B_{p-3} for prime p Requires that res be allocated for (p-1)/2 unsigned longs which will hold the result. If returns 0, then the factoring of p has failed, otherwise will always return 1. */ int bernoulli_mod_p_mpz(unsigned long *res, unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long g, g_inv, g_sqr, g_sqr_inv; double p_inv = z_precompute_inverse(p); g = z_primitive_root_precomp(p, p_inv); if(!g) { return 0; } g_inv = z_invert(g, p); g_sqr = z_mulmod_precomp(g, g, p, p_inv); g_sqr_inv = z_mulmod_precomp(g_inv, g_inv, p, p_inv); unsigned long poly_size = (p-1)/2; int is_odd = poly_size % 2; unsigned long g_power, g_power_inv; g_power = g_inv; g_power_inv = 1; // constant is (g-1)/2 mod p unsigned long constant; if(g % 2) { constant = (g-1)/2; } else { constant = (g+p-1)/2; } // fudge holds g^{i^2}, fudge_inv holds g^{-i^2} unsigned long fudge, fudge_inv; fudge = fudge_inv = 1; // compute the polynomials F(X) and G(X) mpz_poly_t F, G; mpz_poly_init2(F, poly_size); mpz_poly_init2(G, poly_size); unsigned long i, temp, h; for(i = 0; i < poly_size; i++) { // compute h(g^i)/g^i (h(x) is as in latex notes) temp = g * g_power; h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv); g_power = z_mod_precomp(temp, p, 
p_inv); g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv); // store coefficient g^{i^2} h(g^i)/g^i mpz_poly_set_coeff_ui(G, i, z_mulmod_precomp(h, fudge, p, p_inv)); mpz_poly_set_coeff_ui(F, i, fudge_inv); // update fudge and fudge_inv fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv); fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv); } mpz_poly_set_coeff_ui(F, 0, 0); // step 2: multiply the polynomials... mpz_poly_t product; mpz_poly_init(product); mpz_poly_mul(product, G, F); // step 3: assemble the result... unsigned long g_sqr_power, value; g_sqr_power = g_sqr; fudge = g; res[0] = 1; mpz_t value_coeff; mpz_init(value_coeff); unsigned long value_coeff_ui; for(i = 1; i < poly_size; i++) { mpz_poly_get_coeff(value_coeff, product, i + poly_size); value = mpz_fdiv_ui(value_coeff, p); value = z_mod_precomp(mpz_poly_get_coeff_ui(product, i + poly_size), p, p_inv); mpz_poly_get_coeff(value_coeff, product, i); if(is_odd) { value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv); } else { value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv); } value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); res[i] = value; g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); } mpz_poly_clear(product); mpz_poly_clear(F); mpz_poly_clear(G); mpz_clear(value_coeff); return 1; } int bernoulli_mod_p_fmpz(unsigned long *res, unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long g, g_inv, g_sqr, g_sqr_inv; double p_inv = z_precompute_inverse(p); g = 
z_primitive_root_precomp(p, p_inv); if(!g) { return 0; } g_inv = z_invert(g, p); g_sqr = z_mulmod_precomp(g, g, p, p_inv); g_sqr_inv = z_mulmod_precomp(g_inv, g_inv, p, p_inv); unsigned long poly_size = (p-1)/2; int is_odd = poly_size % 2; unsigned long g_power, g_power_inv; g_power = g_inv; g_power_inv = 1; // constant is (g-1)/2 mod p unsigned long constant; if(g % 2) { constant = (g-1)/2; } else { constant = (g+p-1)/2; } // fudge holds g^{i^2}, fudge_inv holds g^{-i^2} unsigned long fudge, fudge_inv; fudge = fudge_inv = 1; // compute the polynomials F(X) and G(X) fmpz_poly_t F, G; fmpz_poly_init2(F, poly_size, 2); fmpz_poly_init2(G, poly_size, 2); unsigned long i, temp, h; for(i = 0; i < poly_size; i++) { // compute h(g^i)/g^i (h(x) is as in latex notes) temp = g * g_power; h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv); g_power = z_mod_precomp(temp, p, p_inv); g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv); // store coefficient g^{i^2} h(g^i)/g^i fmpz_poly_set_coeff_ui(G, i, z_mulmod_precomp(h, fudge, p, p_inv)); fmpz_poly_set_coeff_ui(F, i, fudge_inv); // update fudge and fudge_inv fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv); fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv); } fmpz_poly_set_coeff_ui(F, 0, 0); // step 2: multiply the polynomials... fmpz_poly_t product; fmpz_poly_init(product); fmpz_poly_mul(product, G, F); // step 3: assemble the result... 
unsigned long g_sqr_power, value; g_sqr_power = g_sqr; fudge = g; res[0] = 1; mpz_t value_coeff; mpz_init(value_coeff); unsigned long value_coeff_ui; unsigned long value2; // know that there are either 1 limbs per coeff or 2 limbs per coeff (since we have a limit on p) //if(_fmpz_poly_limbs(product) == 1) //{ for(i = 1; i < poly_size; i++) { fmpz_poly_get_coeff_mpz(value_coeff, product, i + poly_size); //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(product, i+poly_size), p, p_inv); //value_coeff_ui = z_mod_precomp(_fmpz_poly_get_coeff_ui(product, i), p, p_inv); value = mpz_fdiv_ui(value_coeff, p); //if(value != value2) printf("ERROR!!!! %d != %d\n", value, value2); fmpz_poly_get_coeff_mpz(value_coeff, product, i); if(is_odd) { value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv); //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + value_coeff_ui + p - value, p, p_inv); } else { value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv); //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + value_coeff_ui + value, p, p_inv); } value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); res[i] = value; g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); } // } // else // { // for(i = 1; i < poly_size; i++) // { // //fmpz_poly_get_coeff_mpz(value_coeff, product, i + poly_size); // // value2 = z_ll_mod_precomp(product->coeffs[i+poly_size], product->coeffs[i+poly_size + 1], p, p_inv); // value_coeff_ui = z_ll_mod_precomp(product->coeffs[i], product->coeffs[i+1], p, p_inv); // // //value = mpz_fdiv_ui(value_coeff, p); // //if(value != value2) printf("ERROR!!!! 
%d != %d\n", value, value2); // // // //fmpz_poly_get_coeff_mpz(value_coeff, product, i); // if(is_odd) // { // //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv); // value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + value_coeff_ui + p - value, p, p_inv); // } // else // { // //value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv); // value = z_mod_precomp(_fmpz_poly_get_coeff_ui(G, i) + value_coeff_ui + value, p, p_inv); // } // // value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); // // value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); // // res[i] = value; // // g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); // fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); // g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); // } // // } fmpz_poly_clear(product); fmpz_poly_clear(F); fmpz_poly_clear(G); return 1; } int bernoulli_mod_p_zmod(unsigned long *res, unsigned long p) { FLINT_ASSERT(p > 2); FLINT_ASSERT(z_isprime(p) == 1); unsigned long g, g_inv, g_sqr, g_sqr_inv; double p_inv = z_precompute_inverse(p); g = z_primitive_root_precomp(p, p_inv); if(!g) { return 0; } g_inv = z_invert(g, p); g_sqr = z_mulmod_precomp(g, g, p, p_inv); g_sqr_inv = z_mulmod_precomp(g_inv, g_inv, p, p_inv); unsigned long poly_size = (p-1)/2; int is_odd = poly_size % 2; unsigned long g_power, g_power_inv; g_power = g_inv; g_power_inv = 1; // constant is (g-1)/2 mod p unsigned long constant; if(g % 2) { constant = (g-1)/2; } else { constant = (g+p-1)/2; } // fudge holds g^{i^2}, fudge_inv holds g^{-i^2} unsigned long fudge, fudge_inv; fudge = fudge_inv = 1; // compute the polynomials F(X) and G(X) zmod_poly_t F, G; zmod_poly_init2(F, p, poly_size); zmod_poly_init2(G, p, poly_size); unsigned long i, temp, h; for(i = 0; i < poly_size; i++) { // compute h(g^i)/g^i (h(x) is as in latex 
notes) temp = g * g_power; h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv); g_power = z_mod_precomp(temp, p, p_inv); g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv); // store coefficient g^{i^2} h(g^i)/g^i zmod_poly_set_coeff(G, i, z_mulmod_precomp(h, fudge, p, p_inv)); zmod_poly_set_coeff(F, i, fudge_inv); // update fudge and fudge_inv fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv); fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv); } zmod_poly_set_coeff(F, 0, 0); // step 2: multiply the polynomials... zmod_poly_t product; zmod_poly_init(product, p); zmod_poly_mul_KS(product, G, F, 0); // step 3: assemble the result... unsigned long g_sqr_power, value; g_sqr_power = g_sqr; fudge = g; res[0] = 1; unsigned long value_coeff_ui; for(i = 1; i < poly_size; i++) { value = zmod_poly_get_coeff(product, i + poly_size); if(is_odd) { value = z_mod_precomp(zmod_poly_get_coeff(G, i) + zmod_poly_get_coeff(product, i) + p - value, p, p_inv); } else { value = z_mod_precomp(zmod_poly_get_coeff(G, i) + zmod_poly_get_coeff(product, i) + value, p, p_inv); } value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv); value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv); res[i] = value; g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv); g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv); } zmod_poly_clear(product); zmod_poly_clear(F); zmod_poly_clear(G); return 1; } /* Verifies that the ouput of bernoulli_mod_p above is correct. Takes the result from bernoulli_mod_p (res - an array of (p-1)/2 unsigned longs), and the prime p. Returns 0 if res is incorrect, 1 if res is correct. 
*/ int verify_bernoulli_mod_p(unsigned long *res, unsigned long p) { unsigned long N, i, product, sum, value, element; double p_inv; N = (p-1)/2; product = 1; sum = 0; p_inv = z_precompute_inverse(p); for(i = 0; i < N; i++) { element = res[i]; // if((signed long)element < 0) // { // printf("NEGATIVE NUMBER!!!!!\n"); // } // if(element > p) // { // printf("OVERFLOW!!!!!\n"); // } value = z_mulmod_precomp(z_mulmod_precomp(product, 2*i+1, p, p_inv), element, p, p_inv); sum = z_mod_precomp(sum + value, p, p_inv); product = z_mulmod_precomp(product, 4, p, p_inv); } if(z_mod_precomp(sum + 2, p, p_inv)) { return 0; } return 1; } /* This is a helper function used by the other sampler functions below. */ // void sample_ZmodF_mul_helper(ZmodF_mul_info_t info, unsigned long n, // unsigned long count) // { // mp_limb_t* x1 = (mp_limb_t*) malloc((n+1) * sizeof(mp_limb_t)); // mp_limb_t* x2 = (mp_limb_t*) malloc((n+1) * sizeof(mp_limb_t)); // mp_limb_t* x3 = (mp_limb_t*) malloc((n+1) * sizeof(mp_limb_t)); // // profiler_random_limbs(x1, n); // x1[n] = 0; // profiler_random_limbs(x2, n); // x2[n] = 0; // // prof_start(); // // for (unsigned long i = 0; i < count; i++) // ZmodF_mul_info_mul(info, x3, x1, x2); // // prof_stop(); // // free(x3); // free(x2); // free(x1); // } /* This is a helper function used by the other sampler functions below. 
*/ // void sample_ZmodF_sqr_helper(ZmodF_mul_info_t info, unsigned long n, // unsigned long count) // { // mp_limb_t* x1 = (mp_limb_t*) malloc((n+1) * sizeof(mp_limb_t)); // mp_limb_t* x3 = (mp_limb_t*) malloc((n+1) * sizeof(mp_limb_t)); // // profiler_random_limbs(x1, n); // x1[n] = 0; // // prof_start(); // // for (unsigned long i = 0; i < count; i++) // ZmodF_mul_info_mul(info, x3, x1, x1); // // prof_stop(); // // free(x3); // free(x1); // } // ============================================================================ void sample_bernoulli_mpz(unsigned long n, void* arg, unsigned long count) { unsigned long *res = (unsigned long*) malloc(sizeof(unsigned long)*((n-1)/2)); prof_start(); while (count) { if(!bernoulli_mod_p_mpz(res, n)) { printf("Could not factor p = %d\n", n); } count--; } prof_stop(); free(res); } char* profDriverString_bernoulli_mpz(char* params) { return "Bernoulli mpz implementation.\n" "Parameters: n (number of primes to test above 2).\n"; } char* profDriverDefaultParams_bernoulli_mpz() { return "2 1000"; } void profDriver_bernoulli_mpz(char* params) { unsigned long p; unsigned long n; sscanf(params, "%ld %ld", &p, &n); prof1d_set_sampler(sample_bernoulli_mpz); for (unsigned long i = 0; i < n; i++) { p = z_nextprime(p); prof1d_sample(p, NULL); } } // ============================================================================ void sample_bernoulli_fmpz(unsigned long n, void* arg, unsigned long count) { unsigned long *res = (unsigned long*) malloc(sizeof(unsigned long)*((n-1)/2)); prof_start(); while (count) { if(!bernoulli_mod_p_fmpz(res, n)) { printf("Could not factor p = %d\n", n); } count--; } prof_stop(); free(res); } char* profDriverString_bernoulli_fmpz(char* params) { return "Bernoulli fmpz implementation.\n" "Parameters: n (number of primes to test above 2).\n"; } char* profDriverDefaultParams_bernoulli_fmpz() { return "2 1000"; } void profDriver_bernoulli_fmpz(char* params) { unsigned long p; unsigned long n; sscanf(params, "%ld 
%ld", &p, &n); prof1d_set_sampler(sample_bernoulli_fmpz); for (unsigned long i = 0; i < n; i++) { p = z_nextprime(p); prof1d_sample(p, NULL); } } // ============================================================================ void sample_bernoulli_zmod(unsigned long n, void* arg, unsigned long count) { unsigned long *res = (unsigned long*) malloc(sizeof(unsigned long)*((n-1)/2)); prof_start(); while (count) { if(!bernoulli_mod_p_zmod(res, n)) { printf("Could not factor p = %d\n", n); } count--; } prof_stop(); free(res); } char* profDriverString_bernoulli_zmod(char* params) { return "Bernoulli zmod implementation.\n" "Parameters: n (number of primes to test above 2).\n"; } char* profDriverDefaultParams_bernoulli_zmod() { return "2 1000"; } void profDriver_bernoulli_zmod(char* params) { unsigned long p; unsigned long n; sscanf(params, "%ld %ld", &p, &n); prof1d_set_sampler(sample_bernoulli_zmod); for (unsigned long i = 0; i < n; i++) { p = z_nextprime(p); prof1d_sample(p, NULL); } } // ============================================================================ // void sample_ZmodF_mul_threeway(unsigned long n, void* arg, unsigned long count) // { // ZmodF_mul_info_t info; // ZmodF_mul_info_init_threeway(info, n, 0); // sample_ZmodF_mul_helper(info, n, count); // ZmodF_mul_info_clear(info); // } // // // char* profDriverString_ZmodF_mul_threeway(char* params) // { // return // "ZmodF_mul using threeway algorithm.\n" // "Parameters: n_min, n_max, n_skip.\n" // "Note: n not divisible by 3 are skipped.\n"; // } // // // char* profDriverDefaultParams_ZmodF_mul_threeway() // { // return "1 1000 1"; // } // // // void profDriver_ZmodF_mul_threeway(char* params) // { // unsigned long n_min, n_max, n_skip; // sscanf(params, "%ld %ld %ld", &n_min, &n_max, &n_skip); // // prof1d_set_sampler(sample_ZmodF_mul_threeway); // // // round up n_min so we start on a permissible value // while (n_min % 3) // n_min++; // // for (unsigned long n = n_min; n <= n_max; n += n_skip) // { // 
if (n % 3 == 0) // prof1d_sample(n, NULL); // } // } // ============================================================================ // void sample_ZmodF_mul_auto(unsigned long n, void* arg, unsigned long count) // { // ZmodF_mul_info_t info; // ZmodF_mul_info_init(info, n, 0); // sample_ZmodF_mul_helper(info, n, count); // ZmodF_mul_info_clear(info); // } // // // char* profDriverString_ZmodF_mul_auto(char* params) // { // return // "ZmodF_mul using automatically selected algorithm.\n" // "Parameters: n_min, n_max, n_skip.\n"; // } // // // char* profDriverDefaultParams_ZmodF_mul_auto() // { // return "1 1000 1"; // } // // // void profDriver_ZmodF_mul_auto(char* params) // { // unsigned long n_min, n_max, n_skip; // sscanf(params, "%ld %ld %ld", &n_min, &n_max, &n_skip); // // prof1d_set_sampler(sample_ZmodF_mul_auto); // // for (unsigned long n = n_min; n <= n_max; n += n_skip) // prof1d_sample(n, NULL); // } // ============================================================================ // void sample_ZmodF_sqr_plain(unsigned long n, void* arg, unsigned long count) // { // ZmodF_mul_info_t info; // ZmodF_mul_info_init_plain(info, n, 1); // sample_ZmodF_sqr_helper(info, n, count); // ZmodF_mul_info_clear(info); // } // // // char* profDriverString_ZmodF_sqr_plain(char* params) // { // return // "ZmodF_sqr using plain algorithm.\n" // "Parameters: n_min, n_max, n_skip.\n"; // } // // // char* profDriverDefaultParams_ZmodF_sqr_plain() // { // return "1 1000 1"; // } // // // void profDriver_ZmodF_sqr_plain(char* params) // { // unsigned long n_min, n_max, n_skip; // sscanf(params, "%ld %ld %ld", &n_min, &n_max, &n_skip); // // prof1d_set_sampler(sample_ZmodF_sqr_plain); // // for (unsigned long n = n_min; n <= n_max; n += n_skip) // prof1d_sample(n, NULL); // } // ============================================================================ // void sample_ZmodF_sqr_threeway(unsigned long n, void* arg, unsigned long count) // { // ZmodF_mul_info_t info; // 
ZmodF_mul_info_init_threeway(info, n, 1); // sample_ZmodF_sqr_helper(info, n, count); // ZmodF_mul_info_clear(info); // } // // // char* profDriverString_ZmodF_sqr_threeway(char* params) // { // return // "ZmodF_sqr using threeway algorithm.\n" // "Parameters: n_min, n_max, n_skip.\n" // "Note: n not divisible by 3 are skipped.\n"; // } // // // char* profDriverDefaultParams_ZmodF_sqr_threeway() // { // return "1 1000 1"; // } // // // void profDriver_ZmodF_sqr_threeway(char* params) // { // unsigned long n_min, n_max, n_skip; // sscanf(params, "%ld %ld %ld", &n_min, &n_max, &n_skip); // // prof1d_set_sampler(sample_ZmodF_sqr_threeway); // // // round up n_min so we start on a permissible value // while (n_min % 3) // n_min++; // // for (unsigned long n = n_min; n <= n_max; n += n_skip) // { // if (n % 3 == 0) // prof1d_sample(n, NULL); // } // } // ============================================================================ // void sample_ZmodF_sqr_auto(unsigned long n, void* arg, unsigned long count) // { // ZmodF_mul_info_t info; // ZmodF_mul_info_init(info, n, 1); // sample_ZmodF_sqr_helper(info, n, count); // ZmodF_mul_info_clear(info); // } // // // char* profDriverString_ZmodF_sqr_auto(char* params) // { // return // "ZmodF_sqr using automatically selected algorithm.\n" // "Parameters: n_min, n_max, n_skip.\n"; // } // // // char* profDriverDefaultParams_ZmodF_sqr_auto() // { // return "1 1000 1"; // } // // // void profDriver_ZmodF_sqr_auto(char* params) // { // unsigned long n_min, n_max, n_skip; // sscanf(params, "%ld %ld %ld", &n_min, &n_max, &n_skip); // // prof1d_set_sampler(sample_ZmodF_sqr_auto); // // for (unsigned long n = n_min; n <= n_max; n += n_skip) // prof1d_sample(n, NULL); // } // end of file **************************************************************** flint-1.011/mpn_extras.h0000644017361200017500000001537011025357254015062 0ustar tabbotttabbott/*============================================================================ Copyright (C) 
2007, William Hart, David Harvey This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/

#ifndef MPN_EXTRAS_H
#define MPN_EXTRAS_H

#include "flint.h"
#include "ZmodF_poly.h"
#include "longlong_wrapper.h"
#include "longlong.h"

/*============================================================================

"mpn-wannabe" code.

These are functions that I wish were in GMP's mpn layer.

=============================================================================*/

/* Type of a precomputed limb inverse; simply an alias for mp_limb_t. */
#define pre_limb_t mp_limb_t

/*
   Returns a precomputed inverse of the limb xl: the quotient produced by
   udiv_qrnnd for the two-limb numerator (~xl, ~0L) and the divisor xl.
   The remainder is computed into `dummy` and discarded.

   NOTE(review): udiv_qrnnd (from longlong.h) is normally specified to need
   its high numerator limb to be smaller than the divisor, i.e. ~xl < xl,
   which holds only when the top bit of xl is set - confirm that callers
   always pass such a normalised xl.
*/
static inline mp_limb_t F_mpn_precompute_inverse(mp_limb_t xl)
{
   mp_limb_t dummy, invxl;
   udiv_qrnnd (invxl, dummy, ~(xl), ~(0L), xl);
   return invxl;
}

/*
   Computes the negation of a multiple-precision integer in 2's complement.

   Input is count limbs stored at src. Output is stored at dest.

   src and dest can be the same buffer. If they're not, they should be disjoint.

   todo: currently this code will make only 1 pass over the data, EXCEPT in the
         case where all limbs are zero, in which case it will make two passes.
         FIX THIS!

   todo: try writing another version that makes a block of zeroes and then
         uses mpn_sub_n repeatedly. This could be faster, if GMP's assembler
         is better than what gcc can come up with.
todo: consider writing this in assembly todo: write test function for this todo: consider using GMP's mpn_com_n (undocumented) */ static inline void F_mpn_negate(mp_limb_t* dest, mp_limb_t* src, unsigned long count) { for (long i = count - 1; i >= 0; i--) dest[i] = ~src[i]; mpn_add_1(dest, dest, count, 1); } /* Copies a bunch of limbs from one buffer to another. Input is count limbs stored at src. Output is stored at dest. src and dest can be the same buffer. If they're not, they should be disjoint. todo: it's completely insane that we're not using memcpy. But memcpy seems to have crazy overhead and is slow!! Why is this? todo: GMP has code to do limb copying. Clearly memcpy wasn't good enough for them either. Work out how to use their code. It's not a documented interface, so some hackishness may be necessary. */ static inline void F_mpn_copy(mp_limb_t* dest, const mp_limb_t* src, unsigned long count) { for (long i = count - 1; i >= 0; i--) { dest[i] = src[i]; } } static inline void F_mpn_copy_forward(mp_limb_t* dest, const mp_limb_t* src, unsigned long count) { for (long i = 0; i < count; i++) { dest[i] = src[i]; } } /* Sets a bunch of limbs to zero. todo: why does memset have so much overhead????!!? */ static inline void F_mpn_clear(mp_limb_t* dest, unsigned long count) { for (long i = count - 1; i >= 0; i--) dest[i] = 0; } /* Sets a bunch of limbs to 0xfff.... todo: why does memset have so much overhead????!!? 
 */
static inline void F_mpn_set(mp_limb_t* dest, unsigned long count)
{
   // walks from the top limb down to limb 0, storing all-ones in each
   for (long i = count - 1; i >= 0; i--)
      dest[i] = (mp_limb_t)(-1L);
}

/* Implemented outside this header; semantics are not visible from here. */
mp_limb_t F_mpn_divrem_ui_precomp(mp_limb_t * qp,
                             mp_limb_t * up, unsigned long un, mp_limb_t d, mp_limb_t dinv);

/* Implemented outside this header; semantics are not visible from here. */
mp_limb_t F_mpn_addmul(mp_limb_t * rp, mp_limb_t * s1p, unsigned long s1n,
                                      mp_limb_t * s2p, unsigned long s2n);

/*
   Prints the count limbs at mpn to standard output in hexadecimal, limb 0
   first, each followed by a space. Prints nothing when count is zero.

   NOTE(review): the "%lx" conversion assumes mp_limb_t is unsigned long.
*/
static inline
void F_mpn_printx(mp_limb_t * mpn, unsigned long count)
{
   if (count)
      for (unsigned long i = 0; i < count; i++)
         printf("%lx ", mpn[i]);
}

/*
   Large integer multiplication
*/

/* Tag stored in F_mpn_precomp_s.type. */
typedef enum {FFT_PRE, KAR_PRE} precomp_type;

/*
   State kept for a multiplication with one precomputed operand.
   NOTE(review): the meaning of the individual fields is defined by the
   implementation of F_mpn_mul_precomp_init, which is not part of this header.
*/
typedef struct
{
   precomp_type type;
   ZmodF_poly_p poly;
   unsigned long length;
   unsigned long length2;
   unsigned long coeff_limbs;
   unsigned long limbs1;
   unsigned long limbs2;
   unsigned long msl_bits;
   unsigned long bits;
} F_mpn_precomp_s;

/* Array-of-one typedef, so that an F_mpn_precomp_t is passed by reference. */
typedef F_mpn_precomp_s F_mpn_precomp_t[1];

void F_mpn_FFT_split_bits(ZmodF_poly_t poly, mp_limb_t * limbs, unsigned long total_limbs,
                               unsigned long bits, unsigned long output_limbs);

void F_mpn_FFT_combine_bits(mp_limb_t * res, ZmodF_poly_t poly, unsigned long bits,
                             unsigned long output_limbs, unsigned long total_limbs);

mp_limb_t __F_mpn_mul(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1,
                                      mp_limb_t * data2, unsigned long limbs2,
                                      unsigned long twk);

mp_limb_t F_mpn_mul(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1,
                                      mp_limb_t * data2, unsigned long limbs2);

mp_limb_t F_mpn_mul_trunc(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1,
                        mp_limb_t * data2, unsigned long limbs2, unsigned long trunc);

void F_mpn_mul_precomp_init(F_mpn_precomp_t precomp, mp_limb_t * data1, unsigned long limbs1, unsigned long limbs2);

void F_mpn_mul_precomp_clear(F_mpn_precomp_t precomp);

mp_limb_t F_mpn_mul_precomp(mp_limb_t * res, mp_limb_t * data2, unsigned long limbs2, F_mpn_precomp_t precomp);

mp_limb_t F_mpn_mul_precomp_trunc(mp_limb_t * res, mp_limb_t * data2, unsigned long limbs2, F_mpn_precomp_t precomp, unsigned long trunc);

mp_limb_t __F_mpn_mul_middle(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1,
                                      mp_limb_t * data2, unsigned long limbs2,
                                      unsigned long start, unsigned long trunc);

mp_limb_t __F_mpn_mul_middle_precomp(mp_limb_t * res, mp_limb_t * data1, unsigned long limbs1,
                                      F_mpn_precomp_t pre, unsigned long start, unsigned long trunc);

#endif
flint-1.011/mpz_poly-tuning.h0000644017361200017500000000317111025357254016051 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /* mpz_poly_tuning.h Tuning values for mpz_poly.c (C) 2007 David Harvey NOTE: the tuning values in this file are for sage.math only. TODO: write an automatic tuning utility!! */ #ifndef FLINT_MPZ_POLY_TUNING_H #define FLINT_MPZ_POLY_TUNING_H #ifdef __cplusplus extern "C" { #endif /* mpz_poly_kara_crossover_table[k] is the smallest length for which karatsuba should be used when the coefficients have k+1 limbs. The number of entries in the table is mpz_poly_kara_crossover_table_size. 
*/ extern unsigned long mpz_poly_kara_crossover_table[]; extern unsigned long mpz_poly_kara_crossover_table_size; #ifdef __cplusplus extern "C" { #endif #endif // end of file **************************************************************** flint-1.011/doc/0000755017361200017500000000000011035134776013273 5ustar tabbotttabbottflint-1.011/doc/mpz_poly.tex0000644017361200017500000007223111025357252015666 0ustar tabbotttabbott% (C) 2007, William Hart, David Harvey % This file is part of FLINT. % FLINT is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % FLINT is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % You should have received a copy of the GNU General Public License % along with FLINT; if not, write to the Free Software % Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA \documentclass[a4paper,10pt]{article} \usepackage{amsfonts} \usepackage{amsmath} \usepackage{eucal} \usepackage{amscd} \usepackage{url} \usepackage{hyperref} \usepackage{listings} \usepackage{wrapfig} \urlstyle{sf} \addtolength{\oddsidemargin}{-0.75in} \addtolength{\evensidemargin}{-0.75in} \addtolength{\textwidth}{1.5in} \newcommand{\Z}{\mathbb{Z}} \newcommand{\N}{\mathbb{N}} \newcommand{\HH}{\mathcal{H}} \newcommand{\Q}{\mathbb{Q}} \newcommand{\I}{\mathbb{I}} \newcommand{\C}{\mathbb{C}} \newcommand{\R}{\mathbb{R}} \newcommand{\Pee}{\mathbb{P}} \newcommand{\EuO}{\mathcal{O}} \newcommand{\Qbar}{\overline{\mathbb{Q}}} \newcommand{\fn}{\hfill[Function]} \newcommand{\macro}{\hfill[Macro]} \newcommand{\gmp}{\hfill[GMP]} \newcommand{\code}{\lstinline} \newcommand{\ljk}[2]{\left(\frac{#1}{#2}\right)} 
\newcommand{\modulo}[1]{\;\left(\mbox{mod}\;#1\right)} \newcommand{\fr}{\mathfrak} \def\notdivides{\mathrel{\kern-3pt\not\!\kern4.5pt\bigm|}} \def\nmid{\notdivides} \def\nsubseteq{\mathrel{\kern-3pt\not\!\kern2.5pt\subseteq}} \parindent=0pt \parskip 4pt plus 2pt minus 2pt %\email{w.b.hart@warwick.ac.uk} \title{FLINT: Fast Library for Number Theory} \author{William B. Hart and David Harvey} \begin{document} \maketitle \lstset{language=c} \lstset{basicstyle=\ttfamily} \lstset{keywordstyle=} %\lstset{morekeywords={mpz_t,mpz_poly_t,fmpz_poly_t}} \lstset{escapeinside=\%\%} \section{The mpz\_poly module} The \code{mpz_poly_t} data type represents elements of $\Z[x]$ by an array of \code{mpz_t}'s. It provides routines for memory management, basic arithmetic, and conversions to/from other types. Unless otherwise specified, all functions in this section permit aliasing between their input and output arguments. \subsection{Simple example} The following example computes the square of the polynomial $5x^3 - 1$. \begin{lstlisting} #include "mpz_poly.h" .... mpz_poly_t x, y; mpz_poly_init(x); mpz_poly_init(y); mpz_poly_set_coeff_ui(x, 3, 5); mpz_poly_set_coeff_si(x, 0, -1); mpz_poly_mul(y, x, x); mpz_poly_print(x); printf("\n"); mpz_poly_print(y); printf("\n"); mpz_poly_clear(x); mpz_poly_clear(y); \end{lstlisting} Output is: \begin{lstlisting} 4 -1 0 0 5 7 1 0 0 -10 0 0 25 \end{lstlisting} \subsection{Definition of \code{mpz_poly_t}} The \code{mpz_poly_t} type is actually a typedef for an array of length 1 of \code{mpz_poly_struct}. This permits passing parameters of type \code{mpz_poly_t} `by reference'. The \code{mpz_poly_struct} struct has three members: \begin{itemize} \item \code{mpz_t* coeffs}. An array of \code{mpz_t}'s of length \code{alloc}. All of them are \code{mpz_init}'d. \item \code{unsigned long alloc}. Length of \code{coeffs}. Always \code{alloc >= 1}. \item \code{unsigned long length}. The current length of the polynomial. 
That is, for \code{n < length}, the coefficient of $x^n$ is \code{coeffs[n]}, and for \code{n >= length}, the coefficient of $x^n$ is zero. Always \code{length <= alloc}. If \code{length == 0} then this is the zero polynomial. \end{itemize} An \code{mpz_poly_t} is said to be \emph{normalised} if either \code{length == 0}, or if \code{coeffs[length-1]} is nonzero. All \code{mpz_poly_blah()} functions expect their inputs to be normalised, and unless otherwise specified they produce output that is normalised. If you modify the coefficients yourself, you must ensure that the polynomial is subsequently normalised (for example by using \code{mpz_poly_normalise()}). All \code{mpz_poly_t}'s are allocated on the heap. The reason we don't bother with stack storage is that most of the memory allocation overhead for \code{mpz_poly_t} is in the coefficients anyway, and providing both stack and heap allocation would just make things unnecessarily complicated. \subsection{Comparison with \code{fmpz_poly_t}} Advantages of \code{mpz_poly_t} over \code{fmpz_poly_t} are: \begin{itemize} \item GMP's mpz functions may be used directly on the coefficients. \item If the coefficients vary a lot in size, the memory usage will be more efficient. (In fact it might be completely impractical to use \code{fmpz_poly_t} for such a polynomial.) \end{itemize} Disadvantages compared to \code{fmpz_poly_t} are: \begin{itemize} \item \code{fmpz_poly_t} is more efficient (in both time and space) for dense polynomials with relatively small, equally-sized coefficients, because it has much less memory management overhead. \end{itemize} \subsection{Initialisation and memory management} \begin{lstlisting} void mpz_poly_init(mpz_poly_t poly) \end{lstlisting} \begin{quote} Initialises an \code{mpz_poly_t} object. This function must be called before using the polynomial. The initial allocated size is set to 1. The length is set to zero, so this is the zero polynomial. 
This function should not be used twice on the same polynomial without an intervening \code{mpz_poly_clear()}; this will cause memory leaks. \end{quote} \begin{lstlisting} void mpz_poly_clear(mpz_poly_t poly) \end{lstlisting} \begin{quote} Frees the resources associated with an \code{mpz_poly_t} object. The coefficients are \code{mpz_clear}ed and the polynomial object becomes unusable. To use it again, \code{mpz_poly_init()} must be called. \end{quote} \begin{lstlisting} void mpz_poly_init2(mpz_poly_t poly, unsigned long alloc) \end{lstlisting} \begin{quote} Same as \code{mpz_poly_init()}, but with \code{alloc} coefficients initially allocated. Must have \code{alloc >= 1}. \end{quote} \begin{lstlisting} void mpz_poly_realloc(mpz_poly_t poly, unsigned long alloc) \end{lstlisting} \begin{quote} Reallocates the array of coefficients to length \code{alloc}. Must have \code{alloc >= 1}. The value of the polynomial is preserved as far as possible (i.e.~up to at most \code{alloc} coefficients). \end{quote} \begin{lstlisting} void mpz_poly_ensure_alloc(mpz_poly_t poly, unsigned long alloc) \end{lstlisting} \begin{quote} Ensures that at least \code{alloc} coefficients are allocated in \code{poly}, by increasing the number of allocated coefficients if necessary. If more coefficients are required, the number of allocated coefficients is at least doubled. The value of the polynomial is preserved. \end{quote} \subsection{Setting/retrieving coefficients} \begin{lstlisting} mpz_t* mpz_poly_get_coeff_ptr(mpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Returns a pointer to the coefficient of $x^n$ in \code{poly}, or \code{NULL} if $n$ is beyond the current length of the polynomial. \end{quote} \begin{lstlisting} void mpz_poly_get_coeff(mpz_t c, mpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Copies the coefficient of $x^n$ in \code{poly} into \code{c}. If $n$ is beyond the current length of the polynomial, \code{c} is set to zero. 
\end{quote} \begin{lstlisting} unsigned long mpz_poly_get_coeff_ui(mpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Returns the absolute value of the coefficient of $x^n$ in \code{poly} as an \code{unsigned long}. If it doesn't fit, only the least significant bits are returned. (See GMP's \code{mpz_get_ui()} function.) If $n$ is beyond the current length of the polynomial, the return value is zero. \end{quote} \begin{lstlisting} long mpz_poly_get_coeff_si(mpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Returns the coefficient of $x^n$ in \code{poly} as a \code{long}. If it doesn't fit, the return value probably doesn't mean much (but see GMP's \code{mpz_get_si()} function). If $n$ is beyond the current length of the polynomial, the return value is zero. \end{quote} \begin{lstlisting} mpz_t* _mpz_poly_get_coeff_ptr(mpz_poly_t poly, unsigned long n) void _mpz_poly_get_coeff(mpz_t c, mpz_poly_t poly, unsigned long n) unsigned long _mpz_poly_get_coeff_ui(mpz_poly_t poly, unsigned long n) long _mpz_poly_get_coeff_si(mpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} These are the same as the functions above, but they are inlined, and do no bounds checking. If $n$ is beyond the current length of the polynomial, the result is undefined. \end{quote} \begin{lstlisting} void mpz_poly_set_coeff(mpz_poly_t poly, unsigned long n, mpz_t c) void mpz_poly_set_coeff_ui(mpz_poly_t poly, unsigned long n, unsigned long c) void mpz_poly_set_coeff_si(mpz_poly_t poly, unsigned long n, long c) \end{lstlisting} \begin{quote} Sets the coefficient of $x^n$ in \code{poly} to $c$. If $n$ is beyond the current length of the polynomial, the polynomial is extended and reallocated appropriately. 
\end{quote} \begin{lstlisting} void _mpz_poly_set_coeff(mpz_poly_t poly, unsigned long n, mpz_t c) void _mpz_poly_set_coeff_ui(mpz_poly_t poly, unsigned long n, unsigned long c) void _mpz_poly_set_coeff_si(mpz_poly_t poly, unsigned long n, long c) \end{lstlisting} \begin{quote} These are the same as the functions above, but they are inlined, and do no bounds checking. If $n$ is beyond the current length of the polynomial, the result is undefined. Additionally, they do not ensure that the result is normalised. \end{quote} \subsection{String conversions and I/O} The functions in this section are not intended to be particularly fast. They are intended mainly as a debugging aid. All of the functions use the same string representation of polynomials. It is given by a sequence of integers, in decimal notation, separated by whitespace. The first integer gives the length of the polynomial; the remaining \code{length} integers are the coefficients. For example $5x^3 - x + 1$ is represented by the string ``\code{4 1 -1 0 5}'', and the zero polynomial is represented by ``\code{0}''. \begin{lstlisting} int mpz_poly_from_string(mpz_poly_t poly, char* s) \end{lstlisting} \begin{quote} Converts \code{s} into a polynomial, stored in \code{poly}. The return value is 1 if the conversion succeeded. The return value is zero if the string did not represent a valid polynomial, in which case \code{poly} will be in a legal state, but with an undefined value. \end{quote} \begin{lstlisting} char* mpz_poly_to_string(mpz_poly_t poly) \end{lstlisting} \begin{quote} Converts the polynomial to a string and returns a character buffer that was allocated by \code{malloc}. You should call \code{free} when the string is no longer needed. \end{quote} \begin{lstlisting} void mpz_poly_print(mpz_poly_t poly) \end{lstlisting} \begin{quote} Prints the given polynomial to standard output. 
\end{quote} \begin{lstlisting} void mpz_poly_fprint(mpz_poly_t poly, FILE* f) \end{lstlisting} \begin{quote} Prints the given polynomial to the given stream. \end{quote} \begin{lstlisting} int mpz_poly_read(mpz_poly_t poly) \end{lstlisting} \begin{quote} Reads a string from standard input and converts it to a polynomial. Return value has the same meaning as for \code{mpz_poly_from_string()}. \end{quote} \begin{lstlisting} int mpz_poly_fread(mpz_poly_t poly, FILE* f) \end{lstlisting} \begin{quote} Reads a string from the given stream and converts it to a polynomial. Return value has the same meaning as for \code{mpz_poly_from_string()}. \end{quote} \subsection{Length and degree} \begin{lstlisting} unsigned long mpz_poly_length(mpz_poly_t poly) \end{lstlisting} \begin{quote} Return the polynomial's length. \end{quote} \begin{lstlisting} long mpz_poly_degree(mpz_poly_t poly) \end{lstlisting} \begin{quote} Returns the polynomial's degree, which is defined to be \code{length - 1}. In particular the degree of the zero polynomial is $-1$. \end{quote} \begin{lstlisting} void mpz_poly_normalise(mpz_poly_t poly) \end{lstlisting} \begin{quote} Normalises the polynomial; that is, reduces its length until either the length is zero, or the coefficient of $x^{\text{length} - 1}$ is nonzero. \end{quote} \begin{lstlisting} int mpz_poly_normalised(mpz_poly_t poly) \end{lstlisting} \begin{quote} Returns a nonzero value if the polynomial is normalised. \end{quote} \begin{lstlisting} void mpz_poly_truncate(mpz_poly_t res, mpz_poly_t poly, unsigned long length) \end{lstlisting} \begin{quote} Truncates \code{poly} to length \code{length}, puts result in \code{res}. \end{quote} \begin{lstlisting} void mpz_poly_pad(mpz_poly_t poly, unsigned long length) \end{lstlisting} \begin{quote} Ensures that the polynomial has length at least \code{length}, by zero-padding the polynomial if necessary. 
\emph{The polynomial will not necessarily be normalised after this operation.} The value of the polynomial is preserved. \end{quote} \subsection{Assignment} \begin{lstlisting} void mpz_poly_set(mpz_poly_t res, mpz_poly_t poly) \end{lstlisting} \begin{quote} Copies the value of \code{poly} into \code{res}. \end{quote} \begin{lstlisting} void mpz_poly_zero(mpz_poly_t poly) \end{lstlisting} \begin{quote} Sets \code{poly} to zero (by setting its length to zero). \end{quote} \begin{lstlisting} void mpz_poly_swap(mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Swaps the contents of \code{poly1} and \code{poly2} by pointer swapping. This is much more efficient than going via a temporary. \end{quote} \subsection{Conversions} \begin{lstlisting} void mpz_poly_to_fmpz_poly(fmpz_poly_t res, mpz_poly_t poly) \end{lstlisting} \begin{quote} Converts \code{poly} into \code{fmpz_poly_t} format. \end{quote} \begin{lstlisting} void fmpz_poly_to_mpz_poly(mpz_poly_t res, fmpz_poly_t poly) \end{lstlisting} \begin{quote} Converts \code{poly} into \code{mpz_poly_t} format. \end{quote} \subsection{Comparison} \begin{lstlisting} int mpz_poly_equal(mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Returns a nonzero value if \code{poly1} and \code{poly2} are equal. \end{quote} \subsection{Addition/subtraction} \begin{lstlisting} void mpz_poly_add(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly1} plus \code{poly2}. \end{quote} \begin{lstlisting} void mpz_poly_sub(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly1} minus \code{poly2}. \end{quote} \begin{lstlisting} void mpz_poly_neg(mpz_poly_t res, mpz_poly_t poly) \end{lstlisting} \begin{quote} Sets \code{res} equal to the negative of \code{poly}. 
\end{quote} \subsection{Shifting} \begin{lstlisting} void mpz_poly_lshift(mpz_poly_t res, mpz_poly_t poly, unsigned long k) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly} times $x^k$. If \code{res} is the same object as \code{poly}, this is done efficiently by pointer swapping. \end{quote} \begin{lstlisting} void mpz_poly_rshift(mpz_poly_t res, mpz_poly_t poly, unsigned long k) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly} divided by $x^k$, with the lower order terms discarded. If \code{res} is the same object as \code{poly}, this is done efficiently by pointer swapping. \end{quote} \begin{lstlisting} void mpz_poly_shift(mpz_poly_t res, mpz_poly_t poly, long k) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly} multiplied by $x^k$, where the semantics are the same as \code{mpz_poly_lshift()} or \code{mpz_poly_rshift()}, depending on whether $k$ is non-negative or negative. \end{quote} \subsection{Scalar multiplication and division} \begin{lstlisting} void mpz_poly_scalar_mul(mpz_poly_t res, mpz_poly_t poly, mpz_t c) void mpz_poly_scalar_mul_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c) void mpz_poly_scalar_mul_si(mpz_poly_t res, mpz_poly_t poly, long c) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly} times \code{c}. \end{quote} \begin{lstlisting} void mpz_poly_scalar_div(mpz_poly_t res, mpz_poly_t poly, mpz_t c) void mpz_poly_scalar_div_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c) void mpz_poly_scalar_div_si(mpz_poly_t res, mpz_poly_t poly, long c) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly} divided by $c$. Rounding is towards zero (similar to the \code{mpz_tdiv} family in GMP). If $c$ is zero then a division-by-zero is raised. 
In the \code{ui} and \code{si} cases, in appropriate circumstances some precomputation is performed which is then shared among the coefficients, so this routine will be faster than dividing each coefficient by $c$ separately. Similar functionality is planned for the \code{mpz_t} case. \end{quote} \begin{lstlisting} void mpz_poly_scalar_div_exact(mpz_poly_t res, mpz_poly_t poly, mpz_t c) void mpz_poly_scalar_div_exact_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c) void mpz_poly_scalar_div_exact_si(mpz_poly_t res, mpz_poly_t poly, long c) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly} divided by $c$, \emph{assuming} that $c$ divides each coefficient exactly. If $c$ does not divide them, the result is undefined. If $c$ is zero then a division-by-zero is raised. The remarks made above for \code{mpz_poly_scalar_div} regarding precomputation apply here also. \end{quote} \begin{lstlisting} void mpz_poly_scalar_mod(mpz_poly_t res, mpz_poly_t poly, mpz_t c) void mpz_poly_scalar_mod_ui(mpz_poly_t res, mpz_poly_t poly, unsigned long c) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly} modulo $c$, that is, reduces each coefficient into the range $[0, c)$. In the \code{mpz_t} case, the sign of \code{c} is ignored. The remarks made above for \code{mpz_poly_scalar_div} regarding precomputation apply here also. \end{quote} \subsection{Polynomial multiplication} \begin{lstlisting} void mpz_poly_mul(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly1} times \code{poly2}. An appropriate multiplication algorithm is selected based on the degree and the maximum size of the coefficients of the input polynomials. The automatic algorithm selection strategy is based on the assumption that the polynomials are dense and have coefficients whose size does not vary too much. If this assumption is not satisfied, the chosen algorithm may be inappropriate. 
For example, if the polynomials represent the first few terms of the $q$-expansion of a modular form, then the coefficients might grow quite rapidly, in which case \code{mpz_poly_mul} will probably choose an FFT-based algorithm tuned for the largest coefficient; but the naive multiplication algorithm would probably do much better. Another example: if the polynomial is very sparse, then quite possibly FLINT is the wrong tool for the job, since it does not (yet) implement algorithms that can efficiently multiply sparse polynomials. \end{quote} \begin{lstlisting} void mpz_poly_mul_naive(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly1} times \code{poly2}, using the `naive' (classical) algorithm. \end{quote} \begin{lstlisting} void mpz_poly_mul_karatsuba(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly1} times \code{poly2}, using Karatsuba's algorithm. This is asymptotically faster than the naive algorithm, but not as fast as FFT-based methods. \end{quote} \begin{lstlisting} void mpz_poly_mul_SS(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly1} times \code{poly2}, using a Sch\"onhage--Strassen FFT algorithm \cite{ss}. This is asymptotically the fastest multiplication algorithm implemented in FLINT, and is used for very large multiplications (several thousand words or higher). The underlying algorithm is a Sch\"onhage--Strassen FFT operating on a polynomial whose coefficients have about the same number of bits as the degrees of the input polynomials (see the \code{ZmodF_poly_t} data type). A modification of the truncated Fourier transform \cite{tft} is used to improve smoothness of the running time. 
To convert the original multiplication to a problem of this type, FLINT either packs coefficients together (in the case that the coefficients are initially too small compared to the degree), or splits them apart (in the case that the coefficients are too large compared to the degree). The first approach is similar to Kronecker segmentation, except that instead of packing all the way into a single integer, we aim directly for the polynomial on which the Sch\"onhage--Strassen FFT operates. This was suggested independently by Paul Zimmermann and David Harvey. The splitting approach for the other case is due to William Hart. \end{quote} \begin{lstlisting} void mpz_poly_mul_naive_KS(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Sets \code{res} equal to \code{poly1} times \code{poly2}, using a `naive Kronecker segmentation' algorithm. This function is provided for testing purposes only; it is never called by \code{mpz_poly_mul()}. It simply packs the coefficients into a single large integer, and multiplies the integers using GMP. It is asymptotically fast, and less likely to contain bugs than the other functions, as it is based on very mature GMP code. \end{quote} \begin{lstlisting} void mpz_poly_sqr(mpz_poly_t res, mpz_poly_t poly) void mpz_poly_sqr_naive(mpz_poly_t res, mpz_poly_t poly) void mpz_poly_sqr_karatsuba(mpz_poly_t res, mpz_poly_t poly) void mpz_poly_sqr_SS(mpz_poly_t res, mpz_poly_t poly) void mpz_poly_sqr_naive_KS(mpz_poly_t res, mpz_poly_t poly) \end{lstlisting} \begin{quote} These functions are the same as the multiplication functions given above, but specialised for squaring. Note that the multiplication functions will automatically call the squaring versions if they are passed two identical inputs. 
\end{quote} \subsection{Polynomial division} \begin{lstlisting} void mpz_poly_monic_inverse(mpz_poly_t res, mpz_poly_t poly, unsigned long k) \end{lstlisting} \begin{quote} Let $n$ be the degree of \code{poly}, and assume that \code{poly} is monic. This function computes a monic polynomial \code{res} of degree $k$ such that \[ x^{k+n} = \text{res} \cdot \text{poly} + R, \] where $R$ has degree less than $n$. In other words it computes an approximate inverse of \code{poly}, scaled by an appropriate power of $x$. For sufficiently small $k$ and sufficiently small input polynomials, this function uses a naive division algorithm (see \code{mpz_poly_monic_inverse_naive()} below). For larger problems it switches to a divide-and-conquer algorithm, and eventually a Newton iteration method. \end{quote} \begin{lstlisting} void mpz_poly_pseudo_inverse(mpz_poly_t res, mpz_poly_t poly, unsigned long k) \end{lstlisting} \begin{quote} Let $n$ be the degree of \code{poly}, and let $d$ be the leading coefficient of \code{poly} (assumed nonzero). This function computes a polynomial \code{res} of degree $k$ such that \[ d^{k+1} x^{k+n} = \text{res} \cdot \text{poly} + R, \] where $R$ has degree less than $n$. In other words it computes an approximate inverse of \code{poly}, scaled by an appropriate power of $x$ and $d$. The algorithms used are similar to those described above for \code{mpz_poly_monic_inverse()}, with appropriate modifications to handle $d \neq 1$. \end{quote} \begin{lstlisting} void mpz_poly_monic_div(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} This function divides \code{poly1} by \code{poly2}, assuming that \code{poly2} is monic. That is, it computes a polynomial \code{quot} such that \[ \text{poly1} = \text{quot} \cdot \text{poly2} + \text{rem}, \] where the remainder \code{rem} has degree less than that of \code{poly2}. 
\end{quote} \begin{lstlisting} void mpz_poly_pseudo_div(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} This function pseudo-divides \code{poly1} by \code{poly2}. That is, let $d$ be the leading coefficient of \code{poly2} (assumed nonzero). Let $n$ and $m$ be the degrees of \code{poly1} and \code{poly2}. This function computes a polynomial \code{quot} such that \[ d^{n-m+1} \text{poly1} = \text{quot} \cdot \text{poly2} + \text{rem}, \] where the remainder \code{rem} has degree less than that of \code{poly2}. \end{quote} \begin{lstlisting} void mpz_poly_monic_rem(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) void mpz_poly_pseudo_rem(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) void mpz_poly_monic_div_rem(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) void mpz_poly_pseudo_div_rem(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} The same as the functions above, but compute the remainder, or the quotient and the remainder. \end{quote} \begin{lstlisting} void mpz_poly_monic_inverse_naive(mpz_poly_t res, mpz_poly_t poly, unsigned long k) void mpz_poly_pseudo_inverse_naive(mpz_poly_t res, mpz_poly_t poly, unsigned long k) void mpz_poly_monic_div_naive(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2) void mpz_poly_pseudo_div_naive(mpz_poly_t quot, mpz_poly_t poly1, mpz_poly_t poly2) void mpz_poly_monic_rem_naive(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) void mpz_poly_pseudo_rem_naive(mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) void mpz_poly_monic_div_rem_naive(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) void mpz_poly_pseudo_div_rem_naive(mpz_poly_t quot, mpz_poly_t rem, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} The same as the functions above, but they always use a naive division algorithm. 
\end{quote} \subsection{GCD and extended GCD} \begin{lstlisting} void mpz_poly_content(mpz_t x, mpz_poly_t poly) \end{lstlisting} \begin{quote} Computes the content of \code{poly} (the non-negative GCD of the coefficients) and stores it in \code{x}. \end{quote} \begin{lstlisting} unsigned long mpz_poly_content_ui(mpz_poly_t poly) \end{lstlisting} \begin{quote} Computes the content of \code{poly}, and returns it as an \code{unsigned long}. If it doesn't fit, the least significant bits are returned. \end{quote} \begin{lstlisting} void mpz_poly_gcd(mpz_poly_t res, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} (.....) \end{quote} \begin{lstlisting} void mpz_poly_xgcd(mpz_poly_t res, mpz_poly_t a, mpz_poly_t b, mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} (.....) \end{quote} \subsection{Miscellaneous} \begin{lstlisting} unsigned long mpz_poly_max_limbs(mpz_poly_t poly) unsigned long mpz_poly_max_bits(mpz_poly_t poly) \end{lstlisting} \begin{quote} Return the maximum number of limbs (respectively bits) in the coefficients of \code{poly}. Note that the former is somewhat faster, so it should be used if only a rough upper bound on the size is required. \end{quote} \begin{lstlisting} unsigned long mpz_poly_product_max_limbs(mpz_poly_t poly1, mpz_poly_t poly2) unsigned long mpz_poly_product_max_bits(mpz_poly_t poly1, mpz_poly_t poly2) \end{lstlisting} \begin{quote} Returns the maximum number of limbs (respectively bits) that the coefficients of the product of \code{poly1} and \code{poly2} could possibly have, based on their lengths and coefficient sizes. Note that \code{mpz_poly_product_max_limbs()} only examines the limb sizes of each input polynomial, so it's a fairly coarse estimate; it could overshoot the true bound by several limbs. It should not be used in situations where a tight bound is required. On the other hand it is faster than \code{mpz_poly_product_max_bits()}. 
\end{quote} \bibliographystyle{amsalpha} \bibliography{flint} \end{document} flint-1.011/doc/flint-roadmap.tex0000644017361200017500000004231311025357252016550 0ustar tabbotttabbott% (C) 2007, William Hart, David Harvey % This file is part of FLINT. % FLINT is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % FLINT is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % You should have received a copy of the GNU General Public License % along with FLINT; if not, write to the Free Software % Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA \documentclass[a4paper,10pt]{article} \usepackage{amsfonts} \usepackage{amsmath} \usepackage{eucal} \usepackage{amscd} \usepackage{url} \usepackage{hyperref} \usepackage{listings} \usepackage{wrapfig} \urlstyle{sf} \addtolength{\oddsidemargin}{-0.75in} \addtolength{\evensidemargin}{-0.75in} \addtolength{\textwidth}{1.5in} \newcommand{\Z}{\mathbb{Z}} \newcommand{\N}{\mathbb{N}} \newcommand{\HH}{\mathcal{H}} \newcommand{\Q}{\mathbb{Q}} \newcommand{\I}{\mathbb{I}} \newcommand{\C}{\mathbb{C}} \newcommand{\R}{\mathbb{R}} \newcommand{\Pee}{\mathbb{P}} \newcommand{\EuO}{\mathcal{O}} \newcommand{\Qbar}{\overline{\mathbb{Q}}} \newcommand{\fn}{\hfill[Function]} \newcommand{\macro}{\hfill[Macro]} \newcommand{\gmp}{\hfill[GMP]} \newcommand{\ljk}[2]{\left(\frac{#1}{#2}\right)} \newcommand{\modulo}[1]{\;\left(\mbox{mod}\;#1\right)} \newcommand{\fr}{\mathfrak} \def\notdivides{\mathrel{\kern-3pt\not\!\kern4.5pt\bigm|}} \def\nmid{\notdivides} \def\nsubseteq{\mathrel{\kern-3pt\not\!\kern2.5pt\subseteq}} \parindent=0pt \parskip 4pt plus 2pt minus 2pt 
%\email{w.b.hart@maths.warwick.ac.uk} \title{FLINT: Fast Library for Number Theory} \author{William B. Hart} \begin{document} \maketitle \lstset{language=c} \lstset{escapeinside=\%\%} \section{Introduction} FLINT is a C library for doing number theory. It is released under the GPL and we encourage interested people to contribute and/or fork our code. FLINT will eventually have implementations of algorithms in number theory, specifically algebraic number theory, including p-adics. We have no plans to implement algebraic geometry, group theory or elliptic curve algorithms, but this may change if a suitable maintainer is found who would like to oversee such a project. FLINT is currently maintained by Bill Hart from Warwick University and David Harvey from Harvard. Although FLINT is designed as a standalone C library for direct use in C programs by number theorists, parts of FLINT will be made available for use in SAGE, maintained by William Stein. FLINT 1.0 is the first version of FLINT which will be a standalone C library with a documented interface which can be used by an end user. Its release date is December 1st 2007. \subsection{Code Base} FLINT is written entirely in C and all code must conform to the C99 standard. It must compile with the GCC toolset, available on most unix based systems. FLINT should depend only on tightly coded, highly respected libraries. In particular any function from GMP may be used. There is also an intention to add the packages fpLLL, mpfr, gf2x and GMP-ECM to FLINT in the near future. The code is maintained at a sourceforge SVN repository. The main development code is available at: https://flint.svn.sourceforge.net/svnroot/flint/trunk/ Released versions of FLINT are forked from the main trunk and stored in separate folders in the repository, e.g. 
FLINT 1.0 is at https://flint.svn.sourceforge.net/svnroot/flint/flint-1.0/ Various experimental branches are held at: https://flint.svn.sourceforge.net/svnroot/flint/branches/ Programmers who wish to fiddle with some new ideas can start a branch ad libitum and play with FLINT files there without affecting the main development code. \subsection{Website} The FLINT website is found at: http://www.flintlib.org/ Information about FLINT (pre)releases, progress updates and future directions can be found there. Profiles will also be displayed on the website for comparison with other comparable packages and projects. In addition, programmers can access the FLINT sourceforge project at: http://www.sourceforge.org/flint/ \subsection{Development forums} Sourceforge provides us with a development forum. Developers who wish to be added to the FLINT development list can send an email requesting addition to hart\_wb@yahoo.com In addition, on occasion, FLINT developers find it useful to discuss things on IRC. The channel for this is flint-dev on the irc.freenode.net server. \subsection{Performance} The aim is for all FLINT functions to be at least as fast as the comparable functions available in the open source projects of a similar nature. The more elaborate functions will be faster in FLINT than in other open source projects where possible, and sometimes significantly faster. In particular FLINT will perform as well as or better than NTL, Pari and LiDIA, which seem to be the most popular open source alternatives. FLINT will be regularly profiled and compared against these packages on a function by function basis. The more elaborate functions will have more elaborate profiles. We also aim to beat MAGMA where possible, however it won't be a condition for a release of FLINT to be made that all functions in FLINT perform better everywhere than their MAGMA counterparts. Profiles comparing FLINT with MAGMA will also be done regularly. 
However such a comparison is not fair to either FLINT or MAGMA, since MAGMA is an interpreted package, not a C library, and MAGMA is closed source and non-free, whereas FLINT is free and open source. \subsection{Testing} All functions available to an end user in FLINT will have a corresponding test function (to be written by the person who wrote the function, if no one else volunteers to do it for them). Also, all sufficiently sophisticated internal FLINT functions must have a corresponding test function. One line functions, which for example just return the value of some field of a structure, need not have a test function. The general strategies used for testing FLINT functions are: 1) Send a large amount of random data of varying sizes and parameters to the function where possible. 2) Use the special GMP functions for generating random integers with long strings of 1's and 0's where this is possible. 3) If there is an associated function which should undo the effect of the function being tested (e.g. an addition function and a corresponding subtraction function), test the functions against one another. 4) If possible, get the function to do a standard computation, the result of which can be checked, e.g. check a factoring function by feeding numbers which are the known product of random integers and check the result. 5) If no other form of testing is possible, write a very simple version of the function which performs very poorly perhaps, or which uses a much simpler algorithm but produces the same result and compare the outputs. 6) Always do sufficient "eyeball" tests, i.e. get the function to print its output to the screen and look at the output to see if it looks like it is returning vaguely reasonable looking results to the eye. 7) Check boundary cases and just either side of them. If it is only possible to test a function in situ (i.e. 
as part of a larger function which calls it), and a simpler version cannot be implemented to test against, insert checkpoints within each branch of the function and run random data through the function until such time as all branches have been worked. Explicitly check that all branches did what they were supposed to. For convenience, a macro called FLINT\_ASSERT is available. It works like a function which takes a condition as a parameter. If at that point in the code, the condition fails, then the assert will pick that up and tell you the line number where the assert failed. These are particularly useful to check that certain conditions were met after a branch executed. For FLINT\_ASSERT's to be operational, one needs to set the appropriate flag in flint.h. The functions for testing the functions in the module fmpz\_poly.c should all be in a file called fmpz\_poly-test.c, etc. The final version of a test file should take approximately 1-2 seconds to test each function in the file being tested, where possible (sometimes a much longer time may be necessary). However, much more extensive tests should be run by the programmer when the function is first written, to ensure that the function works as expected in every conceivable situation, especially if the function is very involved. Such test code should be retained, but need not execute when a user executes a make test. Each final test function should print which function is being tested and then ok or fail. Examples of easy ways to set up such a test file can be found in the trunk of the development code, e.g. fmpz\_poly-test.c \subsection{Parallel Processing} FLINT will eventually support parallel processing at the thread level using pthreads. All functions that are sufficiently complicated will allow threads to be used. 
A global \#define USE\_THREADS in flint.h will specify whether threads should be used, and flint files using threads should contain \begin{verbatim}#include "flint.h" #ifdef USE_THREADS //code that makes use of threads #else //code that doesn't use threads #endif\end{verbatim} The files flint-threads.h and flint-threads.c will contain a flint thread manager. It will have a function which can be accessed which gives an upper limit on the number of new threads that should be created by a function which wants to create some new threads. Every threaded function should check how many threads it is allowed to create before creating any. It will also have various other helper functions for implementing more complicated threaded scenarios where threads will be kept hanging around waiting for work and woken up when work is available for them, or for implementing work stealing etc. \subsection{Memory Manager} FLINT has a memory manager. When we were implementing polynomial multiplication, we found that just allocating memory as needed with malloc, was too slow. It is hopelessly bad if the function is recursive. At the very least, functions should allocate as much of the memory as they need up front, then break it up as needed, rather than allocate lots of small chunks. But even this approach slows some things down. Thus we introduced a memory manager. The FLINT memory manager is included in files memory-manager.h and memory-manager.c. It is a stack based memory manager (or will be). Since stack based memory management is not ideal for threaded programs, it is implemented in a slightly strange way. Flint memory allocation functions automatically determine the current thread number. So if there are numerous threads running within a FLINT function (or indeed a program running multiple threads, each calling different FLINT functions), each thread that is started will have a different stack of memory allocated to it. 
However, the implementation details of the memory manager are irrelevant, since it will just work, regardless of how it is implemented. The only constraint in actual programming is that since the memory manager is stack based, any given thread will free memory in the reverse order to what it was allocated in the first place, e.g. \begin{verbatim}mp_limb_t * data1 = (mp_limb_t *) flint_stack_alloc(1000); mp_limb_t * data2 = (mp_limb_t *) flint_stack_alloc(2000); mp_limb_t * data3 = (mp_limb_t *) flint_stack_alloc(300); // intervening code flint_stack_release(); //free data3 flint_stack_release(); //free data2 flint_stack_release(); //free data1 \end{verbatim} Flint will automatically determine which thread made the call and allocate/deallocate from the correct stack. \subsection{FLINT modules} FLINT is implemented as a series of modules which perform related functions. Examples of modules are fmpz\_poly, fmpz, ZmodF, ZmodF\_poly, mpz\_poly, etc. Each module has associated .c and .h files named after it, and an associated test file. E.g. the module fmpz\_poly contains functions for doing arithmetic with polynomials over the integers all of which are contained in fmpz\_poly.c and fmpz\_poly.h. The test file for fmpz\_poly will be called fmpz\_poly-test.c. Running ``make test'' will compile and run all test files in FLINT. To run a specific test program, one can just type the name of the module, e.g. ./fmpz\_poly-test, after all the test files have compiled. Files with names like fmpz\_poly-profile.c are for generating profiles for functions in fmpz\_poly. All such profile files are similar. To make all the profile files, one types ``make profile''. To run a specific profile, one types for example ./fmpz\_poly-profile once they have all compiled and a list of profile targets will be given. 
Eventually FLINT will have all of the following modules: mpz\_extras - Arithmetic for GMP mpz\_t integers fmpz - Arithmetic for the FLINT ``flat'' multi-precision integer format long\_extras - Arithmetic for long/unsigned long integers including modulo arithmetic mpn\_extras - Arithmetic for integers in GMP mpn format ZmodF - Arithmetic for integers modulo a Fermat number $p = 2^nB+1$ where $B$ is the number of bits per limb Zmod - Arithmetic for $Z/nZ$ for a multi precision modulus $n$ Zp - padic arithmetic FF - Arithmetic for finite fields GF2 - Functions for arithmetic over GF2 \vspace{5mm} mpz\_poly - Polynomials over mpz\_t integers fmpz\_poly - Polynomials over integers in fmpz format ZmodF\_poly - Polynomial functions for polys mod a Fermat number Zmod\_poly - Polynomials over $Z/nZ$ for multiprecision $n$ zmod\_poly - Polynomials over $Z/nZ$ for $n$ an unsigned long Zp\_poly - Polys over padics GF2\_poly - Polys over GF2 \vspace{5mm} mpz\_mat - Linear Algebra over mpz\_t integers fmpz\_mat - Linear Algebra over integers in fmpz format Zmod\_mat - Linear Algebra over $Z/nZ$ for multiprecision $n$ zmod\_mat - Linear Algebra over $Z/nZ$ for an unsigned long $n$ Zp\_mat - Linear Algebra over padics GF2\_mat - Linear Algebra over GF2 \vspace{5mm} QFB - Binary quadratic forms QNF - Quadratic number fields QZeta - Cyclotomic number fields NF - General Number Fields \subsection{Introduction to the FLINT C files} The file flint.h contains all the universal \#defines for flint, including ones that specify how many bits per limb the machine has, whether threads should be used and many other useful pieces of information. \section{mpz\_poly} The mpz\_poly interface has functions for doing arithmetic with polynomials defined over integers implemented as GMP mpz\_t's. The ``alloc'' field of the mpz\_poly\_t type specifies the number of coefficients which have been allocated and the ``length'' field specifies the current length of the polynomial. 
Alloc must be at least 1 but length can be 0 for the zero polynomial. Length should always be less than or equal to alloc. \section{fmpz\_poly} The fmpz\_poly interface has functions for doing arithmetic with polynomials defined over integers implemented as a special flint type which has a sign and magnitude. Each coefficient has a sign limb, followed by zero or more limbs (the number of which is specified by the absolute value of the sign limb) which contain a multiprecision coefficient. If the coefficient is zero, the sign limb is zero. If the sign limb is negative, the coefficient is interpreted to be negative, etc. However, each coefficient is allocated exactly the same number of limbs (even if not all of them are used in each coefficient). The number of limbs allocated for each coefficient (excluding the sign limb) is specified in the ``limbs'' field of the fmpz\_poly\_t type. The length of the polynomial is given by the ``length'' field and the ``alloc'' field specifies the number of currently allocated coefficients (length should always be less than or equal to alloc). Alloc may be 0 and length can be 0 for the zero polynomial. The fmpz\_poly module is divided into two halves. The first half implements functions beginning fmpz\_poly, which manage everything for the user. In particular, if the result of a function returns a polynomial which is too long to fit in the allocated space of the output polynomial the whole output polynomial is reallocated automatically. The other half of the module implements functions beginning \_fmpz\_poly. These functions do not allocate extra space and require the user to do the allocation in advance. This includes increasing the number of allocated coefficients and increasing the number of limbs allocated for each coefficient, as necessary. 
The useful feature of the \_fmpz\_poly functions is that one can specify a subset of the coefficients of a polynomial and operate on just those coefficients without copying them out to another polynomial first. As such, no such function should modify the ``limbs'' field of any fmpz\_poly\_t's that are passed to it. These functions should also never even look at the ``alloc'' field, since it is not even guaranteed to be set. \end{document} flint-1.011/doc/flint-1.0.11.tex0000644017361200017500000026772011035134632015652 0ustar tabbotttabbott% (C) 2007, William Hart, David Harvey % This file is part of FLINT. % FLINT is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % FLINT is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. 
% You should have received a copy of the GNU General Public License % along with FLINT; if not, write to the Free Software % Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA \documentclass[a4paper,10pt]{article} \usepackage{amsfonts} \usepackage{amsmath} \usepackage{eucal} \usepackage{amscd} \usepackage{url} \usepackage{hyperref} \usepackage{listings} \usepackage{wrapfig} \urlstyle{sf} \addtolength{\oddsidemargin}{-0.75in} \addtolength{\evensidemargin}{-0.75in} \addtolength{\textwidth}{1.5in} \newcommand{\Z}{\mathbb{Z}} \newcommand{\N}{\mathbb{N}} \newcommand{\HH}{\mathcal{H}} \newcommand{\Q}{\mathbb{Q}} \newcommand{\I}{\mathbb{I}} \newcommand{\C}{\mathbb{C}} \newcommand{\R}{\mathbb{R}} \newcommand{\Pee}{\mathbb{P}} \newcommand{\EuO}{\mathcal{O}} \newcommand{\Qbar}{\overline{\mathbb{Q}}} \newcommand{\fn}{\hfill[Function]} \newcommand{\macro}{\hfill[Macro]} \newcommand{\gmp}{\hfill[GMP]} \newcommand{\code}{\lstinline} \newcommand{\ljk}[2]{\left(\frac{#1}{#2}\right)} \newcommand{\modulo}[1]{\;\left(\mbox{mod}\;#1\right)} \newcommand{\fr}{\mathfrak} \def\notdivides{\mathrel{\kern-3pt\not\!\kern4.5pt\bigm|}} \def\nmid{\notdivides} \def\nsubseteq{\mathrel{\kern-3pt\not\!\kern2.5pt\subseteq}} \parindent=0pt \parskip 4pt plus 2pt minus 2pt %\email{w.b.hart@maths.warwick.ac.uk} \title{FLINT 1.0.11: Fast Library for Number Theory} \author{William B. Hart and David Harvey} \begin{document} \maketitle \tableofcontents \lstset{language=c} \lstset{basicstyle=\ttfamily} \lstset{keywordstyle=} %\lstset{morekeywords={mpz_t,mpz_poly_t,fmpz_poly_t}} \lstset{escapeinside=\%\%} \section{Introduction} FLINT is a C library of functions for doing number theory. It is highly optimised and can be compiled on numerous platforms. FLINT also has the aim of providing support for multicore and multiprocessor computer architectures, though we do not yet provide this facility. 
FLINT is currently maintained by William Hart of Warwick University in the UK and David Harvey of Harvard University in the US. As of version 1.0, FLINT compiles on and supports 32 and 64 bit x86 processors, the G5 and Alpha processors, though in theory it compiles on any machine with gcc version 3.4 or later and with GMP version 4.2.1 or later. FLINT is supplied as a set of modules, \code{fmpz}, \code{fmpz_poly}, etc., each of which can be linked to a C program making use of their functionality. All of the functions in FLINT have a corresponding test function provided in an appropriately named test file, e.g: all the functions in the file \code{fmpz_poly.c} have test functions in the file \code{fmpz_poly-test.c}. \section{Building and using FLINT} The easiest way to use FLINT is to build a shared library. Simply download the FLINT tarball and untar it on your system. Next, set the environment variables \code{FLINT_GMP_LIB_DIR} and \code{FLINT_GMP_INCLUDE_DIR} to point to your GMP library and include directories respectively. Also set the environment variables \code{FLINT_NTL_LIB_DIR} and \code{FLINT_NTL_INCLUDE_DIR} to point to your NTL library and include directories respectively. Next type: \code{source flint_env} in the main directory of the FLINT directory tree. Finally type: \code{make library} Move the library file \code{libflint.so}, \code{libflint.dll} or \code{libflint.dylib} (depending on your platform) into your library path and move all the .h files in the main directory of FLINT into your include path. Now to use FLINT, simply include the appropriate header files for the FLINT modules you wish to use in your C program. Then compile your program, linking against the FLINT library and GMP with the options \code{-lflint -lgmp}. If you are using the NTL-interface, you will also need to link against NTL with the \code{-lntl} linker option. \section{Test code} Each module of FLINT has an extensive associated test module. 
We strongly recommend running the test programs before relying on results from FLINT on your system. To make the test programs, simply type: \code{make test} in the main FLINT directory. The following is a list of the test programs which should be run: \code{mpn_extras-test} \code{fmpz_poly-test} \code{fmpz-test} \code{ZmodF-test} \code{ZmodF_poly-test} \code{mpz_poly-test} \code{ZmodF_mul-test} \code{long_extras-test} \code{zmod_poly-test} \code{NTL-interface-test} \section{Reporting bugs} The maintainers wish to be made aware of any and all bugs. Please send an email with your bug report to hart\_wb@yahoo.com. If possible please include details of your system, version of gcc, version of GMP and precise details of how to replicate the bug. Note that FLINT needs to be linked against version 4.2.1 or later of GMP and must be compiled with gcc version 3.4 or later. \section{Example programs} FLINT comes with a number of example programs to demonstrate current and future FLINT features. To make the example programs, type: \code{make examples} The current example programs are: \code{delta_qexp} Compute the first $n$ terms of the delta function, e.g. \code{delta_qexp 1000000} will compute the first one million terms of the $q$-expansion of delta. \code{BPTJCubes} Implements the algorithm of Beck, Pine, Tarrant and Jensen for finding solutions to the equation $x^3+y^3+z^3 = k$. \code{bernoulli_zmod} Compute bernoulli numbers modulo a large number of primes. \code{expmod} Computes a very large modular exponentiation. \section{FLINT macros} In the file flint.h are various useful macros. The macro constant \code{FLINT_BITS} is set at compile time to be the number of bits per limb on the machine. FLINT requires it to be either 32 or 64 bits. Other architectures are not currently supported. The macro constant \code{FLINT_D_BITS} is set at compile time to be the number of bits per double on the machine or the number of bits per limb, whichever is smaller. 
This will have the value 53 or 32 on currently supported architectures. Numerous functions using precomputed inverses only support operands up to \code{FLINT_D_BITS - 1} bits, hence the macro. \code{FLINT_ABS(x)} returns the absolute value of a \code{long x}. \code{FLINT_MIN(x, y)} returns the minimum of two \code{long} or two \code{unsigned long} values \code{x} and \code{y}. \code{FLINT_MAX(x, y)} returns the maximum of two \code{long} or two \code{unsigned long} values \code{x} and \code{y}. \code{FLINT_BIT_COUNT(x)} returns the number of binary bits required to represent an \code{unsigned long x}. \section{The fmpz\_poly module} The \code{fmpz_poly_t} data type represents elements of $\Z[x]$. The \code{fmpz_poly} module provides routines for memory management, basic arithmetic, and conversions to/from other types. Each coefficient of an \code{fmpz_poly_t} is an integer of the FLINT \code{fmpz_t} type. Unless otherwise specified, all functions in this section permit aliasing between their input arguments and between their input and output arguments. \subsection{Simple example} The following example computes the square of the polynomial $5x^3 - 1$. \begin{lstlisting} #include "fmpz_poly.h" .... fmpz_poly_t x, y; fmpz_poly_init(x); fmpz_poly_init(y); fmpz_poly_set_coeff_ui(x, 3, 5); fmpz_poly_set_coeff_si(x, 0, -1); fmpz_poly_mul(y, x, x); fmpz_poly_print(x); printf("\n"); fmpz_poly_print(y); printf("\n"); fmpz_poly_clear(x); fmpz_poly_clear(y); \end{lstlisting} The output is: \begin{lstlisting} 4 -1 0 0 5 7 1 0 0 -10 0 0 25 \end{lstlisting} \subsection{Definition of the fmpz\_poly\_t polynomial type} The \code{fmpz_poly_t} type is a typedef for an array of length 1 of \code{fmpz_poly_struct}'s. This permits passing parameters of type \code{fmpz_poly_t} `by reference' in a manner similar to the way GMP integers of type \code{mpz_t} can be passed by reference. 
In reality one never deals directly with the struct and simply deals with objects of type \code{fmpz_poly_t}. For simplicity we will think of an \code{fmpz_poly_t} as a struct, though in practice to access fields of this struct, one needs to dereference first, e.g. to access the \code{length} field of an \code{fmpz_poly_t} called \code{poly1} one writes \code{poly1->length}. An \code{fmpz_poly_t} is said to be \emph{normalised} if either \code{length == 0}, or if the final coefficient is nonzero. All \code{fmpz_poly} functions expect their inputs to be normalised, and unless otherwise specified they produce output that is normalised. It is recommended that users do not access the fields of an \code{fmpz_poly_t} or its coefficient data directly, but make use of the functions designed for this purpose (detailed below). Functions in \code{fmpz_poly} do all the memory management for the user. One does not need to specify the maximum length or number of limbs per coefficient in advance before using a polynomial object. FLINT reallocates space automatically as the computation proceeds, if more space is required. We now describe the functions available in \code{fmpz_poly}. \subsection{Initialisation and memory management} \begin{lstlisting} void fmpz_poly_init(fmpz_poly_t poly) \end{lstlisting} \begin{quote} Initialise an \code{fmpz_poly_t} for use. The length of \code{poly} is set to zero. A corresponding call to \code{fmpz_poly_clear} must be made after finishing with the \code{fmpz_poly_t} to free the memory used by the polynomial. For efficiency reasons, a call to \code{fmpz_poly_init} does not actually allocate any memory for coefficients. Each of the functions will automatically allocate any space needed for coefficients and in fact the easiest way to use \code{fmpz_poly} is to let FLINT do all the allocation automatically. 
To this end, a user need only ever make calls to the \code{fmpz_poly_init} and \code{fmpz_poly_clear} memory management functions if they so wish. Naturally, more efficient code may result if the other memory management functions are also used. \end{quote} \begin{lstlisting} void fmpz_poly_realloc(fmpz_poly_t poly, unsigned long alloc) \end{lstlisting} \begin{quote} Shrink or expand the polynomial so that it has space for precisely \code{alloc} coefficients. If \code{alloc} is less than the current length, the polynomial is truncated (and then normalised), otherwise the coefficients and current length remain unaffected. If the parameter \code{alloc} is zero, any space currently allocated for coefficients in \code{poly} is free'd. A subsequent call to \code{fmpz_poly_clear} is still permitted and does nothing. \end{quote} \begin{lstlisting} void fmpz_poly_fit_length(fmpz_poly_t poly, unsigned long alloc) \end{lstlisting} \begin{quote} Expand the polynomial (if necessary) so that it has space for at least \code{alloc} coefficients. This function will never shrink the memory allocated for coefficients and the contents of the existing coefficients and the current length remain unaffected. \end{quote} \begin{lstlisting} void fmpz_poly_fit_limbs(fmpz_poly_t poly, unsigned long limbs) \end{lstlisting} \begin{quote} Currently all the coefficients of an \code{fmpz_poly_t} have the same number of limbs of space allocated for them (plus an additional limb for the sign/size limb). This function can be used to increase the space allocated for the coefficients. As all functions in the \code{fmpz_poly} module automatically manage memory allocation for the user, this function should only be used when directly manipulating the coefficients by means of the functions in the \code{fmpz} module (described below). 
In a later version of FLINT, this function will become defunct, as FLINT will automatically reallocate \code{fmpz_t}'s when there is insufficient space, and this will include polynomial coefficients. \end{quote} \begin{lstlisting} void fmpz_poly_clear(fmpz_poly_t poly)\end{lstlisting} \begin{quote} Free all memory used by the coefficients of \code{poly}. The polynomial object \code{poly} cannot be used again until a subsequent call to an initialisation function is made. \end{quote} \subsection{Setting/retrieving coefficients} \begin{lstlisting} void fmpz_poly_get_coeff_mpz(mpz_t x, const fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Retrieve coefficient $n$ as an \code{mpz_t}. Coefficients are numbered from zero, starting with the constant coefficient. Sets \code{x} to zero when $n >= $ \code{poly->length}. \end{quote} \begin{lstlisting} void fmpz_poly_get_coeff_mpz_read_only(mpz_t x, const fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Retrieve coefficient $n$ as a read only \code{mpz_t}. The function must be passed an uninitialised \code{mpz_t}. The \code{mpz_t} can then be used as an input to GMP functions, but not as an output. Its contents may be inspected, but not altered. This function is faster than \code{fmpz_poly_get_coeff_mpz} which makes an extra copy of the data. Coefficients are numbered from zero, starting with the constant coefficient. Sets \code{x} to zero when $n >= $ \code{poly->length}. \end{quote} \begin{lstlisting} void fmpz_poly_set_coeff_mpz(fmpz_poly_t poly, unsigned long n, mpz_t x) \end{lstlisting} \begin{quote} Set coefficient $n$ to the value of the given \code{mpz_t}. Coefficients are numbered from zero, starting with the constant coefficient. If $n$ represents a coefficient beyond the current length of \code{poly}, zero coefficients are added in between the existing coefficients and the new coefficient, if required. 
\end{quote} \begin{lstlisting} void fmpz_poly_get_coeff_fmpz(fmpz_t x, const fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Retrieve coefficient $n$ as an \code{fmpz_t}. Coefficients are numbered from zero, starting with the constant coefficient. Sets \code{x} to zero when $n >= $ \code{poly->length}. \end{quote} \begin{lstlisting} void fmpz_poly_set_coeff_fmpz(fmpz_poly_t poly, unsigned long n, fmpz_t x) \end{lstlisting} \begin{quote} Set coefficient $n$ to the value of the given \code{fmpz_t}. Coefficients are numbered from zero, starting with the constant coefficient. If $n$ represents a coefficient beyond the current length of \code{poly}, zero coefficients are added in between the existing coefficients and the new coefficient, if required. \end{quote} \begin{lstlisting} unsigned long fmpz_poly_get_coeff_ui(const fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Return the absolute value of coefficient $n$ as an \code{unsigned long}. Coefficients are numbered from zero, starting with the constant coefficient. If the coefficient is longer than a single limb, the first limb is returned. Returns zero when $n >= $ \code{poly->length}. \end{quote} \begin{lstlisting} void fmpz_poly_set_coeff_ui(fmpz_poly_t poly, unsigned long n, unsigned long x) \end{lstlisting} \begin{quote} Set coefficient $n$ to the value of the given \code{unsigned long}. Coefficients are numbered from zero, starting with the constant coefficient. If $n$ represents a coefficient beyond the current length of \code{poly}, zero coefficients are added in between the existing coefficients and the new coefficient, if required. \end{quote} \begin{lstlisting} long fmpz_poly_get_coeff_si(const fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Return the value of coefficient $n$ as a \code{long}. Coefficients are numbered from zero, starting with the constant coefficient. If the coefficient will not fit into a \code{long}, i.e. 
if its absolute value takes up more than \code{FLINT_BITS - 1} bits then the result is undefined. Returns zero when $n >= $ \code{poly->length}. \end{quote} \begin{lstlisting} void fmpz_poly_set_coeff_si(fmpz_poly_t poly, unsigned long n, long x) \end{lstlisting} \begin{quote} Set coefficient $n$ to the value of the given \code{long}. Coefficients are numbered from zero, starting with the constant coefficient. If $n$ represents a coefficient beyond the current length of \code{poly}, zero coefficients are added in between the existing coefficients and the new coefficient, if required. \end{quote} \begin{lstlisting} fmpz_t fmpz_poly_get_coeff_ptr(fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Return a reference to coefficient $n$ (as an \code{fmpz_t}). This function is provided so that individual coefficients can be accessed and operated on by functions in the \code{fmpz} module. This function does not make a copy of the data, but returns a reference to the actual coefficient. Coefficients are numbered from zero, starting with the constant coefficient. Returns NULL when $n >= $ \code{poly->length}. \end{quote} \begin{lstlisting} fmpz_t fmpz_poly_lead(const fmpz_poly_t poly) \end{lstlisting} \begin{quote} Return a reference to leading coefficient (as an \code{fmpz_t}) of \code{poly}. This function is provided so that the leading coefficient can be easily accessed and operated on by functions in the \code{fmpz} module. This function does not make a copy of the data, but returns a reference to the actual coefficient. Returns NULL when the polynomial has length zero. \end{quote} \subsection{String conversions and I/O} The functions in this section are not intended to be particularly fast. They are intended mainly as a debugging aid. For the string output functions there are two variants. 
The first uses a simple string representation of polynomials which prints only the length of the polynomial and the integer coefficients, whilst the latter variant (appended with \code{_pretty}) uses a more traditional string representation of polynomials which prints a variable name as part of the representation. The first string representation is given by a sequence of integers, in decimal notation, separated by whitespace. The first integer gives the length of the polynomial; the remaining \code{length} integers are the coefficients. For example $5x^3 - x + 1$ is represented by the string ``\code{4 1 -1 0 5}'', and the zero polynomial is represented by ``\code{0}''. The coefficients may be signed and arbitrary precision. The string representation of the functions appended by \code{_pretty} includes only the non-zero terms of the polynomial, starting with the one of highest degree. Each term starts with a coefficient, prepended with a sign (positive or negative), followed by the character \code{*}, followed by a variable name, which must be passed as a string parameter to the function, followed by a caret \code{^} followed by a non-negative exponent. If the sign of the leading coefficient is positive, it is omitted. Also the exponents of the degree 1 and 0 terms are omitted, as is the variable and the \code{*} character in the case of the degree 0 coefficient. If the coefficient is plus or minus one, the coefficient is omitted, except for the sign. Some examples of the \code{_pretty} representation are: \begin{lstlisting} 5*x^3+7*x-4 x^2+3 -x^4+2*x-1 x+1 5 \end{lstlisting} \begin{lstlisting} int fmpz_poly_from_string(fmpz_poly_t poly, const char* s) \end{lstlisting} \begin{quote} Import a polynomial from a string. If the string represents a valid polynomial the function returns 1, otherwise it returns 0. 
\end{quote} \begin{lstlisting} char* fmpz_poly_to_string(const fmpz_poly_t poly) char* fmpz_poly_to_string_pretty(const fmpz_poly_t poly, const char * x) \end{lstlisting} \begin{quote} Convert a polynomial to a string and return a pointer to the string. Space is allocated for the string by this function and must be freed when it is no longer used, by a call to \code{free}. The \code{pretty} version must be supplied with a string \code{x} which represents the variable name to be used when printing the polynomial. \end{quote} \begin{lstlisting} void fmpz_poly_fprint(const fmpz_poly_t poly, FILE* f) void fmpz_poly_fprint_pretty(const fmpz_poly_t poly, FILE* f, const char * x) \end{lstlisting} \begin{quote} Convert a polynomial to a string and write it to the given stream. The \code{pretty} version must be supplied with a string \code{x} which represents the variable name to be used when printing the polynomial. \end{quote} \begin{lstlisting} void fmpz_poly_print(const fmpz_poly_t poly) void fmpz_poly_print_pretty(const fmpz_poly_t poly, const char * x) \end{lstlisting} \begin{quote} Convert a polynomial to a string and write it to \code{stdout}. The \code{pretty} version must be supplied with a string \code{x} which represents the variable name to be used when printing the polynomial. \end{quote} \begin{lstlisting} int fmpz_poly_fread(fmpz_poly_t poly, FILE* f) \end{lstlisting} \begin{quote} Read a polynomial from the given stream. Return 1 if the data from the stream represented a valid polynomial, otherwise return 0. \end{quote} \begin{lstlisting} int fmpz_poly_read(fmpz_poly_t poly) \end{lstlisting} \begin{quote} Read a polynomial from \code{stdin}. Return 1 if the data read from \code{stdin} represented a valid polynomial, otherwise return 0. \end{quote} \subsection{Polynomial parameters (length, degree, max limbs, etc.)} \begin{lstlisting} long fmpz_poly_degree(const fmpz_poly_t poly) \end{lstlisting} \begin{quote} Return \code{poly->length - 1}. 
The zero polynomial is defined to have degree $-1$. \end{quote} \begin{lstlisting} unsigned long fmpz_poly_length(const fmpz_poly_t poly) \end{lstlisting} \begin{quote} Return \code{poly->length}. The zero polynomial is defined to have length $0$. \end{quote} \begin{lstlisting} unsigned long fmpz_poly_max_limbs(const fmpz_poly_t poly) \end{lstlisting} \begin{quote} Returns the maximum number of limbs required to store the absolute value of coefficients of \code{poly}. \end{quote} \begin{lstlisting} long fmpz_poly_max_bits(const fmpz_poly_t poly) \end{lstlisting} \begin{quote} Computes the maximum number of bits $b$ required to store the absolute value of coefficients of \code{poly}. If all the coefficients of \code{poly} are non-negative, $b$ is returned, otherwise $-b$ is returned. \end{quote} \begin{lstlisting} long fmpz_poly_max_bits1(const fmpz_poly_t poly) \end{lstlisting} \begin{quote} Computes the maximum number of bits $b$ required to store the absolute value of coefficients of \code{poly}. If all the coefficients of \code{poly} are non-negative, $b$ is returned, otherwise $-b$ is returned. The assumption is made that the absolute value of each coefficient fits into an unsigned long. This function will be more efficient than the more general \code{fmpz_poly_max_bits} in this situation. \end{quote} \subsection{Assignment and basic manipulation} \begin{lstlisting} void fmpz_poly_set(fmpz_poly_t output, const fmpz_poly_t poly) \end{lstlisting} \begin{quote} Set polynomial \code{output} equal to the polynomial \code{poly}. \end{quote} \begin{lstlisting} void fmpz_poly_swap(fmpz_poly_t poly1, fmpz_poly_t poly2) \end{lstlisting} \begin{quote} Efficiently swap two polynomials. The coefficients are not moved in memory, pointers are simply switched. \end{quote} \begin{lstlisting} void fmpz_poly_zero(fmpz_poly_t poly) \end{lstlisting} \begin{quote} Set the polynomial to the zero polynomial. 
\end{quote} \begin{lstlisting} void fmpz_poly_zero_coeffs(fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Set the first $n$ coefficients of \code{poly} to zero. \end{quote} \begin{lstlisting} void fmpz_poly_neg(fmpz_poly_t output, fmpz_poly_t poly) \end{lstlisting} \begin{quote} Negate the polynomial \code{poly}, i.e. set \code{output} to \code{-poly}. \end{quote} \begin{lstlisting} void fmpz_poly_truncate(fmpz_poly_t poly, const unsigned long trunc) \end{lstlisting} \begin{quote} If \code{trunc} is less than the current length of the polynomial, truncate the polynomial to that length. Note that as the function normalises its output, the eventual length of the polynomial may be less than \code{trunc}. \end{quote} \begin{lstlisting} void fmpz_poly_reverse(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long length) \end{lstlisting} \begin{quote} This function considers the polynomial \code{poly} to be of length \code{length}, notionally truncating and zero padding if required, and reverses the result. Since this function normalises its result the eventual length of \code{output} may be less than \code{length}. \end{quote} \begin{lstlisting} void fmpz_poly_normalise(fmpz_poly_t poly) \end{lstlisting} \begin{quote} This function normalises \code{poly} so that the leading coefficient is non-zero (or the polynomial is the zero polynomial). As all functions in \code{fmpz_poly} expect and return normalised polynomials, this function is only used when manipulating the coefficients directly by making use of the functions in the \code{fmpz} module (described below). \end{quote} \subsection{Conversions} \begin{lstlisting} void fmpz_poly_to_zmod_poly(zmod_poly_t zpol, fmpz_poly_t fpol) \end{lstlisting} \begin{quote} Reduce the coefficients of the \code{fmpz_poly_t fpol} mod the modulus of the \code{zmod_poly_t zpol} and store the result in \code{zpol}. This function is provided to enable the implementation of multimodular algorithms. 
\end{quote} \begin{lstlisting} void zmod_poly_to_fmpz_poly_unsigned(fmpz_poly_t fpol, zmod_poly_t zpol) \end{lstlisting} \begin{quote} Convert the \code{zmod_poly_t zpol} to an \code{fmpz_poly_t}. The coefficients of the \code{fmpz_poly_t} will all be unsigned. \end{quote} \begin{lstlisting} void zmod_poly_to_fmpz_poly(fmpz_poly_t fpol, zmod_poly_t zpol) \end{lstlisting} \begin{quote} Convert the \code{zmod_poly_t zpol} to an \code{fmpz_poly_t}. If \code{p} is the modulus of \code{zpol} then coefficients which lie in $[0, p/2]$ are unchanged, however, coefficients $a$ in the range $(p/2, p)$ become $a - p$. This function is provided to enable the implementation of multimodular algorithms. \end{quote} \subsection{Chinese remaindering} \begin{lstlisting} int fmpz_poly_CRT_unsigned(fmpz_poly_t res, fmpz_poly_t fpol, zmod_poly_t zpol, fmpz_t newmod, fmpz_t oldmod) \end{lstlisting} \begin{quote} Performs modular recombination using the Chinese Remainder Theorem. If \code{zpol} has modulus $p$, \code{newmod} is set equal to \code{oldmod*p} and each coefficient of \code{res} is set to the unique value modulo \code{newmod}, in the range $[0, \mbox{newmod})$ which is $a$ modulo \code{oldmod} and $b$ modulo $p$, where $a$ is the coefficient of \code{fpol} and $b$ is the corresponding coefficient of \code{zpol}. The coefficients of \code{fpol} are assumed to be unsigned. \end{quote} \begin{lstlisting} int fmpz_poly_CRT(fmpz_poly_t res, fmpz_poly_t fpol, zmod_poly_t zpol, fmpz_t newmod, fmpz_t oldmod) \end{lstlisting} \begin{quote} Performs modular recombination using the Chinese Remainder Theorem. If \code{zpol} has modulus $p$, \code{newmod} is set equal to \code{oldmod*p} and each coefficient of \code{res} is set to the unique value modulo \code{newmod}, in the range $(-\mbox{newmod}/2, \mbox{newmod}/2]$ which is $a$ modulo \code{oldmod} and $b$ modulo $p$, where $a$ is the coefficient of \code{fpol} and $b$ is the corresponding coefficient of \code{zpol}. 
\end{quote} \subsection{Comparison} \begin{lstlisting} int fmpz_poly_equal(const fmpz_poly_t poly1, const fmpz_poly_t poly2) \end{lstlisting} \begin{quote} Return 1 if the two polynomials are equal, 0 otherwise. \end{quote} \subsection{Shifting} \begin{lstlisting} void fmpz_poly_left_shift(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Shift poly to the left by $n$ coefficients (multiply by $x^n$) and write the result to \code{output}. Zero coefficients are inserted. The parameter $n$ must be non-negative, but can be zero. \end{quote} \begin{lstlisting} void fmpz_poly_right_shift(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Shift poly to the right by $n$ coefficients (divide by $x^n$ and discard the remainder) and write the result to \code{output}. The parameter $n$ must be non-negative, but can be zero. Shifting right by more than the current length of the polynomial results in the zero polynomial. \end{quote} \subsection{Norms} \begin{lstlisting} void fmpz_poly_2norm(fmpz_t norm, fmpz_poly_t pol) \end{lstlisting} \begin{quote} Sets \code{norm} to the euclidean norm of \code{pol}, i.e. the integer square root of the sum of the squares of the coefficients of \code{pol}. \end{quote} \subsection{Addition/subtraction} \begin{lstlisting} void fmpz_poly_add(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2) \end{lstlisting} \begin{quote} Set the output to the sum of the input polynomials. Note that if \code{poly1} and \code{poly2} have the same length, cancellation may occur (if the leading coefficients have the same absolute values but opposite signs) and so the result may have less coefficients than either of the inputs. \end{quote} \begin{lstlisting} void fmpz_poly_sub(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2) \end{lstlisting} \begin{quote} Set the output to \code{poly1 - poly2}. 
Note that if \code{poly1} and \code{poly2} have the same length, cancellation may occur (if the leading coefficients have the same values) and so the result may have less coefficients than either of the inputs. \end{quote} \subsection{Scalar multiplication and division} \begin{lstlisting} void fmpz_poly_scalar_mul_ui(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long x) \end{lstlisting} \begin{quote} Multiply \code{poly} by the \code{unsigned long x} and write the result to \code{output}. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_mul_si(fmpz_poly_t output, const fmpz_poly_t poly, long x) \end{lstlisting} \begin{quote} Multiply \code{poly} by the \code{long x} and write the result to \code{output}. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_mul_fmpz(fmpz_poly_t output, const fmpz_poly_t poly, const fmpz_t x) \end{lstlisting} \begin{quote} Multiply \code{poly} by the \code{fmpz_t x} and write the result to \code{output}. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_mul_mpz(fmpz_poly_t output, const fmpz_poly_t poly, const mpz_t x) \end{lstlisting} \begin{quote} Multiply \code{poly} by the \code{mpz_t x} and write the result to \code{output}. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_div_ui(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long x) \end{lstlisting} \begin{quote} Divide \code{poly} by the \code{unsigned long x}, round quotients towards minus infinity, discard remainders and write the result to \code{output}. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_div_si(fmpz_poly_t output, const fmpz_poly_t poly, long x) \end{lstlisting} \begin{quote} Divide \code{poly} by the \code{long x}, round quotients towards minus infinity, discard remainders and write the result to \code{output}. 
\end{quote} \begin{lstlisting} void fmpz_poly_scalar_tdiv_ui(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long x) \end{lstlisting} \begin{quote} Divide \code{poly} by the \code{unsigned long x}, round quotients towards zero, discard remainders and write the result to \code{output}. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_tdiv_si(fmpz_poly_t output, const fmpz_poly_t poly, long x) \end{lstlisting} \begin{quote} Divide \code{poly} by the \code{long x}, round quotients towards zero, discard remainders and write the result to \code{output}. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_div_exact_ui(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long x) \end{lstlisting} \begin{quote} Divide \code{poly} by the \code{unsigned long x}. Division is assumed to be exact and the result is undefined otherwise. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_div_exact_si(fmpz_poly_t output, const fmpz_poly_t poly, long x) \end{lstlisting} \begin{quote} Divide \code{poly} by the \code{long x}. Division is assumed to be exact and the result is undefined otherwise. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_div_fmpz(fmpz_poly_t output, const fmpz_poly_t poly, const fmpz_t x) \end{lstlisting} \begin{quote} Divide \code{poly} by the \code{fmpz_t x}, round quotients towards minus infinity, discard remainders, and write the result to \code{output}. \end{quote} \begin{lstlisting} void fmpz_poly_scalar_div_mpz(fmpz_poly_t output, const fmpz_poly_t poly, const mpz_t x) \end{lstlisting} \begin{quote} Divide \code{poly} by the \code{mpz_t x}, round quotients towards minus infinity, discard remainders, and write the result to \code{output}. \end{quote} \subsection{Polynomial multiplication} \begin{lstlisting} void fmpz_poly_mul(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2) \end{lstlisting} \begin{quote} Multiply the two given polynomials and return the result in \code{output}. 
The length of the output polynomial will be \code{poly1->length + poly2->length - 1}. \end{quote} \begin{lstlisting} void fmpz_poly_mul_trunc_n(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2, unsigned long n) \end{lstlisting} \begin{quote} Multiply the two given polynomials and truncate the result to $n$ coefficients, storing the result in \code{output}. This is sometimes known as a short product. The length of the output polynomial will be at most the minimum of $n$ and the value \code{poly1->length + poly2->length - 1}. It is permissible to set $n$ to any non-negative value, however the function is optimised for $n$ about half of \code{poly1->length + poly2->length}. This function is more efficient than multiplying the two polynomials then truncating. It is the operation used when multiplying power series. \end{quote} \begin{lstlisting} void fmpz_poly_mul_trunc_left_n(fmpz_poly_t output, const fmpz_poly_t poly1, const fmpz_poly_t poly2, unsigned long n) \end{lstlisting} \begin{quote} Multiply the two given polynomials storing the result in \code{output}. This function guarantees all the coefficients except the first $n$, which may be arbitrary. This is sometimes known as an opposite short product. The length of the output polynomial will be \code{poly1->length + poly2->length - 1} unless $n$ is greater than or equal to this value, in which case it will return the zero polynomial. It is permissible to set $n$ to any non-negative value, however the function is optimised for $n$ about half of \code{poly1->length + poly2->length}. For short polynomials, this function is more efficient than computing the full product. \end{quote} \subsection{Polynomial division} \begin{lstlisting} void fmpz_poly_divrem(fmpz_poly_t Q, fmpz_poly_t R, const fmpz_poly_t A, const fmpz_poly_t B) \end{lstlisting} \begin{quote} Performs division with remainder in $\Z[x]$. 
Computes polynomials \code{Q} and \code{R} in $\Z[x]$ such that the equation \code{A = B*Q + R}, holds. All but the final \code{B->length - 1} coefficients of \code{R} will be positive and less than the absolute value of the lead coefficient of \code{B}. Note that in the special cases where the leading coefficient of \code{B} is $\pm 1$ or \code{A = B*Q} for some polynomial \code{Q}, the result of this function is the same as if the computation had been done over $\Q$. \end{quote} \begin{lstlisting} void fmpz_poly_div(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B) \end{lstlisting} \begin{quote} Performs division without remainder in $\Z[x]$. The computation returns the same result as \code{fmpz_poly_divrem}, but no remainder is computed. This is in general faster than computing quotient and remainder. Note that in the special cases where the leading coefficient of \code{B} is $\pm 1$ or \code{A = B*Q} for some polynomial \code{Q}, the result of this function is the same as if the computation had been done over $\Q$. In particular it can be used efficiently for exact division in $\Z[x]$. \end{quote} \begin{lstlisting} void fmpz_poly_div_series(fmpz_poly_t Q, const fmpz_poly_t A, const fmpz_poly_t B, unsigned long n) \end{lstlisting} \begin{quote} Performs power series division in $\Z[[x]]$. The function considers the polynomials \code{A} and \code{B} to be power series of length $n$ starting with the constant terms. The function assumes that \code{B} is normalised, i.e. that the constant coefficient is $\pm 1$. The result is truncated to length $n$ regardless of the inputs. \end{quote} \begin{lstlisting} int fmpz_poly_divides(fmpz_poly_t Q, fmpz_poly_t A, fmpz_poly_t B) \end{lstlisting} \begin{quote} If the polynomial \code{A} is divisible by the polynomial \code{B} this function returns 1 and sets \code{Q} to the quotient, otherwise it returns 0. 
At this point, this function is provided for convenience only; it is not efficient when \code{B} does not actually divide \code{A}. \end{quote} \subsection{Pseudo division} \begin{lstlisting} void fmpz_poly_pseudo_divrem(fmpz_poly_t Q, fmpz_poly_t R, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B) \end{lstlisting} \begin{quote} Performs division with remainder of two polynomials in $\Z[x]$, notionally returning the results in $\Q[x]$ (actually in $\Z[x]$ with a single common denominator). Computes polynomials \code{Q} and \code{R} such that \code{lead(B)^d*A = B*Q + R} where \code{R} has degree less than that of \code{B}. This function may be used to do division of polynomials in $\Q[x]$ as follows. Suppose polynomials \code{C} and \code{D} are given in $\Q[x]$. 1) Write \code{C = d1*A} and \code{D = d2*B} for some polynomials \code{A} and \code{B} in $\Z[x]$ and integers \code{d1} and \code{d2}. 2) Use pseudo-division to compute \code{Q} and \code{R} in $\Z[x]$ so that \code{l^d*A = B*Q + R} where \code{l} is the leading coefficient of \code{B}. 3) We can now write \code{C = (d1/d2*D*Q + d1*R)/l^d}. \end{quote} \begin{lstlisting} void fmpz_poly_pseudo_div(fmpz_poly_t Q, unsigned long * d, const fmpz_poly_t A, const fmpz_poly_t B) \end{lstlisting} \begin{quote} Performs division without remainder of two polynomials in $\Z[x]$, notionally returning the results in $\Q[x]$ (actually in $\Z[x]$ with a single common denominator). Notionally computes polynomials \code{Q} and \code{R} such that \code{lead(B)^d*A = B*Q + R} where \code{R} has degree less than that of \code{B}, but returns only \code{Q}. This is slightly more efficient than computing the quotient and remainder. \end{quote} \subsection{Powering} \begin{lstlisting} void fmpz_poly_power(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long exp) \end{lstlisting} \begin{quote} Raises \code{poly} to the power \code{exp} and writes the result in \code{output}. 
\end{quote} \begin{lstlisting} void fmpz_poly_power_trunc_n(fmpz_poly_t output, const fmpz_poly_t poly, unsigned long exp, unsigned long n) \end{lstlisting} \begin{quote} Notionally raises \code{poly} to the power \code{exp}, truncates the result to length $n$ and writes the result in \code{output}. This is computed much more efficiently than simply powering the polynomial and truncating. This function can be used to raise power series to a power in an efficient way. \end{quote} \subsection{Gaussian content} \begin{lstlisting} void fmpz_poly_content(fmpz_t c, fmpz_poly_t poly) \end{lstlisting} \begin{quote} Set the \code{fmpz_t c} to the Gaussian content of the polynomial \code{poly}, i.e. to the greatest common divisor of its coefficients. \end{quote} \begin{lstlisting} void _fmpz_poly_primitive_part(fmpz_poly_t prim, fmpz_poly_t poly) \end{lstlisting} \begin{quote} Set \code{prim} to the primitive part of the polynomial \code{poly}, i.e. to \code{poly} divided by its Gaussian content. \end{quote} \subsection{Greatest common divisor and resultant} \begin{lstlisting} void fmpz_poly_gcd(fmpz_poly_t res, const fmpz_poly_t poly1, const fmpz_poly_t poly2) \end{lstlisting} \begin{quote} Sets \code{res} to the greatest common divisor of the polynomials \code{poly1} and \code{poly2}. \end{quote} \begin{lstlisting} unsigned long fmpz_poly_resultant_bound(fmpz_poly_t a, fmpz_poly_t b) void fmpz_poly_resultant(fmpz_t r, fmpz_poly_t a, fmpz_poly_t b) \end{lstlisting} \begin{quote} Compute the resultant of the polynomials \code{a} and \code{b}. If \code{a} and \code{b} are monic with $a(x) = \prod_i (x - \alpha_i)$ and $b(x) = \prod_j (x - \beta_j)$, when factored over the complex numbers, then the resultant is given by the expression $r = \prod_{i,j} (\alpha_i - \beta_j)$. 
If the polynomials are not monic, and \code{a} and \code{b} have leading coefficients $l_1$ and $l_2$ and lengths $d_1$ and $d_2$ respectively, then this quantity is multiplied by $l_1^{d_2-1}l_2^{d_1-1}$. Note that the resultant is zero iff the polynomials share a root over the algebraic closure of $\Q$. Currently it is necessary to ensure \code{r} has sufficient space to store the result. The function \code{fmpz_poly_resultant_bound} is used to determine a bit bound on the number of bits \code{b} required and \code{r} must have space for \code{b/FLINT_BITS + 2} limbs. In a future version of FLINT, this computation will not be necessary. \end{quote} \begin{lstlisting} void fmpz_poly_xgcd(fmpz_t r, fmpz_poly_t s, fmpz_poly_t t, fmpz_poly_t a, fmpz_poly_t b) \end{lstlisting} \begin{quote} Given coprime polynomials \code{a} and \code{b} this function computes polynomials \code{s} and \code{t} and the resultant \code{r} of the polynomials such that \code{r = a*s + b*t}. See the function \code{fmpz_poly_resultant} for information on how large \code{r} needs to be to hold the result. \end{quote} \subsection{Modular arithmetic} \begin{lstlisting} void fmpz_poly_invmod(fmpz_t d, fmpz_poly_t H, fmpz_poly_t poly1, fmpz_poly_t poly2) \end{lstlisting} \begin{quote} Computes a polynomial \code{H} and a denominator \code{d} such that \code{poly1*H} is \code{d} modulo \code{poly2}. Assumes that \code{poly1} and \code{poly2} are coprime and that \code{poly2} is monic. \end{quote} \subsection{Subpolynomials} A number of functions are provided for attaching an \code{fmpz_poly_t} object to an existing polynomial or to a range of coefficients of an existing polynomial providing an alias for the original polynomial or part thereof. Each of the functions in this section normalises the subpolynomials so that they can be used as inputs to \code{fmpz_poly} functions. 
As FLINT has no way of reallocating space in subpolynomials, they should not be used for outputs of \code{fmpz_poly} functions, but only for inputs. In a later version of FLINT, this restriction will be lifted. Note that FLINT may perform suboptimally if a polynomial and an alias of the polynomial are passed as inputs to the same function, as FLINT has no way to tell that it is dealing with aliases of the same polynomial. \begin{lstlisting} void fmpz_poly_attach(fmpz_poly_t output, const fmpz_poly_t poly) \end{lstlisting} \begin{quote} Attach the \code{fmpz_poly_t} object \code{output} to the polynomial \code{poly}. Any changes made to the \code{length} field of \code{output} then do not affect \code{poly}. \end{quote} \begin{lstlisting} void fmpz_poly_attach_shift(fmpz_poly_t output, const fmpz_poly_t input, unsigned long n) \end{lstlisting} \begin{quote} Attach the \code{fmpz_poly_t} object \code{output} to \code{input} but shifted to the left by $n$ coefficients. This is equivalent to notionally shifting the original polynomial right (dividing by $x^n$) then attaching to the result. \end{quote} \begin{lstlisting} void fmpz_poly_attach_truncate(fmpz_poly_t output, const fmpz_poly_t input, unsigned long n) \end{lstlisting} \begin{quote} Attach the \code{fmpz_poly_t} object \code{output} to the first $n$ coefficients of the polynomial \code{input}. This is equivalent to notionally truncating the original polynomial to $n$ coefficients then attaching to the result. \end{quote} \section{The fmpz module} The \code{fmpz} module is designed for manipulation of the FLINT flat multiprecision integer format \code{fmpz_t}. Internally, the data for an \code{fmpz_t} has first limb a sign/size limb. If it is 0 the integer represented by the \code{fmpz_t} is 0. The absolute value of the sign/size limb is the number of subsequent limbs that the absolute value of the integer being represented, takes up. 
The absolute value of the integer is then stored as limbs, least significant limb first, in the subsequent limbs after the sign/size limb. If the sign/size limb is positive, a positive integer is intended and if the sign/size limb is negative the negative integer with the stored absolute value is intended. The \code{fmpz_t} type is not intended as a standalone integer type. It is intended to be used in composite types such as polynomials and matrices which consist of many integer entries. Currently the user is responsible for memory management of \code{fmpz_t}'s, i.e. one must ensure that the output of a function in the \code{fmpz} module contains sufficient space to store the result. This will be changed in a later version of FLINT, where automatic memory management will be done for the user. To ensure that the correct number of limbs are available in each \code{fmpz_t} of an \code{fmpz_poly_t} one must currently call \code{void fmpz_poly_fit_limbs(fmpz_poly_t pol, unsigned long limbs)}, which will then ensure that each coefficient of \code{pol} has space for at least the given number of limbs (referring to the absolute value of the coefficients). Again, in a later version of FLINT, this step will be unnecessary as automatic memory management will be done for all \code{fmpz_t}'s, including coefficients of \code{fmpz_poly_t}'s. Note that \code{fmpz_t}'s are not currently guaranteed to allow aliasing between inputs or between inputs and outputs. However some optimised inplace functions are provided. \subsection{A simple example} We start with a simple example of the use of the \code{fmpz} module. This example sets $x$ to 3 and adds 5 to it. \begin{lstlisting} #include "fmpz.h" .... fmpz_t x = fmpz_init(1); // Allocate 1 limb of space fmpz_set_ui(x, 3); fmpz_add_ui_inplace(x, 5); printf("3 + 5 is "); fmpz_print(x); printf("\n"); fmpz_clear(x); \end{lstlisting} We now discuss the functions available in the \code{fmpz} module. 
\subsection{Memory management} \begin{lstlisting} fmpz_t fmpz_init(unsigned long limbs) \end{lstlisting} \begin{quote} Allocates space for an \code{fmpz_t} with the given number of limbs (plus an additional limb for the sign/size) on the heap and return a pointer to the space. \end{quote} \begin{lstlisting} fmpz_t fmpz_realloc(fmpz_t f, unsigned long limbs) \end{lstlisting} \begin{quote} Reallocate the space used by the \code{fmpz_t f} so that it has space for the given number of limbs (plus a sign/size limb). The parameter \code{limbs} must be non-negative. The existing contents of \code{f} are not altered if they still fit in the new size. \end{quote} \begin{lstlisting} void fmpz_clear(const fmpz_t f) \end{lstlisting} \begin{quote} Free space used by the \code{fmpz_t f}. \end{quote} \subsection{Random numbers} \begin{lstlisting} void fmpz_random_limbs2(fmpz_t x, unsigned long n) \end{lstlisting} \begin{quote} Set \code{x} to a random number of $n$ limbs consisting of long strings of ones and zeroes. \end{quote} \subsection{String operations} \begin{lstlisting} void fmpz_print(const fmpz_t f) \end{lstlisting} \begin{quote} Print the multiprecision integer \code{f}. A minus sign is prepended if the integer is negative. \end{quote} \subsection{fmpz properties} \begin{lstlisting} unsigned long fmpz_size(const fmpz_t f) \end{lstlisting} \begin{quote} Return the number of limbs used to store the absolute value of the multiprecision integer \code{f}. \end{quote} \begin{lstlisting} unsigned long fmpz_bits(const fmpz_t f) \end{lstlisting} \begin{quote} Return the number of bits required to store the absolute value of the multiprecision integer \code{f}. \end{quote} \begin{lstlisting} long fmpz_sgn(const fmpz_t f) \end{lstlisting} \begin{quote} Return the sign/size limb of the multiprecision integer \code{f}. The sign of the sign/size limb is the sign of the multiprecision integer. 
The absolute value of the sign/size limb is the size in limbs of the absolute value of the multiprecision integer \code{f}. \end{quote} \subsection{Assignment} \begin{lstlisting} void fmpz_set_ui(fmpz_t res, unsigned long x) \end{lstlisting} \begin{quote} Set the multiprecision integer \code{res} to the \code{unsigned long x}. \end{quote} \begin{lstlisting} void fmpz_set_si(fmpz_t res, long x) \end{lstlisting} \begin{quote} Set the multiprecision integer \code{res} to the \code{long x}. \end{quote} \begin{lstlisting} void fmpz_set(fmpz_t res, const fmpz_t f) \end{lstlisting} \begin{quote} Set the multiprecision integer \code{res} to equal the multiprecision integer \code{f}. \end{quote} \subsection{Comparison} \begin{lstlisting} int fmpz_equal(const fmpz_t f1, const fmpz_t f2) \end{lstlisting} \begin{quote} Return 1 if \code{f1} is equal to \code{f2}, otherwise return 0. \end{quote} \begin{lstlisting} int fmpz_is_one(const fmpz_t f) \end{lstlisting} \begin{quote} Return 1 if \code{f} is one, otherwise return 0. \end{quote} \begin{lstlisting} int fmpz_is_zero(const fmpz_t f) \end{lstlisting} \begin{quote} Return 1 if \code{f} is zero, otherwise return 0. \end{quote} \begin{lstlisting} int fmpz_cmpabs(const fmpz_t f1, const fmpz_t f2) \end{lstlisting} \begin{quote} Compares the absolute values of \code{f1} and \code{f2}. If the absolute value of \code{f1} is less than that of \code{f2} then a negative value is returned. If the absolute value of \code{f1} is greater than that of \code{f2} then a positive value is returned. If the absolute values are equal, then zero is returned. \end{quote} \subsection{Conversions} \begin{lstlisting} void mpz_to_fmpz(fmpz_t res, const mpz_t x) \end{lstlisting} \begin{quote} Convert the \code{mpz_t x} to the \code{fmpz_t res}. \end{quote} \begin{lstlisting} void fmpz_to_mpz(mpz_t res, const fmpz_t f) \end{lstlisting} \begin{quote} Convert the \code{fmpz_t f} to the \code{mpz_t res}. 
\end{quote} \subsection{Addition/subtraction} \begin{lstlisting} void fmpz_add(fmpz_t res, const fmpz_t f1, const fmpz_t f2) \end{lstlisting} \begin{quote} Set \code{res} to the sum of \code{f1} and \code{f2}. \end{quote} \begin{lstlisting} void fmpz_add_ui_inplace(fmpz_t res, unsigned long x) \end{lstlisting} \begin{quote} Set \code{res} to the sum of \code{res} and the \code{unsigned long x}. \end{quote} \begin{lstlisting} void fmpz_add_ui(fmpz_t res, const fmpz_t f, unsigned long x) \end{lstlisting} \begin{quote} Set \code{res} to the sum of \code{f} and the \code{unsigned long x}. \end{quote} \begin{lstlisting} void fmpz_sub(fmpz_t res, const fmpz_t f1, const fmpz_t f2) \end{lstlisting} \begin{quote} Set \code{res} to \code{f1} minus \code{f2}. \end{quote} \begin{lstlisting} void fmpz_sub_ui_inplace(fmpz_t res, unsigned long x) \end{lstlisting} \begin{quote} Set \code{res} to \code{res} minus the \code{unsigned long x}. \end{quote} \begin{lstlisting} void fmpz_sub_ui(fmpz_t res, const fmpz_t f, unsigned long x) \end{lstlisting} \begin{quote} Set \code{res} to \code{f} minus the \code{unsigned long x}. \end{quote} \subsection{Multiplication} \begin{lstlisting} void fmpz_mul(fmpz_t res, const fmpz_t f1, const fmpz_t f2) \end{lstlisting} \begin{quote} Set \code{res} to \code{f1} times \code{f2}. \end{quote} \begin{lstlisting} void fmpz_mul_ui(fmpz_t res, const fmpz_t f1, unsigned long x) \end{lstlisting} \begin{quote} Set \code{res} to \code{f1} times the \code{unsigned long x}. \end{quote} \begin{lstlisting} void fmpz_mul_2exp(fmpz_t output, fmpz_t x, unsigned long exp) \end{lstlisting} \begin{quote} Multiply \code{x} by $2^{\mbox{exp}}$. \end{quote} \begin{lstlisting} void fmpz_addmul(fmpz_t res, const fmpz_t f1, const fmpz_t f2) \end{lstlisting} \begin{quote} Set \code{res} to \code{res + f1 * f2}. 
\end{quote} \subsection{Division} \begin{lstlisting} void fmpz_tdiv(fmpz_t res, const fmpz_t f1, const fmpz_t f2) \end{lstlisting} \begin{quote} Set \code{res} to the quotient of \code{f1} by \code{f2}. Round the quotient towards zero and discard the remainder. \end{quote} \begin{lstlisting} void fmpz_fdiv(fmpz_t res, const fmpz_t f1, const fmpz_t f2) \end{lstlisting} \begin{quote} Set \code{res} to the quotient of \code{f1} by \code{f2}. Round the quotient towards minus infinity and discard the remainder. \end{quote} \begin{lstlisting} void fmpz_tdiv_ui(fmpz_t res, const fmpz_t f1, unsigned long x) \end{lstlisting} \begin{quote} Set \code{res} to the quotient of \code{f1} by the unsigned long \code{x}. Round the quotient towards zero and discard the remainder. \end{quote} \begin{lstlisting} void fmpz_div_2exp(fmpz_t output, fmpz_t x, unsigned long exp) \end{lstlisting} \begin{quote} Divide \code{x} by $2^{\mbox{exp}}$, returning the quotient and discarding the remainder. \end{quote} \begin{lstlisting} unsigned long fmpz_mod_ui(const fmpz_t input, const unsigned long x) \end{lstlisting} \begin{quote} Returns \code{input} modulo the \code{unsigned long x}. Note that \code{input} may be signed. \end{quote} \subsection{Powering} \begin{lstlisting} void fmpz_pow_ui(fmpz_t res, const fmpz_t f, unsigned long exp) \end{lstlisting} \begin{quote} Set \code{res} to \code{f} raised to the power \code{exp}. This requires \code{exp} to be non-negative. \end{quote} \subsection{Root extraction} \begin{lstlisting} void fmpz_sqrtrem(fmpz_t sqrt, fmpz_t rem, fmpz_t x) \end{lstlisting} \begin{quote} Computes the square root of \code{x} and returns the integer part of the square root, \code{sqrt}, and the remainder, \code{rem = x - sqrt^2}. Note that \code{x} must be non-negative, else an exception is raised. 
\end{quote} \subsection{Number theoretical} \begin{lstlisting} void fmpz_gcd(fmpz_t output, fmpz_t x1, fmpz_t x2) \end{lstlisting} \begin{quote} Compute the greatest common divisor of \code{x1} and \code{x2}. The result is always non-negative and will be zero if both of the inputs are zero. \end{quote} \subsection{Chinese remaindering} \begin{lstlisting} void fmpz_CRT_ui_precomp(fmpz_t x, fmpz_t r1, fmpz_t m1, unsigned long r2, unsigned long m2, unsigned long c, pre_inv_t pre) void fmpz_CRT_ui2_precomp(fmpz_t x, fmpz_t r1, fmpz_t m1, unsigned long r2, unsigned long m2, unsigned long c, pre_inv2_t pre) \end{lstlisting} \begin{quote} Computes the unique value \code{x} modulo \code{m1*m2} that is \code{r1} modulo \code{m1} and \code{r2} modulo \code{m2}. Requires \code{m1} and \code{m2} to be coprime, \code{c} to be set to the value \code{m1} modulo \code{m2} and \code{pre} to be a precomputed inverse of \code{m2} (computed using \code{z_precompute_inverse(m2)}). The first version of the function requires that \code{m2} be no more than \code{FLINT_D_BITS} bits, whereas the second version requires \code{m2} to be no more than \code{FLINT_BITS - 1} bits. \end{quote} \section{The zmod\_poly module} The \code{zmod_poly_t} data type represents elements of $\Z/n\Z[x]$ for some word sized integer $n$. Most of the functions work for an arbitrary $n$, however the division functions require the leading coefficient of the divisor polynomial to be invertible modulo $n$ and the gcd and resultant functions require $n$ to be prime. The \code{zmod_poly} module provides routines for memory management, basic manipulation and basic arithmetic. Each coefficient of a \code{zmod_poly_t} is stored as an \code{unsigned long} and is assumed to be reduced modulo the modulus $n$. Unless otherwise specified, all functions in this section permit aliasing between their input arguments and between their input and output arguments. 
\subsection{Simple example} The following example computes the square of the polynomial $5x^3 + 1$, where the coefficients are understood to be in $\Z/7\Z$. \begin{lstlisting} #include "zmod_poly.h" .... zmod_poly_t x, y; zmod_poly_init(x, 7); zmod_poly_init(y, 7); zmod_poly_set_coeff_ui(x, 3, 5); zmod_poly_set_coeff_ui(x, 0, 1); zmod_poly_mul(y, x, x); zmod_poly_print(x); printf("\n"); zmod_poly_print(y); printf("\n"); zmod_poly_clear(x); zmod_poly_clear(y); \end{lstlisting} The output is: \begin{lstlisting} 4 1 0 0 5 7 1 0 0 3 0 0 4 \end{lstlisting} \subsection{Definition of the zmod\_poly\_t polynomial type} The \code{zmod_poly_t} type is a typedef for an array of length 1 of \code{zmod_poly_struct}'s. This permits passing parameters of type \code{zmod_poly_t} `by reference'. All \code{zmod_poly} functions expect their inputs to be normalised, and unless otherwise specified they produce output that is normalised. It is recommended that users do not access the fields of a \code{zmod_poly_t} or its coefficient data directly, but make use of the functions designed for this purpose (detailed below). The type has fields for the length of the polynomial, the number of coefficients allocated (the length is always less than or equal to this), a modulus $n$ and possibly a precomputed inverse of $n$. Functions in \code{zmod_poly} do all the memory management for the user. One does not need to specify the maximum length in advance before using a \code{zmod_poly_t} polynomial object, but it may be more efficient to do so. FLINT reallocates space automatically as the computation proceeds, if more space is required. We now describe the functions available in \code{zmod_poly}. \subsection{Memory management} \begin{lstlisting} void zmod_poly_init(zmod_poly_t poly, unsigned long p) \end{lstlisting} \begin{quote} Initialise \code{poly} as a polynomial over $\Z/p\Z$. 
\end{quote} \begin{lstlisting} void zmod_poly_init2(zmod_poly_t poly, unsigned long p, unsigned long alloc) \end{lstlisting} \begin{quote} Initialise \code{poly} as a polynomial over $\Z/p\Z$, allocating space for at least the given number of coefficients. \end{quote} \begin{lstlisting} void zmod_poly_clear(zmod_poly_t poly) \end{lstlisting} \begin{quote} Release the memory used by \code{poly}, which cannot then be used again until it is initialised again. \end{quote} \begin{lstlisting} void zmod_poly_realloc(zmod_poly_t poly, unsigned long alloc) \end{lstlisting} \begin{quote} Reallocate \code{poly} so that it has space for \code{alloc} coefficients. If \code{alloc} is greater than the current length of the polynomial, the existing coefficients are retained. \end{quote} \begin{lstlisting} void zmod_poly_fit_length(zmod_poly_t poly, unsigned long alloc) \end{lstlisting} \begin{quote} Reallocate \code{poly} so that it has space for at least \code{alloc} coefficients. This function will not reduce the number of allocated coefficients, so no data will be lost. \end{quote} \subsection{Setting/retrieving coefficients} \begin{lstlisting} unsigned long zmod_poly_get_coeff_ui(zmod_poly_t poly, unsigned long n) \end{lstlisting} \begin{quote} Return the $n$-th coefficient as an \code{unsigned long}. Coefficients are numbered from zero, starting with the constant coefficient. If $n$ is greater than or equal to the current length of the polynomial, zero is returned. \end{quote} \begin{lstlisting} void zmod_poly_set_coeff_ui(zmod_poly_t poly, unsigned long n, unsigned long c) \end{lstlisting} \begin{quote} Set the $n$-th coefficient to the \code{unsigned long c}. It is assumed that \code{c} is already reduced modulo the modulus of the polynomial. Coefficients are numbered from zero, starting with the constant coefficient. If $n$ is greater than the current length of the polynomial, zeroes are inserted between the new coefficient and the existing coefficients if required. 
\end{quote} \subsection{String conversions and I/O} The functions in this section read/write a polynomial to/from a string representation. The representation starts with the length of the polynomial, a space and then the modulus of the polynomial. If the length is not zero, this is followed by a space and then a space separated list of the coefficients starting from the constant coefficient. Each coefficient is represented as an integer between zero and one less than the modulus. The polynomial $3*x^2+2$ in $\Z/7\Z[x]$ would be represented: \begin{lstlisting} 3 7 2 0 3 \end{lstlisting} \begin{lstlisting} int zmod_poly_from_string(zmod_poly_t poly, char* s) \end{lstlisting} \begin{quote} Load \code{poly} from the given string \code{s}. \end{quote} \begin{lstlisting} char* zmod_poly_to_string(zmod_poly_t poly) \end{lstlisting} \begin{quote} Return a pointer to a string representing \code{poly}. Space is allocated for the string and must be free'd after use. \end{quote} \begin{lstlisting} void zmod_poly_print(zmod_poly_t poly) \end{lstlisting} \begin{quote} Print the string representation of \code{poly} to \code{stdout}. \end{quote} \begin{lstlisting} void zmod_poly_fprint(zmod_poly_t poly, FILE* f) \end{lstlisting} \begin{quote} Print the string representation of \code{poly} to the given file/stream \code{f}. \end{quote} \begin{lstlisting} int zmod_poly_read(zmod_poly_t poly) \end{lstlisting} \begin{quote} Read a polynomial in string representation from \code{stdin}. The function returns 1 if the string represented a valid polynomial, otherwise it returns 0. \end{quote} \begin{lstlisting} int zmod_poly_fread(zmod_poly_t poly, FILE* f) \end{lstlisting} \begin{quote} Read a polynomial in string representation from the given file/stream \code{f}. The function returns 1 if the string represented a valid polynomial, otherwise it returns 0. 
\end{quote} \subsection{Polynomial parameters (length, degree, modulus, etc.)} \begin{lstlisting} unsigned long zmod_poly_length(zmod_poly_t poly) \end{lstlisting} \begin{quote} Return the current length of the polynomial. The zero polynomial has length 0. \end{quote} \begin{lstlisting} long zmod_poly_degree(zmod_poly_t poly) \end{lstlisting} \begin{quote} Return the degree of the polynomial. The zero polynomial is defined to have degree $-1$. \end{quote} \begin{lstlisting} unsigned long zmod_poly_modulus(zmod_poly_t poly) \end{lstlisting} \begin{quote} Return the modulus of the polynomial, i.e. if $n$ is returned, the polynomial is an element of $\Z/n\Z[x]$. \end{quote} \subsection{Assignment and basic manipulation} \begin{lstlisting} void zmod_poly_truncate(zmod_poly_t poly, unsigned long length) \end{lstlisting} \begin{quote} Truncate \code{poly} to the given length and normalise. \end{quote} \begin{lstlisting} void zmod_poly_set(zmod_poly_t res, zmod_poly_t poly) \end{lstlisting} \begin{quote} Set \code{res} to equal \code{poly}. \end{quote} \begin{lstlisting} void zmod_poly_zero(zmod_poly_t poly) \end{lstlisting} \begin{quote} Set \code{poly} to be the zero polynomial. \end{quote} \begin{lstlisting} void zmod_poly_swap(zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Efficiently swap \code{poly1} and \code{poly2}. Data is not actually copied in memory. Instead, pointers are swapped. \end{quote} \begin{lstlisting} void zmod_poly_neg(zmod_poly_t res, zmod_poly_t poly) \end{lstlisting} \begin{quote} Negate the polynomial \code{poly}, i.e. set \code{res} to \code{-poly}. \end{quote} \begin{lstlisting} void zmod_poly_reverse(zmod_poly_t output, zmod_poly_t input, unsigned long length) \end{lstlisting} \begin{quote} Notionally zero padding or truncating if necessary, this function considers \code{input} to be a polynomial of the given length and reverses it, storing the result in \code{output}. 
\end{quote} \subsection{Subpolynomials} These functions allow one to attach a \code{zmod_poly_t} object to an existing polynomial or subpolynomial thereof. The subpolynomial is normalised if necessary. Since FLINT cannot reallocate the attached polynomial object, these functions should only be used to construct polynomial objects to be used as inputs to other \code{zmod_poly} functions. \begin{lstlisting} void zmod_poly_attach(zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Attach \code{poly2} to the polynomial object \code{poly1}. \end{quote} \begin{lstlisting} void zmod_poly_attach_shift(zmod_poly_t poly1, zmod_poly_t poly2, unsigned long n) \end{lstlisting} \begin{quote} This function notionally shifts \code{poly2} to the right by \code{n} coefficients and then attaches the polynomial object \code{poly1} to the result. \end{quote} \begin{lstlisting} void zmod_poly_attach_truncate(zmod_poly_t output, zmod_poly_t input, unsigned long n) \end{lstlisting} \begin{quote} This function notionally truncates \code{input} to length \code{n} and then attaches the polynomial object \code{output} to the result. \end{quote} \subsection{Comparison} \begin{lstlisting} int zmod_poly_equal(zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Returns 1 if the two polynomials are equal, otherwise returns 0. \end{quote} \begin{lstlisting} int zmod_poly_is_one(zmod_poly_t poly1) \end{lstlisting} \begin{quote} Returns 1 if the polynomial is equal to the constant polynomial 1, otherwise returns 0. \end{quote} \subsection{Scalar multiplication and division} \begin{lstlisting} void zmod_poly_scalar_mul(zmod_poly_t res, zmod_poly_t poly, unsigned long scalar) \end{lstlisting} \begin{quote} Multiply the polynomial through by the given scalar. It is assumed that \code{scalar} is already reduced modulo the modulus of the polynomial. 
\end{quote} \begin{lstlisting} void zmod_poly_make_monic(zmod_poly_t output, zmod_poly_t pol) \end{lstlisting} \begin{quote} Multiply the polynomial through by the inverse of the leading coefficient of the polynomial. It is assumed that the leading coefficient is invertible modulo the modulus of the polynomial. This function results in a monic polynomial if this condition is met, otherwise the results are undefined. \end{quote} \subsection{Addition/subtraction} \begin{lstlisting} void zmod_poly_add(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Set \code{res} to the sum of \code{poly1} and \code{poly2}. Note that if cancellation occurs, \code{res} may have a lesser length than either of the two input polynomials. \end{quote} \begin{lstlisting} void zmod_poly_sub(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Set \code{res} to \code{poly1} minus \code{poly2}. Note that if cancellation occurs, \code{res} may have a lesser length than either of the two input polynomials. \end{quote} \subsection{Shifting} \begin{lstlisting} void zmod_poly_left_shift(zmod_poly_t res, zmod_poly_t poly, unsigned long k) \end{lstlisting} \begin{quote} Shift the polynomial \code{poly} left by \code{k} coefficients, i.e. multiply the polynomial by $x^k$ and store the result in \code{res}. The value of $k$ must be non-negative. \end{quote} \begin{lstlisting} void zmod_poly_right_shift(zmod_poly_t res, zmod_poly_t poly, unsigned long k) \end{lstlisting} \begin{quote} Shift the polynomial \code{poly} right by \code{k} coefficients, i.e. divide the polynomial by $x^k$, ignoring the remainder and store the result in \code{res}. The value of $k$ must be non-negative. If $k$ is greater than or equal to the current length of \code{poly}, \code{res} is set to equal the zero polynomial. 
\end{quote} \subsection{Polynomial multiplication} \begin{lstlisting} void zmod_poly_mul(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Set \code{res} to \code{poly1} multiplied by \code{poly2}. The length of \code{res} will be at most one less than the sum of the lengths of \code{poly1} and \code{poly2}. \end{quote} \begin{lstlisting} void zmod_poly_mul_trunc_n(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long n) \end{lstlisting} \begin{quote} Set \code{res} to \code{poly1} multiplied by \code{poly2} and truncate to length \code{n} if this is less than the length of the full product. This function is usually more efficient than simply doing the multiplication and then truncating. The function is tuned for \code{n} about half the length of a full product. This function is sometimes called a short product. This function can be used for power series multiplication. \end{quote} \begin{lstlisting} void zmod_poly_mul_trunc_left_n(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2, unsigned long n) \end{lstlisting} \begin{quote} Set \code{res} to \code{poly1} multiplied by \code{poly2} ignoring the least significant \code{n} terms of the result which may be set to anything. This function is more efficient than doing the full multiplication if the operands are relatively short. It is tuned for \code{n} about half the length of a full product. This function is sometimes called an opposite short product. \end{quote} \subsection{Polynomial division} \begin{lstlisting} void zmod_poly_newton_invert(zmod_poly_t Q_inv, zmod_poly_t Q, unsigned long n) \end{lstlisting} \begin{quote} Treat the polynomial \code{Q} as a series of length \code{n} (the constant coefficient of the series is taken to be the constant coefficient of the polynomial) and invert it, yielding a series \code{Q_inv} also given to precision \code{n}. 
\end{quote} \begin{lstlisting} void zmod_poly_div_series(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B, unsigned long n) \end{lstlisting} \begin{quote} Treat the polynomials \code{A} and \code{B} as series of length \code{n} and compute the quotient series \code{Q = A/B}. \end{quote} \begin{lstlisting} void zmod_poly_div(zmod_poly_t Q, zmod_poly_t A, zmod_poly_t B) \end{lstlisting} \begin{quote} Divide the polynomial \code{A} by the polynomial \code{B} and set \code{Q} to the result. \end{quote} \begin{lstlisting} void zmod_poly_divrem(zmod_poly_t Q, zmod_poly_t R, zmod_poly_t A, zmod_poly_t B) \end{lstlisting} \begin{quote} Divide the polynomial \code{A} by \code{B} and set \code{Q} to the quotient and \code{R} to the remainder. \end{quote} \subsection{Greatest common divisor and resultant} \begin{lstlisting} unsigned long zmod_poly_resultant(zmod_poly_t a, zmod_poly_t b) \end{lstlisting} \begin{quote} Compute the resultant of the polynomials \code{a} and \code{b}. If \code{a} and \code{b} are monic with $a(x) = \prod_i (x - \alpha_i)$ and $b(x) = \prod_j (x - \beta_j)$, when factored over an algebraic closure of the field of coefficients, then the resultant is given by the expression $r = \prod_{i,j} (\alpha_i - \beta_j)$. If the polynomials are not monic, and \code{a} and \code{b} have leading coefficients $l_1$ and $l_2$ and degrees $d_1$ and $d_2$ respectively, then this quantity is multiplied by $l_1^{d_2}l_2^{d_1}$. Note that the resultant is zero iff the polynomials share a root over an algebraic closure of the coefficient ring. \end{quote} \begin{lstlisting} void zmod_poly_gcd(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Compute the greatest common divisor of the polynomials \code{poly1} and \code{poly2}. 
\end{quote} \begin{lstlisting} int zmod_poly_gcd_invert(zmod_poly_t res, zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Compute a polynomial \code{res} such that \code{res*poly1} is a constant modulo \code{poly2}. The two polynomials \code{poly1} and \code{poly2} are assumed to be coprime. If this is not the case, the function returns 0, otherwise it returns 1. \end{quote} \begin{lstlisting} void zmod_poly_xgcd(zmod_poly_t res, zmod_poly_t s, zmod_poly_t t, zmod_poly_t poly1, zmod_poly_t poly2) \end{lstlisting} \begin{quote} Compute polynomials \code{s} and \code{t} such that \code{s*poly1+t*poly2} is the resultant of the polynomials \code{poly1} and \code{poly2}. The polynomials \code{poly1} and \code{poly2} are assumed to be coprime. \end{quote} \section{The long\_extras module} The \code{long_extras} module contains functions for doing arithmetic with integers which will fit into an \code{unsigned long}, including functions for modular arithmetic. Many of the functions take a precomputed inverse, which increases performance. The functions which include 2 in the name support moduli up to \code{FLINT_BITS - 1} bits, i.e. 31 or 63 bits, and the remainder work with moduli up to and including \code{FLINT_D_BITS} bits. On 64 bit machines, \code{FLINT_BITS} is 64 and \code{FLINT_D_BITS} is 53 bits. On a 32 bit machine the functions with 2 in the name are in fact macros aliasing the corresponding unadorned version. In this case \code{FLINT_BITS} is 32. The functions which begin \code{z_ll_} generally take a parameter consisting of two \code{unsigned long}'s thought of as an integer of twice the normal size, e.g. on a 64 bit machine these functions would support an input of 128 bits. Many of the functions in this module can be used to manipulate the individual coefficients of polynomials of type \code{zmod_poly_t}. 
\begin{lstlisting} pre_inv_t z_precompute_inverse(unsigned long n) pre_inv2_t z_precompute_inverse2(unsigned long n) pre_inv_ll_t z_ll_precompute_inverse2(unsigned long n) \end{lstlisting} \begin{quote} Return a precomputed inverse of the integer \code{n}. The first version returns a \code{pre_inv_t}, which is used with functions taking parameters up to \code{FLINT_D_BITS}. The second version returns a \code{pre_inv2_t} for use with the second versions of functions taking a precomputed inverse, which support parameters up to \code{FLINT_BITS - 1} bits. The third version returns an inverse suitable for use with \code{z_ll_} functions which support an operand consisting of two \code{unsigned long}'s for twice the normal integer precision. \end{quote} \begin{lstlisting} unsigned long z_addmod(unsigned long a, unsigned long b, unsigned long p) \end{lstlisting} \begin{quote} Return the sum of \code{a} and \code{b} modulo \code{p}. Both \code{a} and \code{b} are assumed to be reduced modulo \code{p} when calling this function. \end{quote} \begin{lstlisting} unsigned long z_submod(unsigned long a, unsigned long b, unsigned long p) \end{lstlisting} \begin{quote} Return \code{a} minus \code{b} modulo \code{p}. Both \code{a} and \code{b} are assumed to be reduced modulo \code{p} when calling this function. \end{quote} \begin{lstlisting} unsigned long z_negmod(unsigned long a, unsigned long p) \end{lstlisting} \begin{quote} Return minus \code{a} modulo \code{p}. The value \code{a} is assumed to be reduced modulo \code{p} when calling this function. \end{quote} \begin{lstlisting} unsigned long z_div2_precomp(unsigned long a, unsigned long n, pre_inv2_t ninv) \end{lstlisting} \begin{quote} Return the floor of the quotient of \code{a} by \code{n}. There are no restrictions on the size of \code{a}. 
\end{quote} \begin{lstlisting} unsigned long z_mod_precomp(unsigned long a, unsigned long n, pre_inv_t ninv) unsigned long z_mod2_precomp(unsigned long a, unsigned long n, pre_inv2_t ninv) unsigned long z_ll_mod_precomp(unsigned long a_hi, unsigned long a_lo, unsigned long n, pre_inv_ll_t ninv) \end{lstlisting} \begin{quote} Return \code{a} modulo \code{n}. The first version assumes that \code{a} is less than \code{n^2}. The second and third versions place no restrictions on \code{a}. \end{quote} \begin{lstlisting} unsigned long z_mulmod_precomp(unsigned long a, unsigned long b, unsigned long n, pre_inv_t ninv) unsigned long z_mulmod2_precomp(unsigned long a, unsigned long b, unsigned long n, pre_inv2_t ninv) \end{lstlisting} \begin{quote} Return \code{a} times \code{b} modulo \code{n}. The first version assumes that \code{a} and \code{b} have been reduced modulo \code{n} before calling the function. The second version places no restrictions on \code{a} and \code{b}, i.e. their product may be up to two full limbs. \end{quote} \begin{lstlisting} unsigned long z_powmod(unsigned long a, long exp, unsigned long n) unsigned long z_powmod2(unsigned long a, long exp, unsigned long n) unsigned long z_powmod_precomp(unsigned long a, long exp, unsigned long n, pre_inv_t ninv) unsigned long z_powmod2_precomp(unsigned long a, long exp, unsigned long n, pre_inv2_t ninv) \end{lstlisting} \begin{quote} Raise \code{a} to the power \code{exp} modulo \code{n}. All versions assume \code{a} is reduced modulo \code{n}, but there are no restrictions on \code{exp}, which may be negative (assuming \code{a} is invertible modulo \code{n}) or zero. \end{quote} \begin{lstlisting} int z_jacobi_precomp(unsigned long a, unsigned long p, pre_inv_t pinv) \end{lstlisting} \begin{quote} Computes the Jacobi symbol of \code{a} modulo \code{p} for a prime \code{p}. Assumes that \code{a} is reduced modulo \code{p}. 
\end{quote} \begin{lstlisting} unsigned long z_pow(unsigned long a, unsigned long exp) \end{lstlisting} \begin{quote} Computes \code{a} to the power \code{exp} which must be non-negative. Assumes that the result will fit in an \code{unsigned long}. \end{quote} \begin{lstlisting} unsigned long z_sqrtmod(unsigned long a, unsigned long p) \end{lstlisting} \begin{quote} Returns a square root of \code{a} modulo \code{p}. Assumes \code{a} is reduced modulo \code{p}. The function returns 0 if \code{a} is not a quadratic residue modulo a prime \code{p}. \end{quote} \begin{lstlisting} unsigned long z_cuberootmod(unsigned long * cuberoot1, unsigned long a, unsigned long p) \end{lstlisting} \begin{quote} Returns a cube root of \code{a} modulo a prime \code{p}. Assumes \code{a} is reduced modulo \code{p}. If \code{a} is not 0, the function also sets \code{cuberoot1} to a cube root of unity modulo \code{p} if the cube roots of \code{a} are distinct, otherwise \code{cuberoot1} is set to 1. If \code{a} is not a cubic residue modulo \code{p} the function returns 0. \end{quote} \begin{lstlisting} unsigned long z_gcd(long x, long y) \end{lstlisting} \begin{quote} Returns the greatest common divisor of \code{x} and \code{y}, which may be signed. \end{quote} \begin{lstlisting} unsigned long z_invert(unsigned long a, unsigned long n) \end{lstlisting} \begin{quote} Returns a multiplicative inverse of \code{a} modulo \code{n}. Assumes \code{a} is reduced modulo \code{n}. \end{quote} \begin{lstlisting} long z_gcd_invert(long* a, long x, long y) \end{lstlisting} \begin{quote} Returns the greatest common divisor \code{d} of \code{x} and \code{y} (which may be signed) and sets \code{a} such that \code{a*x} is \code{d} modulo \code{y}. We ensure \code{a} is reduced modulo \code{y}. 
\end{quote} \begin{lstlisting} long z_extgcd(long* a, long* b, long x, long y) \end{lstlisting} \begin{quote} Returns the greatest common divisor \code{d} of \code{x} and \code{y} (which may be signed) and sets \code{a} and \code{b} such that \code{d = a*x+b*y}. \end{quote} \begin{lstlisting} unsigned long z_CRT(unsigned long x1, unsigned long n1, unsigned long x2, unsigned long n2) \end{lstlisting} \begin{quote} Returns the unique integer \code{d} reduced modulo \code{n1*n2} which is \code{x1} modulo \code{n1} and \code{x2} modulo \code{n2}. Assumes \code{x1} is reduced modulo \code{n1} and \code{x2} is reduced modulo \code{n2}. Also assumes \code{n1*n2} is no more than \code{FLINT_BITS - 1} bits and that \code{n1} and \code{n2} are coprime. \end{quote} \begin{lstlisting} unsigned long z_randint(unsigned long limit) \end{lstlisting} \begin{quote} Returns a random uniformly distributed integer in the range 0 to \code{limit - 1} inclusive. If \code{limit} is set to 0, the function returns a full random limb. \end{quote} \begin{lstlisting} unsigned long z_randbits(unsigned long bits) \end{lstlisting} \begin{quote} Returns a random uniformly distributed integer with (up to) the given number of bits. If \code{bits} is set to 0, the function returns a full random limb. \end{quote} \section{The mpn\_extras module} The \code{mpn_extras} module is designed to supplement the low level \code{mpn} functions provided in GMP. These functions are designed to operate on raw limbs of multiprecision integer data. Each such integer consists of a string of limbs representing an integer, with the least significant limb first. The integers may either be unsigned or signed in twos complement format. 
\begin{lstlisting} void F_mpn_negate(mp_limb_t* dest, mp_limb_t* src, unsigned long count) \end{lstlisting} \begin{quote} Considering the data at the location \code{src} to be an integer of \code{count} limbs stored in twos complement format, this function negates the integer and stores the result at the location \code{dest}. \end{quote} \begin{lstlisting} void F_mpn_copy(mp_limb_t* dest, const mp_limb_t* src, unsigned long count) \end{lstlisting} \begin{quote} Copy \code{count} raw limbs at \code{src} to the location \code{dest}. Copying begins with the most significant limb first, thus the destination limbs may overlap the source limbs only if \code{dest > src} in memory. \end{quote} \begin{lstlisting} void F_mpn_copy_forward(mp_limb_t* dest, const mp_limb_t* src, unsigned long count) \end{lstlisting} \begin{quote} Copy \code{count} raw limbs at \code{src} to the location \code{dest}. Copying begins with the least significant limb first, thus the destination limbs may overlap the source limbs only if \code{dest < src} in memory. \end{quote} \begin{lstlisting} void F_mpn_clear(mp_limb_t* dest, unsigned long count) \end{lstlisting} \begin{quote} Set all bits of the \code{count} limbs starting at \code{dest} to binary zeros. \end{quote} \begin{lstlisting} void F_mpn_set(mp_limb_t* dest, unsigned long count) \end{lstlisting} \begin{quote} Set all bits of the \code{count} limbs starting at \code{dest} to binary ones. \end{quote} \begin{lstlisting} pre_limb_t F_mpn_precompute_inverse(mp_limb_t d) \end{lstlisting} \begin{quote} Returns a precomputed inverse of \code{d} for use in \code{F_mpn} functions which take a \code{pre_limb_t} precomputed inverse \code{dinv} of \code{d}. One needs to normalise \code{d} before computing the precomputed inverse, however the original value of \code{d} itself is passed to the functions. 
This computation can be done as follows: \end{quote} \begin{lstlisting} #include "flint.h" unsigned long norm; count_lead_zeros(norm, d); pre_limb_t xinv = F_mpn_precompute_inverse(x< #include #include "flint.h" #include "mpz_poly.h" #include "test-support.h"

// tests whether the given polynomial is equal to the one given by the string
// (only for testing purposes in this file)
//
// Parses s into a temporary polynomial with mpz_poly_from_string(), compares
// it against poly with mpz_poly_equal(), releases the temporary and returns
// the comparison result unchanged.
int mpz_poly_equal_str(mpz_poly_t poly, char* s)
{
   mpz_poly_t poly2;
   mpz_poly_init(poly2);
   mpz_poly_from_string(poly2, s);
   int result = mpz_poly_equal(poly, poly2);
   mpz_poly_clear(poly2);
   return result;
}


/****************************************************************************

   Setting/retrieving coefficients

****************************************************************************/

// Checks mpz_poly_coeff_ptr() on a polynomial whose length field is forced
// to 2: indices 0 and 1 must yield pointers into poly->coeffs, and index 2
// (equal to the length) must yield NULL.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_get_coeff_ptr()
{
   int success = 1;
   mpz_poly_t poly;
   mpz_poly_init2(poly, 3);
   // length is set by hand so that slot 2 exists in storage but lies
   // outside the polynomial
   poly->length = 2;

   success = success && (mpz_poly_coeff_ptr(poly, 0) == poly->coeffs);
   success = success && (mpz_poly_coeff_ptr(poly, 1) == poly->coeffs + 1);
   success = success && (mpz_poly_coeff_ptr(poly, 2) == NULL);

   mpz_poly_clear(poly);
   return success;
}


// Checks mpz_poly_get_coeff(): coefficients 0 and 1 must read back as the
// stored values 47 and 48, and index 2 must read back as 0 even though 49
// was written to that slot, because the length field is 2.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_get_coeff()
{
   int success = 1;
   mpz_poly_t poly;
   mpz_t x;
   mpz_poly_init2(poly, 3);
   mpz_init(x);

   poly->length = 2;
   mpz_set_ui(poly->coeffs[0], 47);
   mpz_set_ui(poly->coeffs[1], 48);
   // deliberately written beyond the length; must not be visible
   mpz_set_ui(poly->coeffs[2], 49);

   mpz_poly_get_coeff(x, poly, 0);
   success = success && !mpz_cmp_ui(x, 47);
   mpz_poly_get_coeff(x, poly, 1);
   success = success && !mpz_cmp_ui(x, 48);
   mpz_poly_get_coeff(x, poly, 2);
   success = success && !mpz_cmp_ui(x, 0);

   mpz_poly_clear(poly);
   mpz_clear(x);
   return success;
}


// Same scenario as test_mpz_poly_get_coeff(), but reading through
// mpz_poly_get_coeff_ui(), which returns the value directly.
// NOTE(review): x is initialised and cleared but never otherwise used here.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_get_coeff_ui()
{
   int success = 1;
   mpz_poly_t poly;
   mpz_t x;
   mpz_poly_init2(poly, 3);
   mpz_init(x);

   poly->length = 2;
   mpz_set_ui(poly->coeffs[0], 47);
   mpz_set_ui(poly->coeffs[1], 48);
   mpz_set_ui(poly->coeffs[2], 49);

   success = success && (mpz_poly_get_coeff_ui(poly, 0) == 47);
   success = success && (mpz_poly_get_coeff_ui(poly, 1) == 48);
   success = success && (mpz_poly_get_coeff_ui(poly, 2) == 0);

   mpz_poly_clear(poly);
   mpz_clear(x);
   return success;
}


// Signed counterpart of the test above: stores 47, -48 and (beyond the
// length) 49, then reads them through mpz_poly_get_coeff_si(), expecting
// 47, -48 and 0.
// NOTE(review): x is initialised and cleared but never otherwise used here.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_get_coeff_si()
{
   int success = 1;
   mpz_poly_t poly;
   mpz_t x;
   mpz_poly_init2(poly, 3);
   mpz_init(x);

   poly->length = 2;
   mpz_set_si(poly->coeffs[0], 47);
   mpz_set_si(poly->coeffs[1], -48);
   mpz_set_si(poly->coeffs[2], 49);

   success = success && (mpz_poly_get_coeff_si(poly, 0) == 47);
   success = success && (mpz_poly_get_coeff_si(poly, 1) == -48);
   success = success && (mpz_poly_get_coeff_si(poly, 2) == 0);

   mpz_poly_clear(poly);
   mpz_clear(x);
   return success;
}


// Drives mpz_poly_set_coeff() through a fixed script, comparing the
// polynomial with an expected string after every step.
// (The strings appear to be "<length> <coeff0> <coeff1> ..." -- TODO confirm
// against mpz_poly_from_string.)
// The script covers: writing past the current end, overwriting existing
// coefficients, writing zero inside the polynomial, writing zero beyond
// the end (expected to change nothing), and zeroing the leading
// coefficient (expected to shorten the polynomial).
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_set_coeff()
{
   int success = 1;
   mpz_poly_t poly;
   mpz_t x, y, zero;
   mpz_poly_init(poly);
   mpz_init(x);
   mpz_init(y);
   mpz_init(zero);      // left at the value mpz_init gives it
   mpz_set_ui(x, 42);
   mpz_set_ui(y, 37);

   mpz_poly_set_coeff(poly, 2, x);
   success = success && mpz_poly_equal_str(poly, "3 0 0 42");
   mpz_poly_set_coeff(poly, 5, y);
   success = success && mpz_poly_equal_str(poly, "6 0 0 42 0 0 37");
   mpz_poly_set_coeff(poly, 1, y);
   success = success && mpz_poly_equal_str(poly, "6 0 37 42 0 0 37");
   mpz_poly_set_coeff(poly, 5, x);
   success = success && mpz_poly_equal_str(poly, "6 0 37 42 0 0 42");
   mpz_poly_set_coeff(poly, 2, zero);
   success = success && mpz_poly_equal_str(poly, "6 0 37 0 0 0 42");
   // index 8 is beyond the end: expected string is unchanged
   mpz_poly_set_coeff(poly, 8, zero);
   success = success && mpz_poly_equal_str(poly, "6 0 37 0 0 0 42");
   // zeroing the top coefficient: expected length drops from 6 to 2
   mpz_poly_set_coeff(poly, 5, zero);
   success = success && mpz_poly_equal_str(poly, "2 0 37");
   mpz_poly_truncate(poly, poly, 1);
   success = success && mpz_poly_equal_str(poly, "0");
   mpz_poly_set_coeff(poly, 3, x);
   success = success && mpz_poly_equal_str(poly, "4 0 0 0 42");
   mpz_poly_set_coeff(poly, 3, zero);
   success = success && mpz_poly_equal_str(poly, "0");

   mpz_poly_clear(poly);
   mpz_clear(y);
   mpz_clear(x);
   mpz_clear(zero);
   return success;
}


// Same script as test_mpz_poly_set_coeff(), using mpz_poly_set_coeff_ui()
// with the literal values 42, 37 and 0.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_set_coeff_ui()
{
   int success = 1;
   mpz_poly_t poly;
   mpz_poly_init(poly);

   mpz_poly_set_coeff_ui(poly, 2, 42);
   success = success && mpz_poly_equal_str(poly, "3 0 0 42");
   mpz_poly_set_coeff_ui(poly, 5, 37);
   success = success && mpz_poly_equal_str(poly, "6 0 0 42 0 0 37");
   mpz_poly_set_coeff_ui(poly, 1, 37);
   success = success && mpz_poly_equal_str(poly, "6 0 37 42 0 0 37");
   mpz_poly_set_coeff_ui(poly, 5, 42);
   success = success && mpz_poly_equal_str(poly, "6 0 37 42 0 0 42");
   mpz_poly_set_coeff_ui(poly, 2, 0);
   success = success && mpz_poly_equal_str(poly, "6 0 37 0 0 0 42");
   mpz_poly_set_coeff_ui(poly, 8, 0);
   success = success && mpz_poly_equal_str(poly, "6 0 37 0 0 0 42");
   mpz_poly_set_coeff_ui(poly, 5, 0);
   success = success && mpz_poly_equal_str(poly, "2 0 37");
   mpz_poly_truncate(poly, poly, 1);
   success = success && mpz_poly_equal_str(poly, "0");
   mpz_poly_set_coeff_ui(poly, 3, 42);
   success = success && mpz_poly_equal_str(poly, "4 0 0 0 42");
   mpz_poly_set_coeff_ui(poly, 3, 0);
   success = success && mpz_poly_equal_str(poly, "0");

   mpz_poly_clear(poly);
   return success;
}


// Same script again, using mpz_poly_set_coeff_si() with 42, -37 and 0 so
// that negative coefficients are exercised.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_set_coeff_si()
{
   int success = 1;
   mpz_poly_t poly;
   mpz_poly_init(poly);

   mpz_poly_set_coeff_si(poly, 2, 42);
   success = success && mpz_poly_equal_str(poly, "3 0 0 42");
   mpz_poly_set_coeff_si(poly, 5, -37);
   success = success && mpz_poly_equal_str(poly, "6 0 0 42 0 0 -37");
   mpz_poly_set_coeff_si(poly, 1, -37);
   success = success && mpz_poly_equal_str(poly, "6 0 -37 42 0 0 -37");
   mpz_poly_set_coeff_si(poly, 5, 42);
   success = success && mpz_poly_equal_str(poly, "6 0 -37 42 0 0 42");
   mpz_poly_set_coeff_si(poly, 2, 0);
   success = success && mpz_poly_equal_str(poly, "6 0 -37 0 0 0 42");
   mpz_poly_set_coeff_si(poly, 8, 0);
   success = success && mpz_poly_equal_str(poly, "6 0 -37 0 0 0 42");
   mpz_poly_set_coeff_si(poly, 5, 0);
   success = success && mpz_poly_equal_str(poly, "2 0 -37");
   mpz_poly_truncate(poly, poly, 1);
   success = success && mpz_poly_equal_str(poly, "0");
   mpz_poly_set_coeff_si(poly, 3, 42);
   success = success && mpz_poly_equal_str(poly, "4 0 0 0 42");
   mpz_poly_set_coeff_si(poly, 3, 0);
   success = success && mpz_poly_equal_str(poly, "0");

   mpz_poly_clear(poly);
   return success;
}


/****************************************************************************

   Conversions

****************************************************************************/

// Placeholder: no checks are implemented; always returns 0, the value the
// implemented tests in this file return when a check fails.
int test_mpz_poly_to_fmpz_poly()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_fmpz_poly_to_mpz_poly()
{
   return 0;
}


/****************************************************************************

   String conversions and I/O

****************************************************************************/

// Placeholder: always returns 0. The disabled body below is written against
// mpz_poly_set_from_string(), whereas the live code in this file calls
// mpz_poly_from_string().
int test_mpz_poly_from_string()
{
   return 0;
/*
   int success = 1;
   mpz_poly_t poly;
   mpz_poly_init(poly);

   mpz_poly_set_from_string(poly, "");
   success = success && (poly->length == 0);

   mpz_poly_set_from_string(poly, " \t\n\r ");
   success = success && (poly->length == 0);

   mpz_poly_set_from_string(poly, "47");
   success = success && (poly->length == 1);
   success = success && (mpz_poly_get_coeff_ui(poly, 0) == 47);

   mpz_poly_set_from_string(poly, " 47 ");
   success = success && (poly->length == 1);
   success = success && (mpz_poly_get_coeff_ui(poly, 0) == 47);

   mpz_poly_set_from_string(poly, " 47 0 -23 ");
   success = success && (poly->length == 3);
   success = success && (mpz_poly_get_coeff_ui(poly, 0) == 47);
   success = success && (mpz_poly_get_coeff_ui(poly, 1) == 0);
   success = success && (mpz_poly_get_coeff_si(poly, 2) == -23);

   // todo: also test a few cases where mpz_poly_set_from_string()
   // should return 0

   mpz_poly_clear(poly);
   return success;
*/
}


// Placeholder: always returns 0. The disabled body below is written against
// mpz_poly_get_as_string() and mpz_poly_get_string_size().
int test_mpz_poly_to_string()
{
   return 0;
/*
   int success = 1;
   char buf[1000];

   mpz_poly_t poly;
   mpz_poly_init2(poly, 10);

   mpz_poly_get_as_string(buf, poly);
   success = success && !strcmp(buf, "");

   poly->length = 2;
   mpz_set_si(poly->coeffs[1], -57);
   mpz_poly_get_as_string(buf, poly);
   success = success && !strcmp(buf, "0 -57");
   success = success &&
             (mpz_poly_get_string_size(poly) >= strlen("0 -57") + 1);

   mpz_poly_clear(poly);
   return success;
*/
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_fprint()
{
   return 0;
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_fread()
{
   return 0;
}
/****************************************************************************

   Length and degree

****************************************************************************/

// Placeholder: always returns 0 (the value the implemented tests in this
// file return on failure). The disabled body below exercises
// _mpz_poly_normalise().
int test_mpz_poly_normalise()
{
   return 0;
/*
   int success = 1;
   mpz_poly_t poly;
   mpz_poly_init2(poly, 10);

   poly->length = 3;
   _mpz_poly_normalise(poly);
   success = success && (poly->length == 0);

   poly->length = 3;
   mpz_set_ui(poly->coeffs[1], 5);
   _mpz_poly_normalise(poly);
   success = success && (poly->length == 2);

   mpz_poly_clear(poly);
   return success;
*/
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_normalised()
{
   return 0;
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_pad()
{
   return 0;
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_truncate()
{
   return 0;
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_length()
{
   return 0;
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_degree()
{
   return 0;
}


/****************************************************************************

   Assignment

****************************************************************************/

// Placeholder: always returns 0. The disabled body below is written against
// mpz_poly_set_from_string() and _mpz_poly_set().
int test_mpz_poly_set()
{
   return 0;
/*
   int success = 1;
   mpz_poly_t poly1, poly2;
   mpz_poly_init2(poly1, 10);
   mpz_poly_init2(poly2, 10);

   mpz_poly_set_from_string(poly1, "42 -5 0 3");
   _mpz_poly_set(poly2, poly1);
   success = success && mpz_poly_equal_str(poly2, "42 -5 0 3");

   mpz_poly_clear(poly1);
   mpz_poly_clear(poly2);
   return success;
*/
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_swap()
{
   return 0;
}


/****************************************************************************

   Comparison

****************************************************************************/

// Checks mpz_poly_equal() on five hand-written pairs: one identical pair
// (must compare equal) and four differing pairs (second longer, first
// longer, one coefficient changed, first parsed from "0 ") which must all
// compare unequal.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_equal()
{
   int success = 1;
   mpz_poly_t poly1, poly2;
   mpz_poly_init(poly1);
   mpz_poly_init(poly2);

   mpz_poly_from_string(poly1, "4 42 -5 0 3");
   mpz_poly_from_string(poly2, "4 42 -5 0 3");
   success = success && mpz_poly_equal(poly1, poly2);

   mpz_poly_from_string(poly1, "4 42 -5 0 3");
   mpz_poly_from_string(poly2, "5 42 -5 0 3 1");
   success = success && !mpz_poly_equal(poly1, poly2);

   mpz_poly_from_string(poly1, "5 42 -5 0 3 4");
   mpz_poly_from_string(poly2, "4 42 -5 0 3");
   success = success && !mpz_poly_equal(poly1, poly2);

   mpz_poly_from_string(poly1, "4 42 -6 0 3");
   mpz_poly_from_string(poly2, "4 42 -5 0 3");
   success = success && !mpz_poly_equal(poly1, poly2);

   mpz_poly_from_string(poly1, "0 ");
   mpz_poly_from_string(poly2, "4 42 -5 0 3");
   success = success && !mpz_poly_equal(poly1, poly2);

   mpz_poly_clear(poly1);
   mpz_poly_clear(poly2);
   return success;
}


/****************************************************************************

   Addition/subtraction

****************************************************************************/

// Randomised check of mpz_poly_add(), mpz_poly_sub() and mpz_poly_neg().
// Three polynomials and a parallel 3 x MAX table of plain mpz_t coefficients
// are kept in step. For every choice of operand/result indices with
// in1 <= in2 <= out (so the result may be the same object as an input),
// each of the three operations is run for 100 trials: the polynomial routine
// is applied, the same operation is applied coefficient by coefficient to
// the table, and the two are compared. The result must also satisfy
// mpz_poly_normalised().
// Every loop stops as soon as success becomes 0.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_addsubneg()
{
   int success = 1;
   unsigned long i, j, in1, in2, out, op, trial;
   const unsigned long MAX = 4;   // number of coefficients per polynomial
   mpz_t temp;
   mpz_init(temp);

   mpz_poly_t poly[3];
   // this loop declares its own i, shadowing the function-level one
   for (unsigned long i = 0; i < 3; i++)
      mpz_poly_init(poly[i]);

   mpz_t coeffs[3][MAX];
   for (i = 0; i < 3; i++)
      for (j = 0; j < MAX; j++)
         mpz_init(coeffs[i][j]);

   // loop over various argument aliasing combinations
   for (in1 = 0; in1 < 3 && success; in1++)
   for (in2 = in1; in2 < 3 && success; in2++)
   for (out = in2; out < 3 && success; out++)
   // loop over add/sub/neg
   for (op = 0; op < 3 && success; op++)
   {
      for (trial = 0; trial < 100 && success; trial++)
      {
         // generate random coefficients
         // NOTE(review): if random_ulong() returns an unsigned type, the
         // "- 1" wraps for a zero draw before the value reaches
         // mpz_set_si(); presumably the intended range is -1..1 -- confirm.
         for (i = 0; i < 3; i++)
            for (j = 0; j < MAX; j++)
               mpz_set_si(coeffs[i][j], random_ulong(3) - 1);

         // copy into polys
         for (i = 0; i < 3; i++)
         {
            // reset by writing the length field directly
            poly[i]->length = 0;
            for (unsigned long j = 0; j < MAX; j++)
               mpz_poly_set_coeff(poly[i], j, coeffs[i][j]);
         }

         // do the addition/subtraction/negation
         if (op == 0)
            mpz_poly_add(poly[out], poly[in1], poly[in2]);
         else if (op == 1)
            mpz_poly_sub(poly[out], poly[in1], poly[in2]);
         else
            mpz_poly_neg(poly[out], poly[in1]);

         // do it naively
         for (j = 0; j < MAX; j++)
         {
            if (op == 0)
               mpz_add(coeffs[out][j], coeffs[in1][j], coeffs[in2][j]);
            else if (op == 1)
               mpz_sub(coeffs[out][j], coeffs[in1][j], coeffs[in2][j]);
            else
               mpz_neg(coeffs[out][j], coeffs[in1][j]);
         }

         // compare results
         success = success && mpz_poly_normalised(poly[out]);
         for (j = 0; j < MAX; j++)
         {
            mpz_poly_get_coeff(temp, poly[out], j);
            success = success && !mpz_cmp(temp, coeffs[out][j]);
         }
      }
   }

   for (i = 0; i < 3; i++)
      for (j = 0; j < MAX; j++)
         mpz_clear(coeffs[i][j]);

   for (i = 0; i < 3; i++)
      mpz_poly_clear(poly[i]);

   mpz_clear(temp);
   return success;
}


/****************************************************************************

   Shifting

****************************************************************************/

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_lshift()
{
   return 0;
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_rshift()
{
   return 0;
}


// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_shift()
{
   return 0;
}


/****************************************************************************

   Scalar multiplication and division

****************************************************************************/

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_mul()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_mul_ui()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_mul_si()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_div()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_div_ui()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_div_si()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_div_exact()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_div_exact_ui()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_scalar_div_exact_si()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_mod()
{
   return 0;
}

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_mod_ui()
{
   return 0;
}


/****************************************************************************

   Polynomial multiplication

****************************************************************************/

// Placeholder: no checks are implemented; always returns 0.
int test_mpz_poly_mul()
{
   // todo: also should test squaring
   return 0;
}


// Checks mpz_poly_mul_naive() against hand-computed products given as
// strings. Cases: zero operands on either or both sides, length-1 operands,
// a general 5-by-3 product, and squaring. Most cases are repeated with the
// output being the same object as one (or both) of the inputs, marked
// "inplace" below.
// Returns 1 if every check held, 0 otherwise.
int test_mpz_poly_mul_naive()
{
   int success = 1;
   mpz_poly_t poly1, poly2, poly3;
   mpz_poly_init(poly1);
   mpz_poly_init(poly2);
   mpz_poly_init(poly3);

   // special cases for zero input
   mpz_poly_from_string(poly1, "0");
   mpz_poly_from_string(poly2, "0");
   mpz_poly_mul_naive(poly3, poly1, poly2);
   success = success && mpz_poly_equal_str(poly3, "0");

   mpz_poly_from_string(poly1, "0");
   mpz_poly_mul_naive(poly3, poly1, poly1);
   success = success && mpz_poly_equal_str(poly3, "0");

   mpz_poly_from_string(poly1, "3 1 2 3");
   mpz_poly_from_string(poly2, "0 ");
   mpz_poly_mul_naive(poly3, poly1, poly2);
   success = success && mpz_poly_equal_str(poly3, "0");

   mpz_poly_from_string(poly1, "0 ");
   mpz_poly_from_string(poly2, "3 1 2 3");
   mpz_poly_mul_naive(poly3, poly1, poly2);
   success = success && mpz_poly_equal_str(poly3, "0");

   mpz_poly_from_string(poly1, "0 ");
   mpz_poly_from_string(poly2, "3 1 2 3");
   mpz_poly_mul_naive(poly2, poly1, poly2);    // inplace
   success = success && mpz_poly_equal_str(poly2, "0");

   // special cases for length 1 input
   mpz_poly_from_string(poly1, "1 5");
   mpz_poly_from_string(poly2, "1 2");
   mpz_poly_mul_naive(poly3, poly1, poly2);
   success = success && mpz_poly_equal_str(poly3, "1 10");

   mpz_poly_from_string(poly1, "1 5");
   mpz_poly_from_string(poly2, "1 2");
   mpz_poly_mul_naive(poly2, poly1, poly2);    // inplace
   success = success && mpz_poly_equal_str(poly2, "1 10");

   mpz_poly_from_string(poly1, "3 1 2 3");
   mpz_poly_from_string(poly2, "1 2");
   mpz_poly_mul_naive(poly3, poly1, poly2);
   success = success && mpz_poly_equal_str(poly3, "3 2 4 6");

   mpz_poly_from_string(poly1, "1 2");
   mpz_poly_from_string(poly2, "3 1 2 3");
   mpz_poly_mul_naive(poly3, poly1, poly2);
   success = success && mpz_poly_equal_str(poly3, "3 2 4 6");

   // random multiplications
   mpz_poly_from_string(poly1, "5 -3 4 0 2 56");
   mpz_poly_from_string(poly2, "3 48 -2 3");
   mpz_poly_mul_naive(poly3, poly1, poly2);
   success = success && mpz_poly_equal_str(poly3,
                               "7 -144 198 -17 108 2684 -106 168");

   mpz_poly_from_string(poly1, "5 -3 4 0 2 56");
   mpz_poly_from_string(poly2, "3 48 -2 3");
   mpz_poly_mul_naive(poly1, poly1, poly2);    // inplace
   success = success && mpz_poly_equal_str(poly1,
                               "7 -144 198 -17 108 2684 -106 168");

   // squaring
   mpz_poly_from_string(poly1, "5 -3 4 0 2 56");
   mpz_poly_mul_naive(poly3, poly1, poly1);
   success = success && mpz_poly_equal_str(poly3,
                               "9 9 -24 16 -12 -320 448 4 224 3136");

   mpz_poly_from_string(poly1, "5 -3 4 0 2 56");
   mpz_poly_mul_naive(poly1, poly1, poly1);    // inplace
   success = success && mpz_poly_equal_str(poly1,
                               "9 9 -24 16 -12 -320 448 4 224 3136");

   mpz_poly_clear(poly1);
   mpz_poly_clear(poly2);
   mpz_poly_clear(poly3);
   return success;
}


// Checks _mpz_poly_mul_kara_recursive() against mpz_poly_mul_naive().
// For every len1 in 1..40, len2 in len1..40 and crossover in 0..6, three
// trials are run with coefficients drawn by mpz_urandomb(..., 300). The
// routine is called on the raw coefficient arrays with a scratch array
// sized len1 + len2, and its output (length len1 + len2 - 1) must equal
// the naive product.
// randstate is not declared in this function; it comes from elsewhere.
// The crossover loop is the only one that does not test success in its
// condition.
// Returns 1 if every check held, 0 otherwise.
int test__mpz_poly_mul_kara_recursive()
{
   // todo: also should test squaring

   int success = 1;
   mpz_poly_t in1, in2, correct, out, scratch;
   mpz_poly_init(in1);
   mpz_poly_init(in2);
   mpz_poly_init(correct);
   mpz_poly_init(out);
   mpz_poly_init(scratch);

   for (unsigned long len1 = 1; len1 <= 40 && success; len1++)
   for (unsigned long len2 = len1; len2 <= 40 && success; len2++)
   for (unsigned long crossover = 0; crossover <= 6; crossover++)
   for (unsigned long trial = 0; trial < 3 && success; trial++)
   {
      mpz_poly_ensure_alloc(in1, len1);
      mpz_poly_ensure_alloc(in2, len2);
      mpz_poly_ensure_alloc(out, len1 + len2 - 1);
      mpz_poly_ensure_alloc(scratch, len1 + len2);

      for (unsigned long i = 0; i < len1; i++)
         mpz_urandomb(in1->coeffs[i], randstate, 300);
      in1->length = len1;

      for (unsigned long i = 0; i < len2; i++)
         mpz_urandomb(in2->coeffs[i], randstate, 300);
      in2->length = len2;

      _mpz_poly_mul_kara_recursive(out->coeffs, in1->coeffs, len1,
                                   in2->coeffs, len2,
                                   scratch->coeffs, 1, crossover);
      // the raw routine works on arrays only, so the length is set by hand
      out->length = len1 + len2 - 1;

      mpz_poly_mul_naive(correct, in1, in2);
      success = success && mpz_poly_equal(correct, out);
   }

   mpz_poly_clear(scratch);
   mpz_poly_clear(out);
   mpz_poly_clear(correct);
   mpz_poly_clear(in2);
   mpz_poly_clear(in1);

   return success;
}


int test_mpz_poly_mul_karatsuba()
{
   // todo: also should test squaring
   // todo: also should test inplace multiplication

   int success = 1;
   mpz_t x;
   mpz_init(x);
   mpz_poly_t in1, in2, correct, out;
   mpz_poly_init(in1);
   mpz_poly_init(in2);
   mpz_poly_init(correct);
   mpz_poly_init(out);

   for (unsigned long len1 = 0; len1 <= 32 && success; len1++)
   for (unsigned long len2 = 0; len2 <= 32 && success; len2++)
   for (unsigned long trial = 0; trial < 15 && success; trial++)
   {
      mpz_poly_zero(in1);
      mpz_poly_zero(in2);

      for (unsigned long i = 0; i < len1; i++)
      {
         mpz_urandomb(x, randstate, 300);
         mpz_poly_set_coeff(in1, i, x);
      }
      for 
(unsigned long i = 0; i < len2; i++) { mpz_urandomb(x, randstate, 300); mpz_poly_set_coeff(in2, i, x); } mpz_poly_mul_karatsuba(out, in1, in2); mpz_poly_mul_naive(correct, in1, in2); success = success && mpz_poly_equal(out, correct); } mpz_clear(x); mpz_poly_clear(in1); mpz_poly_clear(in2); mpz_poly_clear(correct); mpz_poly_clear(out); return success; } int test_mpz_poly_mul_SS() { // todo: also should test squaring return 0; } int test_mpz_poly_mul_naive_KS() { // todo: also should test squaring return 0; /* // todo: test inplace multiplication too int success = 1; unsigned long max_degree = 10; unsigned long max_bitsize = 10; mpz_poly_t poly[4]; for (unsigned long i = 0; i < 4; i++) mpz_poly_init2(poly[i], max_degree*2 + 1); mpz_t temp; mpz_init(temp); unsigned long degree[2]; unsigned long bitsize[2]; for (degree[0] = 1; degree[0] <= max_degree; degree[0]++) for (degree[1] = 1; degree[1] <= max_degree; degree[1]++) for (bitsize[0] = 1; bitsize[0] <= max_bitsize; bitsize[0]++) for (bitsize[1] = 1; bitsize[1] <= max_bitsize; bitsize[1]++) for (unsigned long trial = 0; trial < 10; trial++) { // generate random polys for (unsigned long j = 0; j < 2; j++) { mpz_poly_zero(poly[j]); for (unsigned long i = 0; i < degree[j]; i++) { unsigned long bits = gmp_urandomm_ui( randstate, bitsize[j]+1); mpz_rrandomb(temp, randstate, bits); if (gmp_urandomb_ui(randstate, 1)) mpz_neg(temp, temp); mpz_poly_set_coeff(poly[j], i, temp); } } // compute product using naive multiplication and by // naive KS, and compare answers mpz_poly_mul_naive(poly[2], poly[0], poly[1]); mpz_poly_mul_naive_KS(poly[3], poly[0], poly[1]); success = success && mpz_poly_equal(poly[2], poly[3]); } for (unsigned long i = 0; i < 4; i++) mpz_poly_clear(poly[i]); mpz_clear(temp); return success; */ } /**************************************************************************** Polynomial division ****************************************************************************/ int test_mpz_poly_monic_inverse() { 
return 0; /* int success = 1; mpz_poly_t poly1, poly2; mpz_poly_init(poly1); mpz_poly_init(poly2); for (unsigned long deg1 = 2; deg1 <= 10; deg1++) { for (unsigned long trial = 0; trial < 20; trial++) { // generate random input poly mpz_poly_set_coeff_ui(poly1, deg1, 1); for (unsigned long i = 0; i < deg1; i++) mpz_poly_set_coeff_si(poly1, i, gmp_urandomb_ui(randstate, 10) - 512); // try computing inverses to various lengths for (unsigned long deg2 = deg1; deg2 <= 50; deg2++) { mpz_poly_t poly3; mpz_poly_init(poly3); mpz_poly_monic_inverse(poly3, poly1, deg2); if (poly3->length-1 != deg2) success = 0; else { // check correctness by multiplying back together mpz_poly_mul(poly2, poly1, poly3); success = success && !mpz_cmp_ui(poly2->coeffs[deg1+deg2], 1); for (unsigned long i = 0; i < deg2; i++) success = success && !mpz_sgn(poly2->coeffs[deg1+i]); } mpz_poly_clear(poly3); } } } mpz_poly_clear(poly2); mpz_poly_clear(poly1); return success; */ } int test_mpz_poly_pseudo_inverse() { return 0; } int test_mpz_poly_monic_div() { return 0; } int test_mpz_poly_pseudo_div() { return 0; } int test_mpz_poly_monic_rem() { return 0; } int test_mpz_poly_pseudo_rem() { return 0; } int test_mpz_poly_monic_div_rem() { return 0; } int test_mpz_poly_pseudo_div_rem() { return 0; } int test_mpz_poly_monic_inverse_naive() { return 0; } int test_mpz_poly_pseudo_inverse_naive() { return 0; } int test_mpz_poly_monic_div_naive() { return 0; } int test_mpz_poly_pseudo_div_naive() { return 0; } int test_mpz_poly_monic_rem_naive() { return 0; } int test_mpz_poly_pseudo_rem_naive() { return 0; } int test_mpz_poly_monic_div_rem_naive() { return 0; } int test_mpz_poly_pseudo_div_rem_naive() { return 0; } /**************************************************************************** GCD and extended GCD ****************************************************************************/ int test_mpz_poly_content() { return 0; } int test_mpz_poly_content_ui() { return 0; } int test_mpz_poly_gcd() { return 
0; } int test_mpz_poly_xgcd() { return 0; } /**************************************************************************** Miscellaneous ****************************************************************************/ int test_mpz_poly_max_limbs() { return 0; } int test_mpz_poly_max_bits() { return 0; } /**************************************************************************** Main test code ****************************************************************************/ #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? "ok\n" : "FAIL!\n"); void mpz_poly_test_all() { int success, all_success = 1; RUN_TEST(mpz_poly_get_coeff_ptr); RUN_TEST(mpz_poly_get_coeff); RUN_TEST(mpz_poly_get_coeff_ui); RUN_TEST(mpz_poly_get_coeff_si); RUN_TEST(mpz_poly_set_coeff); RUN_TEST(mpz_poly_set_coeff_ui); RUN_TEST(mpz_poly_set_coeff_si); // RUN_TEST(mpz_poly_to_fmpz_poly); // RUN_TEST(fmpz_poly_to_mpz_poly); // RUN_TEST(mpz_poly_from_string); // RUN_TEST(mpz_poly_to_string); // RUN_TEST(mpz_poly_fprint); // RUN_TEST(mpz_poly_fread); // RUN_TEST(mpz_poly_normalise); // RUN_TEST(mpz_poly_normalised); // RUN_TEST(mpz_poly_pad); // RUN_TEST(mpz_poly_length); // RUN_TEST(mpz_poly_degree); // RUN_TEST(mpz_poly_set); // RUN_TEST(mpz_poly_swap); RUN_TEST(mpz_poly_equal); RUN_TEST(mpz_poly_addsubneg); // RUN_TEST(mpz_poly_neg); // RUN_TEST(mpz_poly_lshift); // RUN_TEST(mpz_poly_rshift); // RUN_TEST(mpz_poly_shift); // RUN_TEST(mpz_poly_scalar_mul); // RUN_TEST(mpz_poly_scalar_mul_ui); // RUN_TEST(mpz_poly_scalar_mul_si); // RUN_TEST(mpz_poly_scalar_div); // RUN_TEST(mpz_poly_scalar_div_ui); // RUN_TEST(mpz_poly_scalar_div_si); // RUN_TEST(mpz_poly_scalar_div_exact); // RUN_TEST(mpz_poly_scalar_div_exact_ui); // RUN_TEST(mpz_poly_scalar_div_exact_si); // RUN_TEST(mpz_poly_mod); // RUN_TEST(mpz_poly_mod_ui); // RUN_TEST(mpz_poly_mul); RUN_TEST(mpz_poly_mul_naive); 
RUN_TEST(_mpz_poly_mul_kara_recursive); RUN_TEST(mpz_poly_mul_karatsuba); // RUN_TEST(mpz_poly_mul_SS); // RUN_TEST(mpz_poly_mul_naive_KS); // RUN_TEST(mpz_poly_monic_inverse); // RUN_TEST(mpz_poly_monic_inverse); // RUN_TEST(mpz_poly_pseudo_inverse); // RUN_TEST(mpz_poly_monic_div); // RUN_TEST(mpz_poly_pseudo_div); // RUN_TEST(mpz_poly_monic_rem); // RUN_TEST(mpz_poly_pseudo_rem); // RUN_TEST(mpz_poly_monic_div_rem); // RUN_TEST(mpz_poly_pseudo_div_rem); // RUN_TEST(mpz_poly_monic_inverse_naive); // RUN_TEST(mpz_poly_pseudo_inverse_naive); // RUN_TEST(mpz_poly_monic_div_naive); // RUN_TEST(mpz_poly_pseudo_div_naive); // RUN_TEST(mpz_poly_monic_rem_naive); // RUN_TEST(mpz_poly_pseudo_rem_naive); // RUN_TEST(mpz_poly_monic_div_rem_naive); // RUN_TEST(mpz_poly_pseudo_div_rem_naive); // RUN_TEST(mpz_poly_content); // RUN_TEST(mpz_poly_content_ui); // RUN_TEST(mpz_poly_gcd); // RUN_TEST(mpz_poly_xgcd); // RUN_TEST(mpz_poly_max_limbs); // RUN_TEST(mpz_poly_max_bits); printf(all_success ? "\nAll tests passed\n" : "\nAt least one test FAILED!\n"); } int main() { test_support_init(); mpz_poly_test_all(); test_support_cleanup(); flint_stack_cleanup(); return 0; } // *************** end of file flint-1.011/flint.h0000644017361200017500000001006511025357254014012 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /****************************************************************************** flint.h Main header file for FLINT. (C) 2006 William Hart and David Harvey ******************************************************************************/ #ifndef FLINT_H #define FLINT_H #include #include #include #include #include #include "longlong_wrapper.h" #ifdef __cplusplus extern "C" { #endif #if 0 #define FLINT_ASSERT assert #else #define FLINT_ASSERT(zzz_dummy) #endif #ifndef __USE_ISOC99 #define round(x) floor(x + 0.5) #endif #define FLINT_MAX(zzz1, zzz2) ((zzz1) > (zzz2) ? (zzz1) : (zzz2)) #define FLINT_MIN(zzz1, zzz2) ((zzz1) > (zzz2) ? (zzz2) : (zzz1)) #define FLINT_ABS(zzz) ((long)(zzz) < 0 ? (-zzz) : (zzz)) /* FLINT_BITS is the number of bits per limb. */ #if ULONG_MAX == 4294967295U #define FLINT_BITS 32 #define FLINT_D_BITS 32 #define FLINT_LG_BITS_PER_LIMB 5 #define FLINT_BYTES_PER_LIMB 4 #define FLINT_LG_BYTES_PER_LIMB 2 #elif ULONG_MAX == 18446744073709551615U #define FLINT_BITS 64 #define FLINT_D_BITS 53 #define FLINT_LG_BITS_PER_LIMB 6 #define FLINT_BYTES_PER_LIMB 8 #define FLINT_LG_BYTES_PER_LIMB 3 #else // only 32 and 64 bits are supported #error FLINT requires that unsigned long is 32 bits or 64 bits #endif /* Cache hints to speed up reads from data in memory */ #if defined(__GNUC__) #define FLINT_PREFETCH(addr,n) __builtin_prefetch((unsigned long*)addr+n,1,0) #elif defined(_MSC_VER) && _MSC_VER >= 1400 #define FLINT_PREFETCH(addr,n) PreFetchCacheLine(PF_TEMPORAL_LEVEL_1, (unsigned long*)addr+n) #else #define FLINT_PREFETCH(addr,n) /* nothing */ #endif /* Cache size in bytes. 
*/ #define FLINT_CACHE_SIZE 65536 #define FLINT_POL_DIV_1_LENGTH 10 #if FLINT_BITS == 32 #define half_ulong uint16_t #define half_long int16_t #define HALF_FLINT_BITS 16 #else #define half_ulong uint32_t #define half_long int32_t #define HALF_FLINT_BITS 32 #endif #if defined(__GNUC__) #if FLINT_BITS == 64 #define count_lead_zeros(a,b) \ a = __builtin_clzll(b); #define count_trail_zeros(a,b) \ a = __builtin_ctzll(b); #else #define count_lead_zeros(a,b) \ a = __builtin_clzl(b); #define count_trail_zeros(a,b) \ a = __builtin_ctzl(b); #endif #else #error Currently FLINT only compiles with GCC #endif /* On some platforms arithmetic shifts by FLINT_BITS don't yield all zeros So we define these macros for use in situations where this would be a problem */ static inline unsigned long r_shift(unsigned long in, unsigned long shift) { if (shift == FLINT_BITS) return 0L; return (in>>shift); } static inline unsigned long l_shift(unsigned long in, unsigned long shift) { if (shift == FLINT_BITS) return 0L; return (in< #include #include #include "profiler-main.h" #include "flint.h" #include "memory-manager.h" #include "fmpz_poly.h" #include "mpz_poly.h" #include "test-support.h" //============================================================================= // whether to generate signed or unsigned random polys #define SIGNS 0 unsigned long randint(unsigned long randsup) { static unsigned long randval = 4035456057U; randval = ((unsigned long)randval*1025416097U+286824428U)%(unsigned long)4294967291U; return (unsigned long)randval%randsup; } void randpoly(mpz_poly_t pol, unsigned long length, unsigned long maxbits) { unsigned long bits; mpz_t temp; mpz_init(temp); mpz_poly_zero(pol); for (unsigned long i = 0; i < length; i++) { bits = maxbits; if (bits == 0) mpz_set_ui(temp,0); else { mpz_rrandomb(temp, randstate, bits); #if SIGNS if (randint(2)) mpz_neg(temp,temp); #endif } mpz_poly_set_coeff(pol, i, temp); } mpz_clear(temp); } // 
============================================================================ /* Calls prof2d_sample(length, bits, NULL) for all length, bits combinations such that length*bits < max_bits, with length and bits spaced out by the given ratio */ void run_triangle(unsigned long max_bits, double ratio) { int max_iter = (int) ceil(log((double) max_bits) / log(ratio)); unsigned long last_length = 0; for (unsigned long i = 0; i <= max_iter; i++) { unsigned long length = (unsigned long) floor(pow(ratio, i)); if (length != last_length) { last_length = length; unsigned long last_bits = 0; for (unsigned long j = 0; j <= max_iter; j++) { unsigned long bits = (unsigned long) floor(pow(ratio, j)); if (bits != last_bits) { last_bits = bits; if (bits * length < max_bits) prof2d_sample(length, bits, NULL); } } } } } // ============================================================================ void sample_fmpz_poly_mul_KS(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3; mpz_poly_t r_poly, r_poly2; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); _fmpz_poly_stack_init(poly1, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly2, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly3, 2*length-1, (output_bits-1)/FLINT_BITS+1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); } prof_start(); _fmpz_poly_mul_KS(poly3, poly1, poly2); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); 
_fmpz_poly_stack_clear(poly3); _fmpz_poly_stack_clear(poly2); _fmpz_poly_stack_clear(poly1); } char* profDriverString_fmpz_poly_mul_KS(char* params) { return "fmpz_poly_mul_KS over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_mul_KS() { return "16000000 1.2"; } void profDriver_fmpz_poly_mul_KS(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_mul_KS); run_triangle(max_bits, ratio); test_support_cleanup(); } // ============================================================================ void sample_fmpz_poly_mul_KS_trunc(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3; mpz_poly_t r_poly, r_poly2; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); _fmpz_poly_stack_init(poly1, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly2, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly3, 2*length-1, (output_bits-1)/FLINT_BITS+1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); } prof_start(); _fmpz_poly_mul_KS_trunc(poly3, poly1, poly2, length); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); _fmpz_poly_stack_clear(poly3); _fmpz_poly_stack_clear(poly2); _fmpz_poly_stack_clear(poly1); } char* profDriverString_fmpz_poly_mul_KS_trunc(char* params) { return 
"fmpz_poly_mul_KS_trunc over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_mul_KS_trunc() { return "16000000 1.2"; } void profDriver_fmpz_poly_mul_KS_trunc(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_mul_KS_trunc); run_triangle(max_bits, ratio); test_support_cleanup(); } // **************************************************************************** void sample_fmpz_poly_mul_SS(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3; mpz_poly_t r_poly, r_poly2; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); _fmpz_poly_stack_init(poly1, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly2, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly3, 2*length-1, (output_bits-1)/FLINT_BITS+1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); } prof_start(); _fmpz_poly_mul_SS(poly3, poly1, poly2); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); _fmpz_poly_stack_clear(poly3); _fmpz_poly_stack_clear(poly2); _fmpz_poly_stack_clear(poly1); } char* profDriverString_fmpz_poly_mul_SS(char* params) { return "fmpz_poly_mul_SS over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* 
profDriverDefaultParams_fmpz_poly_mul_SS() { return "16000000 1.2"; } void profDriver_fmpz_poly_mul_SS(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_mul_SS); run_triangle(max_bits, ratio); test_support_cleanup(); } // **************************************************************************** void sample_fmpz_poly_mul_SS_trunc(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3; mpz_poly_t r_poly, r_poly2; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); _fmpz_poly_stack_init(poly1, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly2, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly3, 2*length-1, (output_bits-1)/FLINT_BITS+1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); } prof_start(); _fmpz_poly_mul_SS_trunc(poly3, poly1, poly2, length); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); _fmpz_poly_stack_clear(poly3); _fmpz_poly_stack_clear(poly2); _fmpz_poly_stack_clear(poly1); } char* profDriverString_fmpz_poly_mul_SS_trunc(char* params) { return "fmpz_poly_mul_SS_trunc over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_mul_SS_trunc() { return "16000000 1.2"; } void profDriver_fmpz_poly_mul_SS_trunc(char* params) { unsigned long 
max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_mul_SS_trunc); run_triangle(max_bits, ratio); test_support_cleanup(); } // ============================================================================ void sample_fmpz_poly_mul_karatsuba(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3; mpz_poly_t r_poly, r_poly2; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); _fmpz_poly_stack_init(poly1, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly2, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly3, 2*length-1, (output_bits-1)/FLINT_BITS+1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); } prof_start(); _fmpz_poly_mul_karatsuba(poly3, poly1, poly2); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); _fmpz_poly_stack_clear(poly3); _fmpz_poly_stack_clear(poly2); _fmpz_poly_stack_clear(poly1); } char* profDriverString_fmpz_poly_mul_karatsuba(char* params) { return "fmpz_poly_mul_karatsuba over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_mul_karatsuba() { return "300000 1.2"; } void profDriver_fmpz_poly_mul_karatsuba(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); 
prof2d_set_sampler(sample_fmpz_poly_mul_karatsuba); run_triangle(max_bits, ratio); test_support_cleanup(); } // ============================================================================ void sample_fmpz_poly_mul_karatsuba_trunc(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3; mpz_poly_t r_poly, r_poly2; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); _fmpz_poly_stack_init(poly1, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly2, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly3, 2*length-1, (output_bits-1)/FLINT_BITS+1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); } prof_start(); _fmpz_poly_mul_karatsuba_trunc(poly3, poly1, poly2, length); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); _fmpz_poly_stack_clear(poly3); _fmpz_poly_stack_clear(poly2); _fmpz_poly_stack_clear(poly1); } char* profDriverString_fmpz_poly_mul_karatsuba_trunc(char* params) { return "fmpz_poly_mul_karatsuba_trunc over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_mul_karatsuba_trunc() { return "300000 1.2"; } void profDriver_fmpz_poly_mul_karatsuba_trunc(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_mul_karatsuba_trunc); run_triangle(max_bits, ratio); 
test_support_cleanup(); } // ============================================================================ char* profDriverString_fmpz_poly_mul_karatsuba_len(char* params) { return "fmpz_poly_mul_karatsuba over various lengths with fixed bitsize.\n" "Parameters are: none."; } char* profDriverDefaultParams_fmpz_poly_mul_karatsuba_len() { return ""; } void profDriver_fmpz_poly_mul_karatsuba_len(char* params) { unsigned long max_bits; double ratio; test_support_init(); prof2d_set_sampler(sample_fmpz_poly_mul_karatsuba); for (unsigned long length = 1; length < 128; length++) prof2d_sample(length, 100, NULL); test_support_cleanup(); } // ============================================================================ void sample_fmpz_poly_mul(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3; mpz_poly_t r_poly, r_poly2; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); _fmpz_poly_stack_init(poly1, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly2, length, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(poly3, 2*length-1, (output_bits-1)/FLINT_BITS+1); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); } prof_start(); _fmpz_poly_mul(poly3, poly1, poly2); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); _fmpz_poly_stack_clear(poly3); _fmpz_poly_stack_clear(poly2); _fmpz_poly_stack_clear(poly1); } char* profDriverString_fmpz_poly_mul(char* params) { return "fmpz_poly_mul over various 
lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_mul() { return "16000000 1.2"; } void profDriver_fmpz_poly_mul(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_mul); run_triangle(max_bits, ratio); test_support_cleanup(); } char* profDriverString_fmpz_poly_mul_specific(char* params) { return "fmpz_poly_mul for a specific length and bitsize.\n" "Parameters are: length, bitsize."; } char* profDriverDefaultParams_fmpz_poly_mul_specific() { return "1024 1024"; } void profDriver_fmpz_poly_mul_specific(char* params) { unsigned long length; unsigned long bits; sscanf(params, "%ld %ld", &length, &bits); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_mul); prof2d_sample(length, bits, NULL); test_support_cleanup(); } // ============================================================================ /* this function samples multiplying polynomials of lengths len1 and len2 using fmpz_poly_mul_karatsuba arg should point to an unsigned long, giving the coefficient bitlengths */ void sample_fmpz_poly_mul_karatsuba_mixlengths( unsigned long len1, unsigned long len2, void* arg, unsigned long count) { unsigned long bits = *(unsigned long*) arg; unsigned long m = ceil_log2(len1 + len2); unsigned long output_bits = 2*bits + 2 + m; mpz_poly_t poly1, poly2; mpz_poly_init(poly1); mpz_poly_init(poly2); mpz_t x; mpz_init(x); for (unsigned long i = 0; i < len1; i++) { mpz_urandomb(x, randstate, bits); if (random_ulong(2)) mpz_neg(x, x); mpz_poly_set_coeff(poly1, i, x); } for (unsigned long i = 0; i < len2; i++) { mpz_urandomb(x, randstate, bits); if (random_ulong(2)) mpz_neg(x, x); mpz_poly_set_coeff(poly2, i, x); } mpz_clear(x); fmpz_poly_t fpoly1, fpoly2, fpoly3; _fmpz_poly_stack_init(fpoly1, len1, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(fpoly2, len2, 
(bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(fpoly3, len1 + len2 - 1, (output_bits-1)/FLINT_BITS+1); mpz_poly_to_fmpz_poly(fpoly1, poly1); mpz_poly_to_fmpz_poly(fpoly2, poly2); prof_start(); for (unsigned long i = 0; i < count; i++) _fmpz_poly_mul_karatsuba(fpoly3, fpoly1, fpoly2); prof_stop(); _fmpz_poly_stack_clear(fpoly3); _fmpz_poly_stack_clear(fpoly2); _fmpz_poly_stack_clear(fpoly1); mpz_poly_clear(poly2); mpz_poly_clear(poly1); } char* profDriverString_fmpz_poly_mul_karatsuba_mixlengths(char* params) { return "fmpz_poly_mul_karatubsa for distinct input lengths and fixed\n" "coefficient size. Parameters are: max length; length skip; coefficient size (in bits)\n"; } char* profDriverDefaultParams_fmpz_poly_mul_karatsuba_mixlengths() { return "50 1 100"; } void profDriver_fmpz_poly_mul_karatsuba_mixlengths(char* params) { unsigned long max_length, skip, bits; sscanf(params, "%ld %ld %ld", &max_length, &skip, &bits); prof2d_set_sampler(sample_fmpz_poly_mul_karatsuba_mixlengths); test_support_init(); for (unsigned long len1 = skip; len1 <= max_length; len1 += skip) for (unsigned long len2 = skip; len2 <= len1; len2 += skip) prof2d_sample(len1, len2, &bits); test_support_cleanup(); } // ============================================================================ /* this function samples multiplying polynomials of lengths len1 and len2 using fmpz_poly_mul_karatsuba arg should point to an unsigned long, giving the coefficient bitlengths */ void sample_fmpz_poly_mul_karatsuba_mixlengths2( unsigned long len1, unsigned long len2, void* arg, unsigned long count) { unsigned long bits = *(unsigned long*) arg; unsigned long m = ceil_log2(len1 + len2); unsigned long output_bits = 2*bits + 2 + m; mpz_poly_t poly1, poly2; mpz_poly_init(poly1); mpz_poly_init(poly2); mpz_t x; mpz_init(x); for (unsigned long i = 0; i < len1; i++) { mpz_urandomb(x, randstate, bits); if (random_ulong(2)) mpz_neg(x, x); mpz_poly_set_coeff(poly1, i, x); } for (unsigned long i = 0; i < len2; i++) { 
mpz_urandomb(x, randstate, bits); if (random_ulong(2)) mpz_neg(x, x); mpz_poly_set_coeff(poly2, i, x); } mpz_clear(x); fmpz_poly_t fpoly1, fpoly2, fpoly3; _fmpz_poly_stack_init(fpoly1, len1, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(fpoly2, len2, (bits-1)/FLINT_BITS+1); _fmpz_poly_stack_init(fpoly3, len1 + len2 - 1, (output_bits-1)/FLINT_BITS+1); mpz_poly_to_fmpz_poly(fpoly1, poly1); mpz_poly_to_fmpz_poly(fpoly2, poly2); unsigned long limbs = fpoly3->limbs; unsigned long log_length = 0; unsigned long crossover; fmpz_poly_t scratch, scratchb, temp; scratch->coeffs = (mp_limb_t *) flint_stack_alloc(5*FLINT_MAX(fpoly1->length,fpoly2->length)*(limbs+1)); scratch->limbs = limbs; scratchb->limbs = FLINT_MAX(fpoly1->limbs,fpoly2->limbs)+1; scratchb->coeffs = (mp_limb_t *) flint_stack_alloc(5*FLINT_MAX(fpoly1->length,fpoly2->length)*(scratchb->limbs+1)); crossover = 19 - _fmpz_poly_max_limbs(fpoly1) - _fmpz_poly_max_limbs(fpoly2); if (fpoly1->length >= fpoly2->length) { prof_start(); for (unsigned long i = 0; i < count; i++) __fmpz_poly_karamul_recursive(fpoly3, fpoly1, fpoly2, scratch, scratchb, crossover); prof_stop(); } else { prof_start(); for (unsigned long i = 0; i < count; i++) __fmpz_poly_karamul_recursive(fpoly3, fpoly2, fpoly1, scratch, scratchb, crossover); prof_stop(); } flint_stack_release(); flint_stack_release(); _fmpz_poly_stack_clear(fpoly3); _fmpz_poly_stack_clear(fpoly2); _fmpz_poly_stack_clear(fpoly1); mpz_poly_clear(poly2); mpz_poly_clear(poly1); } char* profDriverString_fmpz_poly_mul_karatsuba_mixlengths2(char* params) { return "fmpz_poly_mul_karatubsa for distinct input lengths and fixed\n" "coefficient size. 
Parameters are: max length; length skip; coefficient size (in bits)\n"; } char* profDriverDefaultParams_fmpz_poly_mul_karatsuba_mixlengths2() { return "50 1 100"; } void profDriver_fmpz_poly_mul_karatsuba_mixlengths2(char* params) { unsigned long max_length, skip, bits; sscanf(params, "%ld %ld %ld", &max_length, &skip, &bits); prof2d_set_sampler(sample_fmpz_poly_mul_karatsuba_mixlengths2); test_support_init(); for (unsigned long len1 = skip; len1 <= max_length; len1 += skip) for (unsigned long len2 = skip; len2 <= len1; len2 += skip) prof2d_sample(len1, len2, &bits); test_support_cleanup(); } // ============================================================================ void sample_fmpz_poly_div_mulders(unsigned long length, unsigned long bits, void* arg, unsigned long count) { mpz_poly_t test_poly, test_poly2; fmpz_poly_t test_mpn_poly, test_mpn_poly2, test_mpn_poly3, test_mpn_poly4; unsigned long bits2, length2, r_count; mpz_poly_init(test_poly); mpz_poly_init(test_poly2); if (count >= 10000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 4; else if (count >= 8) r_count = 2; else r_count = 1; fmpz_poly_init2(test_mpn_poly, 1, (bits-1)/FLINT_BITS+1); fmpz_poly_init2(test_mpn_poly2, 1, (bits2-1)/FLINT_BITS+1); length2 = length; bits2 = bits; for (unsigned long count1 = 0; count1 < count ; count1++) { if (count1 % r_count == 0) { do { randpoly(test_poly, length, bits); fmpz_poly_realloc(test_mpn_poly, length); mpz_poly_to_fmpz_poly(test_mpn_poly, test_poly); _fmpz_poly_normalise(test_mpn_poly); } while (test_mpn_poly->length == 0); randpoly(test_poly2, length2, bits2); fmpz_poly_realloc(test_mpn_poly2, length2); mpz_poly_to_fmpz_poly(test_mpn_poly2, test_poly2); } fmpz_poly_init(test_mpn_poly3); fmpz_poly_mul(test_mpn_poly3, test_mpn_poly, test_mpn_poly2); fmpz_poly_init(test_mpn_poly4); prof_start(); for (unsigned long i = 0; i < r_count; i++) fmpz_poly_div_mulders(test_mpn_poly4, test_mpn_poly3, test_mpn_poly); prof_stop(); 
count1+=(r_count-1); fmpz_poly_clear(test_mpn_poly3); fmpz_poly_clear(test_mpn_poly4); } fmpz_poly_clear(test_mpn_poly); fmpz_poly_clear(test_mpn_poly2); mpz_poly_clear(test_poly); mpz_poly_clear(test_poly2); } char* profDriverString_fmpz_poly_div_mulders(char* params) { return "fmpz_poly_div_mulders over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_div_mulders() { return "1000000 1.2"; } void profDriver_fmpz_poly_div_mulders(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_div_mulders); run_triangle(max_bits, ratio); test_support_cleanup(); } //============================================================================ void sample_fmpz_poly_gcd_subresultant(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3, poly4; mpz_poly_t r_poly, r_poly2, r_poly3; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_init(r_poly3); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); mpz_poly_realloc(r_poly3, length); fmpz_poly_init(poly1); fmpz_poly_init(poly2); fmpz_poly_init(poly3); fmpz_poly_init(poly4); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); randpoly(r_poly3, length, bits); mpz_poly_to_fmpz_poly(poly3, r_poly3); fmpz_poly_mul(poly1, poly1, poly3); fmpz_poly_mul(poly2, poly2, poly3); } prof_start(); 
fmpz_poly_gcd_subresultant(poly4, poly1, poly2); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); mpz_poly_clear(r_poly3); fmpz_poly_clear(poly4); fmpz_poly_clear(poly3); fmpz_poly_clear(poly2); fmpz_poly_clear(poly1); } char* profDriverString_fmpz_poly_gcd_subresultant(char* params) { return "fmpz_poly_gcd_subresultant over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_gcd_subresultant() { return "100000 1.2"; } void profDriver_fmpz_poly_gcd_subresultant(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_gcd_subresultant); run_triangle(max_bits, ratio); test_support_cleanup(); } //============================================================================ void sample_fmpz_poly_gcd_modular(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3, poly4; mpz_poly_t r_poly, r_poly2, r_poly3; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_init(r_poly3); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); mpz_poly_realloc(r_poly3, length); fmpz_poly_init(poly1); fmpz_poly_init(poly2); fmpz_poly_init(poly3); fmpz_poly_init(poly4); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); randpoly(r_poly3, length, bits); mpz_poly_to_fmpz_poly(poly3, r_poly3); fmpz_poly_mul(poly1, poly1, poly3); fmpz_poly_mul(poly2, poly2, 
poly3); } prof_start(); fmpz_poly_gcd_modular(poly4, poly1, poly2); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); mpz_poly_clear(r_poly3); fmpz_poly_clear(poly4); fmpz_poly_clear(poly3); fmpz_poly_clear(poly2); fmpz_poly_clear(poly1); } char* profDriverString_fmpz_poly_gcd_modular(char* params) { return "fmpz_poly_gcd_modular over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_gcd_modular() { return "100000 1.2"; } void profDriver_fmpz_poly_gcd_modular(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_gcd_modular); run_triangle(max_bits, ratio); test_support_cleanup(); } //============================================================================ void sample_fmpz_poly_gcd(unsigned long length, unsigned long bits, void* arg, unsigned long count) { unsigned long m = ceil_log2(length); unsigned long output_bits = 2*bits+m; fmpz_poly_t poly1, poly2, poly3, poly4; mpz_poly_t r_poly, r_poly2, r_poly3; mpz_poly_init(r_poly); mpz_poly_init(r_poly2); mpz_poly_init(r_poly3); mpz_poly_realloc(r_poly, length); mpz_poly_realloc(r_poly2, length); mpz_poly_realloc(r_poly3, length); fmpz_poly_init(poly1); fmpz_poly_init(poly2); fmpz_poly_init(poly3); fmpz_poly_init(poly4); unsigned long r_count; // how often to generate new random data if (count >= 1000) r_count = 100; else if (count >= 100) r_count = 10; else if (count >= 20) r_count = 5; else if (count >= 8) r_count = 2; else r_count = 1; for (unsigned long i = 0; i < count; i++) { if (i%r_count == 0) { randpoly(r_poly, length, bits); mpz_poly_to_fmpz_poly(poly1, r_poly); randpoly(r_poly2, length, bits); mpz_poly_to_fmpz_poly(poly2, r_poly2); randpoly(r_poly3, length, bits); mpz_poly_to_fmpz_poly(poly3, r_poly3); fmpz_poly_mul(poly1, poly1, poly3); fmpz_poly_mul(poly2, poly2, poly3); } 
prof_start(); fmpz_poly_gcd(poly4, poly1, poly2); prof_stop(); } mpz_poly_clear(r_poly); mpz_poly_clear(r_poly2); mpz_poly_clear(r_poly3); fmpz_poly_clear(poly4); fmpz_poly_clear(poly3); fmpz_poly_clear(poly2); fmpz_poly_clear(poly1); } char* profDriverString_fmpz_poly_gcd(char* params) { return "fmpz_poly_gcd over various lengths and various bit sizes.\n" "Parameters are: max bitsize; ratio between consecutive lengths/bitsizes."; } char* profDriverDefaultParams_fmpz_poly_gcd() { return "100000 1.2"; } void profDriver_fmpz_poly_gcd(char* params) { unsigned long max_bits; double ratio; sscanf(params, "%ld %lf", &max_bits, &ratio); test_support_init(); prof2d_set_sampler(sample_fmpz_poly_gcd); run_triangle(max_bits, ratio); test_support_cleanup(); } // end of file **************************************************************** flint-1.011/NTL-interface.h0000644017361200017500000000363311025357254015274 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** NTL-interface.h: Header file for NTL-interface.cpp Copyright (C) 2007, William Hart *****************************************************************************/ #ifndef FLINT_NTL_INT_H #define FLINT_NTL_INT_H #include #include #include "flint.h" #include "fmpz.h" #include "fmpz_poly.h" NTL_CLIENT /* Returns the number of limbs taken up by an NTL ZZ */ unsigned long ZZ_limbs(const ZZ& z); /* Convert an NTL ZZ to an fmpz_t Assumes the fmpz_t has already been allocated to have sufficient space */ void ZZ_to_fmpz(fmpz_t output, const ZZ& z); /* Convert an fmpz_t to an NTL ZZ */ void fmpz_to_ZZ(ZZ& output, const fmpz_t z); /* Convert an fmpz_poly_t to an NTL ZZX */ void fmpz_poly_to_ZZX(ZZX& output, const fmpz_poly_t poly); /* Convert an NTL ZZX to an fmpz_poly_t */ void ZZX_to_fmpz_poly(fmpz_poly_t output, const ZZX& poly); #endif flint-1.011/ZmodF_poly.h0000644017361200017500000002604711025357254014767 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** ZmodF_poly.h Polynomials over Z/pZ, where p = the Fermat number B^n + 1, where B = 2^FLINT_BITS. Routines for truncated Schoenhage-Strassen FFTs and convolutions. Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #ifndef FLINT_ZMODFPOLY_H #define FLINT_ZMODFPOLY_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include "memory-manager.h" #include "ZmodF.h" /**************************************************************************** ZmodF_poly_t ----------- ZmodF_poly_t represents a polynomial with coefficients in Z/pZ, where p = B^n + 1, B = 2^FLINT_BITS. Coefficients are represented in the format described in ZmodF.h. Each polynomial has a fixed transform length 2^depth, specified at creation time, where depth >= 0. A polynomial may be in either "coefficient representation" (list of coefficients of the polynomial), or "fourier representation" (list of fourier coefficients). The polynomial does not keep track of which form it is in, this is just a conceptual distinction. x.length indicates how many coefficients contain meaningful data. If x is in coefficient representation, the remaining coefficients are assumed to be *zero*. If x is in fourier representation, the remaining coefficients are not necessarily zero, they are simply *unknown*. Always 0 <= length <= 2^depth. Each polynomial carries a number of additional scratch buffers. The number of scratch buffers is set at creation time. Various routines require a certain number of scratch buffers to be present. 
/*
   Storage for one polynomial with coefficients in Z/pZ, where
   p = B^n + 1 and B = 2^FLINT_BITS.
*/
typedef struct
{
   // The transform length is 2^depth.
   unsigned long depth;

   // The n in the modulus p = B^n + 1.
   unsigned long n;

   // Number of coefficients that contain meaningful data
   // (0 <= length <= 2^depth).
   unsigned long length;

   // Single chunk of memory where all coefficients live.
   mp_limb_t* storage;

   // Array of pointers to coefficients (length 2^depth).
   ZmodF_t* coeffs;

   // Array of pointers to scratch buffers (length scratch_count).
   unsigned long scratch_count;
   ZmodF_t* scratch;
} ZmodF_poly_struct;

// ZmodF_poly_t allows reference-like semantics for ZmodF_poly_struct:
typedef ZmodF_poly_struct ZmodF_poly_t[1];

// Plain pointer to a ZmodF_poly_struct.
typedef ZmodF_poly_struct * ZmodF_poly_p;
*/ void ZmodF_poly_set(ZmodF_poly_t x, ZmodF_poly_t y); /* Sets res := pointwise product of x and y mod p. Only coefficients up to x.length are multiplied. PRECONDITIONS: Any combination of aliasing among res, x, y is allowed. x, y, res must have compatible dimensions. x and y must have the same length. NOTE: This function normalises the coefficients before multiplying. */ void ZmodF_poly_pointwise_mul(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y); /* Sets res := x + y mod p. Only coefficients up to x.length are added. PRECONDITIONS: Any combination of aliasing among res, x, y is allowed. x, y, res must have compatible dimensions. x and y must have the same length. NOTE: This function does *not* normalise before adding. Be careful with the overflow limb. */ void ZmodF_poly_add(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y); /* Sets res := x - y mod p. Only coefficients up to x.length are subtracted. PRECONDITIONS: Any combination of aliasing among res, x, y is allowed. x, y, res must have compatible dimensions. x and y must have the same length. NOTE: This function does *not* normalise before subtracting. Be careful with the overflow limb. */ void ZmodF_poly_sub(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y); /* Normalises all coefficients (up to x.length) to be in the range [0, p). */ void ZmodF_poly_normalise(ZmodF_poly_t poly); /* Divides all coefficients by 2^depth mod p. This should be used after running an inverse fourier transform. */ void ZmodF_poly_rescale(ZmodF_poly_t poly); /* Divides _trunc_ coefficients by 2^depth mod p. This can be used after running an inverse fourier transform if one only wants the first trunc coefficients. */ void ZmodF_poly_rescale_range(ZmodF_poly_t poly, unsigned long start, unsigned long n); /**************************************************************************** Fourier Transform Routines For the following routines, 2^depth must divide 4*n*FLINT_BITS. This ensures that Z/pZ has enough roots of unity. 
****************************************************************************/ /* This is the threshold for switching from a plain iterative FFT to an FFT factoring algorithm. It should be set to about the number of limbs in L1 cache. */ //#define ZMODFPOLY_FFT_FACTOR_THRESHOLD 7500 #define ZMODFPOLY_FFT_FACTOR_THRESHOLD 7000 /* Converts from coefficient representation to fourier representation. "length" is the desired number of fourier coefficients; x.length is set to length when finished. Output is inplace. (Note that in general *all* 2^depth coefficients will get overwritten in intermediate steps.) PRECONDITIONS: 0 <= length <= 2^poly.depth poly.scratch_count >= 1 */ void ZmodF_poly_FFT(ZmodF_poly_t poly, unsigned long length); /* Converts from fourier representation to coefficient representation. It *assumes* that the supplied fourier coefficients are actually the fourier transform of a polynomial whose coefficients beyond x.length are all zero. Result is inplace, x.length is not modified. (Note: after it's finished, the coefficients beyond x.length will contain garbage.) The output will be a factor of 2^depth too big. See ZmodF_poly_rescale(). PRECONDITIONS: poly.scratch_count >= 1 */ void ZmodF_poly_IFFT(ZmodF_poly_t poly); /* Computes convolution of x and y, places result in res. The resulting length will be x.length + y.length - 1. If this is more than 2^depth, then the resulting length is 2^depth, and the convolution is actually cyclic of length 2^depth. PRECONDITIONS: Any combination of aliasing among res, x, y is allowed. x, y, res must have compatible dimensions. NOTE: x and y will both be converted to fourier representation. If you don't like it, make a copy first. 
PRECONDITIONS: x.scratch_count >= 1 y.scratch_count >= 1 res.scratch_count >= 1 */ void ZmodF_poly_convolution(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y); void ZmodF_poly_convolution_range(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y, unsigned long start, unsigned long n); // internal functions void _ZmodF_poly_FFT_iterative( ZmodF_t* x, unsigned long depth, unsigned long skip, unsigned long nonzero, unsigned long length, unsigned long twist, unsigned long n, ZmodF_t* scratch); void _ZmodF_poly_FFT_factor( ZmodF_t* x, unsigned long rows_depth, unsigned long cols_depth, unsigned long skip, unsigned long nonzero, unsigned long length, unsigned long twist, unsigned long n, ZmodF_t* scratch); void _ZmodF_poly_FFT(ZmodF_t* x, unsigned long depth, unsigned long skip, unsigned long nonzero, unsigned long length, unsigned long twist, unsigned long n, ZmodF_t* scratch); void _ZmodF_poly_IFFT_recursive( ZmodF_t* x, unsigned long depth, unsigned long skip, unsigned long nonzero, unsigned long length, int extra, unsigned long twist, unsigned long n, ZmodF_t* scratch); void _ZmodF_poly_IFFT_iterative( ZmodF_t* x, unsigned long depth, unsigned long skip, unsigned long twist, unsigned long n, ZmodF_t* scratch); void _ZmodF_poly_IFFT(ZmodF_t* x, unsigned long depth, unsigned long skip, unsigned long nonzero, unsigned long length, int extra, unsigned long twist, unsigned long n, ZmodF_t* scratch); /**************************************************************************** Negacyclic Fourier Transform Routines For the following routines, 2^(depth+1) must divide 4*n*FLINT_BITS. This ensures that Z/pZ has enough roots of unity. These routines are exactly the same as those listed in the previous section, except that they evaluate at w^(2k+1), where w is a 2^(depth+1)-th root of unity. 
****************************************************************************/ void ZmodF_poly_negacyclic_FFT(ZmodF_poly_t poly); void ZmodF_poly_negacyclic_IFFT(ZmodF_poly_t poly); void ZmodF_poly_negacyclic_convolution(ZmodF_poly_t res, ZmodF_poly_t x, ZmodF_poly_t y); #ifdef __cplusplus } #endif #endif // end of file **************************************************************** flint-1.011/fmpz-test.c0000644017361200017500000011275511025357254014633 0ustar tabbotttabbott/*============================================================================ This file is part of FLINT. FLINT is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. FLINT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FLINT; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ===============================================================================*/ /**************************************************************************** fmpz-test.c: Test code for fmpz.c and fmpz.h Copyright (C) 2007, William Hart and David Harvey *****************************************************************************/ #include #include #include #include "flint.h" #include "test-support.h" #include "fmpz.h" #define SIGNS 1 #define DEBUG 0 // prints debug information #define DEBUG2 1 gmp_randstate_t state; #define RUN_TEST(targetfunc) \ printf("Testing " #targetfunc "()... "); \ fflush(stdout); \ success = test_##targetfunc(); \ all_success = all_success && success; \ printf(success ? 
"ok\n" : "FAIL!\n"); int test_fmpz_convert() { mpz_t num1, num2; fmpz_t fnum1; unsigned long bits; int result = 1; mpz_init(num1); mpz_init(num2); for (unsigned long i = 0; (i < 100000) && (result == 1); i++) { bits = random_ulong(1000); #if DEBUG printf("Bits = %ld\n", bits); #endif fnum1 = fmpz_init(FLINT_MAX((long)(bits-1)/FLINT_BITS,0L)+1); mpz_rrandomb(num1, state, bits); #if SIGNS if (random_ulong(2)) mpz_neg(num1, num1); #endif mpz_to_fmpz(fnum1, num1); fmpz_check_normalisation(fnum1); fmpz_to_mpz(num2, fnum1); fmpz_clear(fnum1); result = (mpz_cmp(num1, num2) == 0); } mpz_clear(num1); mpz_clear(num2); return result; } int test_fmpz_size() { mpz_t num1, num2; fmpz_t fnum1; unsigned long bits; int result = 1; mpz_init(num1); mpz_init(num2); for (unsigned long i = 0; (i < 100000) && (result == 1); i++) { bits = random_ulong(1000); fnum1 = fmpz_init(FLINT_MAX((long)(bits-1)/FLINT_BITS,0)+1); mpz_rrandomb(num1, state, bits); #if SIGNS if (random_ulong(2)) mpz_neg(num1, num1); #endif mpz_to_fmpz(fnum1, num1); result = (mpz_size(num1) == fmpz_size(fnum1)); fmpz_clear(fnum1); } mpz_clear(num1); mpz_clear(num2); return result; } int test_fmpz_bits() { mpz_t num1, num2; fmpz_t fnum1; unsigned long bits; int result = 1; mpz_init(num1); mpz_init(num2); for (unsigned long i = 0; (i < 100000) && (result == 1); i++) { bits = random_ulong(1000); fnum1 = fmpz_init(FLINT_MAX((long)(bits-1)/FLINT_BITS,0)+1); mpz_rrandomb(num1, state, bits); #if SIGNS if (random_ulong(2)) mpz_neg(num1, num1); #endif mpz_to_fmpz(fnum1, num1); result = (mpz_sizeinbase(num1, 2) == fmpz_bits(fnum1)) || ((mpz_cmp_ui(num1, 0) == 0) && (fmpz_bits(fnum1) == 0)); #if DEBUG2 if (!result) { printf("bits = %ld, bits2 = %ld\n", mpz_sizeinbase(num1, 2), fmpz_bits(fnum1)); gmp_printf("%Zd\n", num1); } #endif fmpz_clear(fnum1); } mpz_clear(num1); mpz_clear(num2); return result; } int test_fmpz_sgn() { mpz_t num1, num2; fmpz_t fnum1; unsigned long bits; int result = 1; mpz_init(num1); mpz_init(num2); for 
(unsigned long i = 0; (i < 100000) && (result == 1); i++) { bits = random_ulong(1000); fnum1 = fmpz_init(FLINT_MAX((long)(bits-1)/FLINT_BITS,0)+1); mpz_rrandomb(num1, state, bits); #if SIGNS if (random_ulong(2)) mpz_neg(num1, num1); #endif mpz_to_fmpz(fnum1, num1); result = (((long) mpz_sgn(num1) > 0) && ((long) fmpz_sgn(fnum1) > 0)) || (((long) mpz_sgn(num1) < 0) && ((long) fmpz_sgn(fnum1) < 0)) || (((long) mpz_sgn(num1) == 0) && ((long) fmpz_sgn(fnum1) == 0)); #if DEBUG2 if (!result) { printf("sign = %ld, sign2 = %ld\n", mpz_sgn(num1), fmpz_sgn(fnum1)); gmp_printf("%Zd\n", num1); } #endif fmpz_clear(fnum1); } mpz_clear(num1); mpz_clear(num2); return result; } int test_fmpz_set_si() { mpz_t num1, num2; fmpz_t fnum1; unsigned long bits; long x; int result = 1; mpz_init(num1); mpz_init(num2); fnum1 = fmpz_init(0); fmpz_set_si(fnum1, 0); fmpz_check_normalisation(fnum1); mpz_set_si(num1, 0); fmpz_to_mpz(num2, fnum1); result = (mpz_cmp(num1, num2) == 0); fmpz_clear(fnum1); for (unsigned long i = 0; (i < 100000) && (result == 1); i++) { bits = random_ulong(FLINT_BITS-1)+1; x = random_ulong(1L<