#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#define FUNC_isqrt 1
#define FUNC_gcd_ui 1
#define FUNC_is_perfect_square 1
#define FUNC_clz 1
#include "ptypes.h"
#include "factor.h"
#include "sieve.h"
#include "util.h"
#include "mulmod.h"
#include "cache.h"
#include "primality.h"
#include "montmath.h"
static int holf32(uint32_t n, UV *factors, uint32_t rounds);
/*
* You need to remember to use UV for unsigned and IV for signed types that
* are large enough to hold our data.
* If you use int, that's 32-bit on LP64 and LLP64 machines. You lose.
* If you use long, that's 32-bit on LLP64 machines. You lose.
* If you use long long, you may be too large which isn't so bad, but some
* compilers may not understand the type at all.
* perl.h already figured all this out, and provided us with these types which
* match the native integer type used inside our Perl, so just use those.
*/
static const unsigned short primes_small[] =
{0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,
101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,
193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283,
293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401,
409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509,
521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631,
641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751,
757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877,
881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009,
1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093,
1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201,
1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297,
1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427,
1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499,
1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607,
1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709,
1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823,
1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933,
1949,1951,1973,1979,1987,1993,1997,1999,2003,2011};
#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0]))
/* The main factoring loop */
/* Puts factors in factors[] and returns the number found. */
int factor(UV n, UV *factors)
{
int nsmallfactors, nfactors = 0; /* Number of factored in factors result */
uint32_t f = 7;
if (n > 1) {
while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n /= 2; }
while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; }
while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; }
}
if (f*f <= n) {
UV const lastsp = 83;
UV sp = 4;
/* Trial division from 7 to 421. Use 32-bit if possible. */
if (n <= 4294967295U) {
unsigned int un = n;
while (sp < lastsp) {
while ( (un%f) == 0 ) {
factors[nfactors++] = f;
un /= f;
}
f = primes_small[++sp];
if (f*f > un) break;
}
n = un;
} else {
while (sp < lastsp) {
while ( (n%f) == 0 ) {
factors[nfactors++] = f;
n /= f;
}
f = primes_small[++sp];
if (f*f > n) break;
}
}
/* If n is small and still composite, finish it here */
if (n < 2011*2011 && f*f <= n) { /* Trial division from 431 to 2003 */
unsigned int un = n;
while (sp < NPRIMES_SMALL) {
while ( (un%f) == 0 ) {
factors[nfactors++] = f;
un /= f;
}
f = primes_small[++sp];
if (f*f > un) break;
}
n = un;
}
}
if (f*f > n) {
if (n != 1) factors[nfactors++] = n;
return nfactors;
}
#if BITS_PER_WORD == 64
/* For small values less than f^3, use simple factor to split semiprime */
if (n < 100000000 && n < f*f*f) {
if (MR32(n)) factors[nfactors++] = n;
else nfactors += holf32(n, factors+nfactors, 10000);
return nfactors;
}
#endif
nsmallfactors = nfactors;
/* Perfect powers. Factor root only once. */
{
int i, j, k = powerof(n);
if (k > 1) {
UV p = rootof(n, k);
nfactors = factor(p, factors+nsmallfactors);
for (i = nfactors; i >= 0; i--)
for (j = 0; j < k; j++)
factors[nsmallfactors+k*i+j] = factors[nsmallfactors+i];
return nsmallfactors + k*nfactors;
}
}
{
UV tofac_stack[MPU_MAX_FACTORS+1];
int i, j, ntofac = 0;
int const verbose = _XS_get_verbose();
/* loop over each remaining factor, until ntofac == 0 */
do {
while ( (n >= f*f) && (!is_def_prime(n)) ) {
int split_success = 0;
/* Adjust the number of rounds based on the number size and speed */
UV const nbits = BITS_PER_WORD - clz(n);
#if USE_MONTMATH
UV const br_rounds = 8000 + (9000 * ((nbits <= 45) ? 0 : (nbits-45)));
UV const sq_rounds = 200000;
#elif MULMODS_ARE_FAST
UV const br_rounds = 500 + ( 200 * ((nbits <= 45) ? 0 : (nbits-45)));
UV const sq_rounds = 100000;
#else
UV const br_rounds = (nbits >= 63) ? 120000 : (nbits >= 58) ? 500 : 0;
UV const sq_rounds = 200000;
#endif
#if BITS_PER_WORD == 64
/* For small semiprimes the fastest solution is HOLF under 32, then
* Lehman (no trial) under 38. However on random inputs, HOLF is
* best only under 28-30 bits, and adding Lehman is always slower. */
if (!split_success && nbits <= 30) { /* This should always succeed */
split_success = holf32(n, tofac_stack+ntofac, 1000000)-1;
if (verbose) printf("holf %d\n", split_success);
}
#endif
/* Almost all inputs are factored here */
if (!split_success && br_rounds > 0) {
split_success = pbrent_factor(n, tofac_stack+ntofac, br_rounds, 1)-1;
if (verbose) printf("pbrent %d\n", split_success);
}
#if USE_MONTMATH
if (!split_success) {
split_success = pbrent_factor(n, tofac_stack+ntofac, 2*br_rounds, 3)-1;
if (verbose) printf("second pbrent %d\n", split_success);
}
#endif
/* Random 64-bit inputs at this point:
* About 3.1% are small enough that we did with HOLF.
* montmath: 96.89% pbrent, 0.01% pbrent2
* fast: 73.43% pbrent, 21.97% squfof, 1.09% p-1, 0.49% prho, long
* slow: 75.34% squfof, 19.47% pbrent, 0.20% p-1, 0.06% prho
*/
/* SQUFOF with these parameters gets 99.9% of everything left */
if (!split_success && nbits <= 62) {
split_success = squfof_factor(n,tofac_stack+ntofac, sq_rounds)-1;
if (verbose) printf("squfof %d\n", split_success);
}
/* At this point we should only have 16+ digit semiprimes. */
if (!split_success) {
split_success = pminus1_factor(n, tofac_stack+ntofac, 8000, 120000)-1;
if (verbose) printf("pminus1 %d\n", split_success);
/* Get the stragglers */
if (!split_success) {
split_success = prho_factor(n, tofac_stack+ntofac, 120000)-1;
if (verbose) printf("long prho %d\n", split_success);
if (!split_success) {
split_success = pbrent_factor(n, tofac_stack+ntofac, 500000, 5)-1;
if (verbose) printf("long pbrent %d\n", split_success);
}
}
}
if (split_success) {
MPUassert( split_success == 1, "split factor returned more than 2 factors");
ntofac++; /* Leave one on the to-be-factored stack */
if ((tofac_stack[ntofac] == n) || (tofac_stack[ntofac] == 1))
croak("bad factor\n");
n = tofac_stack[ntofac]; /* Set n to the other one */
} else {
/* Nothing should ever get here, but we're paranoid. */
UV m = f % 30;
UV limit = isqrt(n);
if (verbose) printf("doing trial on %"UVuf"\n", n);
while (f <= limit) {
if ( (n%f) == 0 ) {
do {
n /= f;
factors[nfactors++] = f;
} while ( (n%f) == 0 );
limit = isqrt(n);
}
f += wheeladvance30[m];
m = nextwheel30[m];
}
break; /* We just factored n via trial division. Exit loop. */
}
}
/* n is now prime (or 1), so add to already-factored stack */
if (n != 1) factors[nfactors++] = n;
/* Pop the next number off the to-factor stack */
if (ntofac > 0) n = tofac_stack[ntofac-1];
} while (ntofac-- > 0);
/* Sort the non-small factors */
for (i = nsmallfactors+1; i < nfactors; i++) {
UV fi = factors[i];
for (j = i; j > 0 && factors[j-1] > fi; j--)
factors[j] = factors[j-1];
factors[j] = fi;
}
}
return nfactors;
}
int factor_exp(UV n, UV *factors, UV* exponents)
{
int i = 1, j = 1, nfactors;
if (n == 1) return 0;
nfactors = factor(n, factors);
if (exponents == 0) {
for (; i < nfactors; i++)
if (factors[i] != factors[i-1])
factors[j++] = factors[i];
} else {
exponents[0] = 1;
for (; i < nfactors; i++) {
if (factors[i] != factors[i-1]) {
exponents[j] = 1;
factors[j++] = factors[i];
} else {
exponents[j-1]++;
}
}
}
return j;
}
int trial_factor(UV n, UV *factors, UV f, UV last)
{
int sp, nfactors = 0;
if (f < 2) f = 2;
if (last == 0 || last*last > n) last = UV_MAX;
if (n < 4 || last < f) {
factors[0] = n;
return (n == 1) ? 0 : 1;
}
/* possibly do uint32_t specific code here */
if (f < primes_small[NPRIMES_SMALL-1]) {
while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n >>= 1; }
if (3<=last) while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; }
if (5<=last) while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; }
for (sp = 4; sp < (int)NPRIMES_SMALL; sp++) {
f = primes_small[sp];
if (f*f > n || f > last) break;
while ( (n%f) == 0 ) {
factors[nfactors++] = f;
n /= f;
}
}
}
/* Trial division using a mod-30 wheel for larger values */
if (f*f <= n && f <= last) {
UV m, newlimit, limit = isqrt(n);
if (limit > last) limit = last;
m = f % 30;
while (f <= limit) {
if ( (n%f) == 0 ) {
do {
factors[nfactors++] = f;
n /= f;
} while ( (n%f) == 0 );
newlimit = isqrt(n);
if (newlimit < limit) limit = newlimit;
}
f += wheeladvance30[m];
m = nextwheel30[m];
}
}
/* All done! */
if (n != 1)
factors[nfactors++] = n;
return nfactors;
}
static void _divisors_from_factors(UV nfactors, UV* fp, UV* fe, UV* res) {
UV s, count = 1;
res[0] = 1;
for (s = 0; s < nfactors; s++) {
UV i, j, scount = count, p = fp[s], e = fe[s], mult = 1;
for (j = 0; j < e; j++) {
mult *= p;
for (i = 0; i < scount; i++)
res[count++] = res[i] * mult;
}
}
}
static int numcmp(const void *a, const void *b)
{ const UV *x = a, *y = b; return (*x > *y) ? 1 : (*x < *y) ? -1 : 0; }
UV* _divisor_list(UV n, UV *num_divisors)
{
UV factors[MPU_MAX_FACTORS+1];
UV exponents[MPU_MAX_FACTORS+1];
UV* divs;
int i, nfactors, ndivisors;
if (n <= 1) {
New(0, divs, 2, UV);
if (n == 0) { divs[0] = 0; divs[1] = 1; *num_divisors = 2; }
if (n == 1) { divs[0] = 1; *num_divisors = 1; }
return divs;
}
/* Factor and convert to factor/exponent pair */
nfactors = factor_exp(n, factors, exponents);
/* Calculate number of divisors, allocate space, fill with divisors */
ndivisors = exponents[0] + 1;
for (i = 1; i < nfactors; i++)
ndivisors *= (exponents[i] + 1);
New(0, divs, ndivisors, UV);
_divisors_from_factors(nfactors, factors, exponents, divs);
/* Sort divisors (numeric ascending) */
qsort(divs, ndivisors, sizeof(UV), numcmp);
/* Return number of divisors and list */
*num_divisors = ndivisors;
return divs;
}
/* The usual method, on OEIS for instance, is:
* (p^(k*(e+1))-1) / (p^k-1)
* but that overflows quicky. Instead we rearrange as:
* 1 + p^k + p^k^2 + ... p^k^e
* Return 0 if the result overflowed.
*/
static const UV sigma_overflow[11] =
#if BITS_PER_WORD == 64
{UVCONST(3000000000000000000),UVCONST(3000000000),2487240,64260,7026,
1622, 566, 256, 139, 85, 57};
#else
{UVCONST(845404560), 52560, 1548, 252, 84, 41, 24, 16, 12, 10, 8};
#endif
UV divisor_sum(UV n, UV k)
{
UV factors[MPU_MAX_FACTORS+1];
int nfac, i, j;
UV product = 1;
if (k > 11 || (k > 0 && n >= sigma_overflow[k-1])) return 0;
if (n <= 1) /* n=0 divisors are [0,1] */
return (n == 1) ? 1 : (k == 0) ? 2 : 1; /* n=1 divisors are [1] */
nfac = factor(n,factors);
if (k == 0) {
for (i = 0; i < nfac; i++) {
UV e = 1, f = factors[i];
while (i+1 < nfac && f == factors[i+1]) { e++; i++; }
product *= (e+1);
}
} else if (k == 1) {
for (i = 0; i < nfac; i++) {
UV f = factors[i];
UV pke = f, fmult = 1 + f;
while (i+1 < nfac && f == factors[i+1]) {
pke *= f;
fmult += pke;
i++;
}
product *= fmult;
}
} else {
for (i = 0; i < nfac; i++) {
UV f = factors[i];
UV fmult, pke, pk = f;
for (j = 1; j < (int)k; j++) pk *= f;
fmult = 1 + pk;
pke = pk;
while (i+1 < nfac && f == factors[i+1]) {
pke *= pk;
fmult += pke;
i++;
}
product *= fmult;
}
}
return product;
}
static int found_factor(UV n, UV f, UV* factors)
{
UV f2 = n/f;
int i = f > f2;
if (f == 1 || f2 == 1) {
factors[0] = n;
return 1;
}
factors[i] = f;
factors[1-i] = f2;
MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
return 2;
}
/* Knuth volume 2, algorithm C.
* Can't compete with HOLF, SQUFOF, pbrent, etc.
*/
int fermat_factor(UV n, UV *factors, UV rounds)
{
IV sqn, x, y, r;
MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in fermat_factor");
sqn = isqrt(n);
x = 2 * sqn + 1;
y = 1;
r = (sqn*sqn) - n;
while (r != 0) {
if (rounds-- == 0) { factors[0] = n; return 1; }
r += x;
x += 2;
do {
r -= y;
y += 2;
} while (r > 0);
}
r = (x-y)/2;
return found_factor(n, r, factors);
}
/* Hart's One Line Factorization. */
int holf_factor(UV n, UV *factors, UV rounds)
{
UV i, s, m, f;
MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in holf_factor");
/* We skip the perfect-square test for s in the loop, so we
* will never succeed if n is a perfect square. Test that now. */
if (is_perfect_square(n))
return found_factor(n, isqrt(n), factors);
if (n <= (UV_MAX >> 6)) { /* Try with premultiplier first */
UV npre = n * ( (n <= (UV_MAX >> 13)) ? 720 :
(n <= (UV_MAX >> 11)) ? 480 :
(n <= (UV_MAX >> 10)) ? 360 :
(n <= (UV_MAX >> 8)) ? 60 : 30 );
UV ni = npre;
#if 0 /* Straightforward */
while (rounds--) {
s = isqrt(ni) + 1;
m = (s*s) - ni;
if (is_perfect_square(m)) {
f = gcd_ui(n, s - isqrt(m));
if (f > 1 && f < n)
return found_factor(n, f, factors);
}
if (ni >= (ni+npre)) break;
ni += npre;
}
#else /* More optimized */
while (rounds--) {
s = 1 + (UV)sqrt((double)ni);
m = (s*s) - ni;
f = m & 127;
if (!((f*0x8bc40d7d) & (f*0xa1e2f5d1) & 0x14020a)) {
f = (UV)sqrt((double)m);
if (m == f*f) {
f = gcd_ui(n, s - f);
if (f > 1 && f < n)
return found_factor(n, f, factors);
}
}
if (ni >= (ni+npre)) break;
ni += npre;
}
#endif
}
for (i = 1; i <= rounds; i++) {
s = (UV) sqrt( (double)n * (double)i );
/* Assume s^2 isn't a perfect square. We're rapidly losing precision
* so we won't be able to accurately detect it anyway. */
s++; /* s = ceil(sqrt(n*i)) */
m = sqrmod(s, n);
if (is_perfect_square(m)) {
f = isqrt(m);
f = gcd_ui( (s>f) ? s-f : f-s, n);
/* This should always succeed, but with overflow concerns.... */
return found_factor(n, f, factors);
}
}
factors[0] = n;
return 1;
}
static int holf32(uint32_t n, UV *factors, uint32_t rounds) {
UV npre, ni; /* These should be 64-bit */
uint32_t s, m, f;
if (n < 3) { factors[0] = n; return 1; }
if (!(n&1)) { factors[0] = 2; factors[1] = n/2; return 2; }
if (is_perfect_square(n)) { factors[0] = factors[1] = isqrt(n); return 2; }
ni = npre = (UV) n * ((BITS_PER_WORD == 64) ? 5040 : 1);
while (rounds--) {
s = 1 + (uint32_t)sqrt((double)ni);
m = ((UV)s*(UV)s) - ni;
f = m & 127;
if (!((f*0x8bc40d7d) & (f*0xa1e2f5d1) & 0x14020a)) {
f = (uint32_t)sqrt((double)m);
if (m == f*f) {
f = gcd_ui(n, s - f);
if (f > 1 && f < n)
return found_factor(n, f, factors);
}
}
if (ni >= (ni+npre)) break; /* We've overflowed */
ni += npre;
}
factors[0] = n;
return 1;
}
#define ABSDIFF(x,y) (x>y) ? x-y : y-x
#if USE_MONTMATH
/* Pollard Rho with Brent's updates, using Montgomery reduction. */
int pbrent_factor(UV n, UV *factors, UV rounds, UV a)
{
UV const nbits = BITS_PER_WORD - clz(n);
const UV inner = (nbits <= 31) ? 32 : (nbits <= 35) ? 64 : (nbits <= 40) ? 160 : (nbits <= 52) ? 256 : 320;
UV f, m, r, rleft, Xi, Xm, Xs;
int irounds, fails = 6;
const uint64_t npi = mont_inverse(n), mont1 = mont_get1(n);
MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pbrent_factor");
r = f = 1;
Xi = Xm = Xs = mont1;
a = mont_geta(a,n);
while (rounds > 0) {
rleft = (r > rounds) ? rounds : r;
Xm = Xi;
/* Do rleft rounds, inner at a time */
while (rleft > 0) {
irounds = (rleft > (UV)inner) ? inner : rleft;
rleft -= irounds;
rounds -= irounds;
Xs = Xi;
if (n < (1ULL << 63)) {
Xi = mont_mulmod63(Xi,Xi+a,n);
m = ABSDIFF(Xi,Xm);
while (--irounds > 0) {
Xi = mont_mulmod63(Xi,Xi+a,n);
f = ABSDIFF(Xi,Xm);
m = mont_mulmod63(m, f, n);
}
} else if (a == mont1) {
Xi = mont_mulmod64(Xi,Xi+a,n);
m = ABSDIFF(Xi,Xm);
while (--irounds > 0) {
Xi = mont_mulmod64(Xi,Xi+a,n);
f = ABSDIFF(Xi,Xm);
m = mont_mulmod64(m, f, n);
}
} else {
Xi = addmod(mont_mulmod64(Xi,Xi,n), a, n);
m = ABSDIFF(Xi,Xm);
while (--irounds > 0) {
Xi = addmod(mont_mulmod64(Xi,Xi,n), a, n);
f = ABSDIFF(Xi,Xm);
m = mont_mulmod64(m, f, n);
}
}
f = gcd_ui(m, n);
if (f != 1)
break;
}
/* If f == 1, then we didn't find a factor. Move on. */
if (f == 1) {
r *= 2;
continue;
}
if (f == n) { /* back up, with safety */
Xi = Xs;
do {
if (n < (1ULL << 63) || a == mont1)
Xi = mont_mulmod(Xi,Xi+a,n);
else
Xi = addmod(mont_mulmod(Xi,Xi,n),a,n);
m = ABSDIFF(Xi,Xm);
f = gcd_ui(m, n);
} while (f == 1 && r-- != 0);
}
if (f == 0 || f == n) {
if (fails-- <= 0) break;
Xi = Xm = mont1;
a = addmod(a, mont_geta(11,n), n);
continue;
}
return found_factor(n, f, factors);
}
factors[0] = n;
return 1;
}
#else
/* Pollard Rho with Brent's updates. */
int pbrent_factor(UV n, UV *factors, UV rounds, UV a)
{
UV f, m, r, Xi, Xm;
const UV inner = (n <= 4000000000UL) ? 32 : 160;
int fails = 6;
MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pbrent_factor");
r = f = Xi = Xm = 1;
while (rounds > 0) {
UV rleft = (r > rounds) ? rounds : r;
UV saveXi = Xi;
/* Do rleft rounds, inner at a time */
while (rleft > 0) {
UV dorounds = (rleft > inner) ? inner : rleft;
saveXi = Xi;
rleft -= dorounds;
rounds -= dorounds;
Xi = sqraddmod(Xi, a, n); /* First iteration, no mulmod needed */
m = ABSDIFF(Xi,Xm);
while (--dorounds > 0) { /* Now do inner-1=63 more iterations */
Xi = sqraddmod(Xi, a, n);
f = ABSDIFF(Xi,Xm);
m = mulmod(m, f, n);
}
f = gcd_ui(m, n);
if (f != 1)
break;
}
/* If f == 1, then we didn't find a factor. Move on. */
if (f == 1) {
r *= 2;
Xm = Xi;
continue;
}
if (f == n) { /* back up, with safety */
Xi = saveXi;
do {
Xi = sqraddmod(Xi, a, n);
f = gcd_ui( ABSDIFF(Xi,Xm), n);
} while (f == 1 && r-- != 0);
}
if (f == 0 || f == n) {
if (fails-- <= 0) break;
Xm = addmod(Xm, 11, n);
Xi = Xm;
a++;
continue;
}
return found_factor(n, f, factors);
}
factors[0] = n;
return 1;
}
#endif
/* Pollard's Rho. */
int prho_factor(UV n, UV *factors, UV rounds)
{
UV a, f, i, m, oldU, oldV;
const UV inner = 64;
UV U = 7;
UV V = 7;
int fails = 3;
MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in prho_factor");
/* We could just as well say a = 1 */
switch (n%8) {
case 1: a = 1; break;
case 3: a = 2; break;
case 5: a = 3; break;
case 7: a = 5; break;
default: a = 7; break;
}
rounds = (rounds + inner - 1) / inner;
while (rounds-- > 0) {
m = 1; oldU = U; oldV = V;
for (i = 0; i < inner; i++) {
U = sqraddmod(U, a, n);
V = sqraddmod(V, a, n);
V = sqraddmod(V, a, n);
f = (U > V) ? U-V : V-U;
m = mulmod(m, f, n);
}
f = gcd_ui(m, n);
if (f == 1)
continue;
if (f == n) { /* back up to find a factor*/
U = oldU; V = oldV;
i = inner;
do {
U = sqraddmod(U, a, n);
V = sqraddmod(V, a, n);
V = sqraddmod(V, a, n);
f = gcd_ui( (U > V) ? U-V : V-U, n);
} while (f == 1 && i-- != 0);
}
if (f == 0 || f == n) {
if (fails-- <= 0) break;
U = addmod(U,2,n);
V = U;
a++;
continue;
}
return found_factor(n, f, factors);
}
factors[0] = n;
return 1;
}
/* Pollard's P-1 */
int pminus1_factor(UV n, UV *factors, UV B1, UV B2)
{
UV f, k, kmin;
UV a = 2, q = 2;
UV savea = 2, saveq = 2;
UV j = 1;
UV sqrtB1 = isqrt(B1);
#if USE_MONTMATH
const uint64_t npi = mont_inverse(n), mont1 = mont_get1(n);
UV ma = mont_geta(a,n);
#define PMINUS1_APPLY_POWER ma = mont_powmod(ma, k, n)
#define PMINUS1_RECOVER_A a = mont_recover(ma,n)
#else
#define PMINUS1_APPLY_POWER a = powmod(a, k, n)
#define PMINUS1_RECOVER_A
#endif
MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pminus1_factor");
if (B1 <= primes_small[NPRIMES_SMALL-2]) {
UV i;
for (i = 1; primes_small[i] <= B1; i++) {
q = k = primes_small[i];
if (q <= sqrtB1) {
k = q*q; kmin = B1/q;
while (k <= kmin) k *= q;
}
PMINUS1_APPLY_POWER;
if ( (j++ % 32) == 0) {
PMINUS1_RECOVER_A;
if (a == 0 || gcd_ui(a-1, n) != 1)
break;
savea = a; saveq = q;
}
}
PMINUS1_RECOVER_A;
} else {
START_DO_FOR_EACH_PRIME(2, B1) {
q = k = p;
if (q <= sqrtB1) {
k = q*q; kmin = B1/q;
while (k <= kmin) k *= q;
}
PMINUS1_APPLY_POWER;
if ( (j++ % 32) == 0) {
PMINUS1_RECOVER_A;
if (a == 0 || gcd_ui(a-1, n) != 1)
break;
savea = a; saveq = q;
}
} END_DO_FOR_EACH_PRIME
PMINUS1_RECOVER_A;
}
if (a == 0) { factors[0] = n; return 1; }
f = gcd_ui(a-1, n);
/* If we found more than one factor in stage 1, backup and single step */
if (f == n) {
a = savea;
START_DO_FOR_EACH_PRIME(saveq, B1) {
k = p; kmin = B1/p;
while (k <= kmin) k *= p;
a = powmod(a, k, n);
f = gcd_ui(a-1, n);
q = p;
if (f != 1)
break;
} END_DO_FOR_EACH_PRIME
/* If f == n again, we could do:
* for (savea = 3; f == n && savea < 100; savea = next_prime(savea)) {
* a = savea;
* for (q = 2; q <= B1; q = next_prime(q)) {
* ...
* }
* }
* but this could be a huge time sink if B1 is large, so just fail.
*/
}
/* STAGE 2 */
if (f == 1 && B2 > B1) {
UV bm = a;
UV b = 1;
UV bmdiff;
UV precomp_bm[111] = {0}; /* Enough for B2 = 189M */
/* calculate (a^q)^2, (a^q)^4, etc. */
bmdiff = sqrmod(bm, n);
precomp_bm[0] = bmdiff;
for (j = 1; j < 20; j++) {
bmdiff = mulmod(bmdiff,bm,n);
bmdiff = mulmod(bmdiff,bm,n);
precomp_bm[j] = bmdiff;
}
a = powmod(a, q, n);
j = 1;
START_DO_FOR_EACH_PRIME( q+1, B2 ) {
UV lastq = q;
UV qdiff;
q = p;
/* compute a^q = a^lastq * a^(q-lastq) */
qdiff = (q - lastq) / 2 - 1;
if (qdiff >= 111) {
bmdiff = powmod(bm, q-lastq, n); /* Big gap */
} else {
bmdiff = precomp_bm[qdiff];
if (bmdiff == 0) {
if (precomp_bm[qdiff-1] != 0)
bmdiff = mulmod(mulmod(precomp_bm[qdiff-1],bm,n),bm,n);
else
bmdiff = powmod(bm, q-lastq, n);
precomp_bm[qdiff] = bmdiff;
}
}
a = mulmod(a, bmdiff, n);
if (a == 0) break;
b = mulmod(b, a-1, n); /* if b == 0, we found multiple factors */
if ( (j++ % 64) == 0 ) {
f = gcd_ui(b, n);
if (f != 1)
break;
}
} END_DO_FOR_EACH_PRIME
f = gcd_ui(b, n);
}
return found_factor(n, f, factors);
}
/* Simple Williams p+1 */
static void pp1_pow(UV *cX, UV exp, UV n)
{
UV X0 = *cX;
UV X = *cX;
UV Y = mulsubmod(X, X, 2, n);
UV bit = UVCONST(1) << (clz(exp)-1);
while (bit) {
UV T = mulsubmod(X, Y, X0, n);
if ( exp & bit ) {
X = T;
Y = mulsubmod(Y, Y, 2, n);
} else {
Y = T;
X = mulsubmod(X, X, 2, n);
}
bit >>= 1;
}
*cX = X;
}
int pplus1_factor(UV n, UV *factors, UV B1)
{
UV X1, X2, f;
UV sqrtB1 = isqrt(B1);
MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pplus1_factor");
X1 = 7 % n;
X2 = 11 % n;
f = 1;
START_DO_FOR_EACH_PRIME(2, B1) {
UV k = p;
if (p < sqrtB1) {
UV kmin = B1/p;
while (k <= kmin)
k *= p;
}
pp1_pow(&X1, k, n);
if (X1 != 2) {
f = gcd_ui( submod(X1, 2, n) , n);
if (f != 1 && f != n) break;
}
pp1_pow(&X2, k, n);
if (X2 != 2) {
f = gcd_ui( submod(X2, 2, n) , n);
if (f != 1 && f != n) break;
}
} END_DO_FOR_EACH_PRIME
return found_factor(n, f, factors);
}
/* SQUFOF, based on Ben Buhrow's racing version. */
#if 1
/* limit to 62-bit inputs, use 32-bit types, faster */
#define SQUFOF_TYPE uint32_t
#define SQUFOF_MAX (UV_MAX >> 2)
#else
/* All 64-bit inputs possible, though we severely limit multipliers */
#define SQUFOF_TYPE UV
#define SQUFOF_MAX UV_MAX
#endif
typedef struct
{
int valid;
SQUFOF_TYPE P;
SQUFOF_TYPE bn;
SQUFOF_TYPE Qn;
SQUFOF_TYPE Q0;
SQUFOF_TYPE b0;
SQUFOF_TYPE it;
SQUFOF_TYPE imax;
SQUFOF_TYPE mult;
} mult_t;
/* N < 2^63 (or 2^31). Returns 0 or a factor */
static UV squfof_unit(UV n, mult_t* mult_save)
{
SQUFOF_TYPE imax,i,Q0,Qn,bn,b0,P,bbn,Ro,S,So,t1,t2;
P = mult_save->P;
bn = mult_save->bn;
Qn = mult_save->Qn;
Q0 = mult_save->Q0;
b0 = mult_save->b0;
i = mult_save->it;
imax = i + mult_save->imax;
#define SQUARE_SEARCH_ITERATION \
t1 = P; \
P = bn*Qn - P; \
t2 = Qn; \
Qn = Q0 + bn*(t1-P); \
Q0 = t2; \
bn = (b0 + P) / Qn; \
i++;
while (1) {
int j = 0;
if (i & 0x1) {
SQUARE_SEARCH_ITERATION;
}
/* i is now even */
while (1) {
/* We need to know P, bn, Qn, Q0, iteration count, i from prev */
if (i >= imax) {
/* save state and try another multiplier. */
mult_save->P = P;
mult_save->bn = bn;
mult_save->Qn = Qn;
mult_save->Q0 = Q0;
mult_save->it = i;
return 0;
}
SQUARE_SEARCH_ITERATION;
/* Even iteration. Check for square: Qn = S*S */
t2 = Qn & 127;
if (!((t2*0x8bc40d7d) & (t2*0xa1e2f5d1) & 0x14020a)) {
t1 = (uint32_t) sqrt(Qn);
if (Qn == t1*t1)
break;
}
/* Odd iteration. */
SQUARE_SEARCH_ITERATION;
}
S = t1; /* isqrt(Qn); */
mult_save->it = i;
/* Reduce to G0 */
Ro = P + S*((b0 - P)/S);
So = (n - (UV)Ro*(UV)Ro)/(UV)S;
bbn = (b0+Ro)/So;
/* Search for symmetry point */
#define SYMMETRY_POINT_ITERATION \
t1 = Ro; \
Ro = bbn*So - Ro; \
t2 = So; \
So = S + bbn*(t1-Ro); \
S = t2; \
bbn = (b0+Ro)/So; \
if (Ro == t1) break;
j = 0;
while (1) {
SYMMETRY_POINT_ITERATION;
SYMMETRY_POINT_ITERATION;
SYMMETRY_POINT_ITERATION;
SYMMETRY_POINT_ITERATION;
if (j++ > 2000000) {
mult_save->valid = 0;
return 0;
}
}
t1 = gcd_ui(Ro, n);
if (t1 > 1)
return t1;
}
}
/* Gower and Wagstaff 2008:
* http://www.ams.org/journals/mcom/2008-77-261/S0025-5718-07-02010-8/
* Section 5.3. I've added some with 13,17,19. Sorted by F(). */
static const UV squfof_multipliers[] =
/* { 3*5*7*11, 3*5*7, 3*5*11, 3*5, 3*7*11, 3*7, 5*7*11, 5*7,
3*11, 3, 5*11, 5, 7*11, 7, 11, 1 }; */
{ 3*5*7*11, 3*5*7, 3*5*7*11*13, 3*5*7*13, 3*5*7*11*17, 3*5*11,
3*5*7*17, 3*5, 3*5*7*11*19, 3*5*11*13,3*5*7*19, 3*5*7*13*17,
3*5*13, 3*7*11, 3*7, 5*7*11, 3*7*13, 5*7,
3*5*17, 5*7*13, 3*5*19, 3*11, 3*7*17, 3,
3*11*13, 5*11, 3*7*19, 3*13, 5, 5*11*13,
5*7*19, 5*13, 7*11, 7, 3*17, 7*13,
11, 1 };
#define NSQUFOF_MULT (sizeof(squfof_multipliers)/sizeof(squfof_multipliers[0]))
int squfof_factor(UV n, UV *factors, UV rounds)
{
mult_t mult_save[NSQUFOF_MULT];
UV i, nn64, sqrtnn64, mult, f64,rounds_done = 0;
int mults_racing = NSQUFOF_MULT;
/* Caller should have handled these trivial cases */
MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in squfof_factor");
/* Too big */
if (n > SQUFOF_MAX) {
factors[0] = n; return 1;
}
for (i = 0; i < NSQUFOF_MULT; i++) {
mult_save[i].valid = -1;
mult_save[i].it = 0;
}
/* Race each multiplier for a bit (20-20k rounds) */
while (mults_racing > 0 && rounds_done < rounds) {
for (i = 0; i < NSQUFOF_MULT && rounds_done < rounds; i++) {
if (mult_save[i].valid == 0) continue;
mult = squfof_multipliers[i];
nn64 = n * mult;
if (mult_save[i].valid == -1) {
if ((SQUFOF_MAX / mult) < n) {
mult_save[i].valid = 0; /* This multiplier would overflow 64-bit */
mults_racing--;
continue;
}
sqrtnn64 = isqrt(nn64);
mult_save[i].valid = 1;
mult_save[i].Q0 = 1;
mult_save[i].b0 = sqrtnn64;
mult_save[i].P = sqrtnn64;
mult_save[i].Qn = (SQUFOF_TYPE)(nn64 - sqrtnn64 * sqrtnn64);
if (mult_save[i].Qn == 0)
return found_factor(n, sqrtnn64, factors);
mult_save[i].bn = (2 * sqrtnn64) / (UV)mult_save[i].Qn;
mult_save[i].it = 0;
mult_save[i].mult = mult;
mult_save[i].imax = (UV) (sqrt(sqrtnn64) / 16);
if (mult_save[i].imax < 20) mult_save[i].imax = 20;
if (mult_save[i].imax > rounds) mult_save[i].imax = rounds;
}
if (mults_racing == 1) /* Do all rounds if only one multiplier left */
mult_save[i].imax = (rounds - rounds_done);
f64 = squfof_unit(nn64, &mult_save[i]);
if (f64 > 1) {
UV f64red = f64 / gcd_ui(f64, mult);
if (f64red > 1) {
/* unsigned long totiter = 0;
{int K; for (K = 0; K < NSQUFOF_MULT; K++) totiter += mult_save[K].it; }
printf(" n %lu mult %lu it %lu (%lu)\n",n,mult,totiter,(UV)mult_save[i].it); */
return found_factor(n, f64red, factors);
}
/* Found trivial factor. Quit working with this multiplier. */
mult_save[i].valid = 0;
}
if (mult_save[i].valid == 0)
mults_racing--;
rounds_done += mult_save[i].imax; /* Assume we did all rounds */
}
}
/* No factors found */
factors[0] = n;
return 1;
}
#define SQR_TAB_SIZE 512
static int sqr_tab_init = 0;
static double sqr_tab[SQR_TAB_SIZE];
static void make_sqr_tab(void) {
int i;
for (i = 0; i < SQR_TAB_SIZE; i++)
sqr_tab[i] = sqrt((double)i);
sqr_tab_init = 1;
}
/* Lehman written and tuned by Warren D. Smith.
* Revised by Ben Buhrow and Dana Jacobsen. */
int lehman_factor(UV n, UV *factors, int do_trial) {
const double Tune = ((n >> 31) >> 5) ? 3.5 : 5.0;
double x, sqrtn;
UV a,c,kN,kN4,B2;
uint32_t b,p,k,r,B,U,Bred,inc,ip;
if (!(n&1)) return found_factor(n, 2, factors);
B = Tune * (1+rootof(n,3));
if (do_trial) {
uint32_t FirstCut = 0.1 * B;
if (FirstCut < 84) FirstCut = 84;
if (FirstCut > 65535) FirstCut = 65535;
for (ip = 2; ip < NPRIMES_SMALL; ip++) {
p = primes_small[ip];
if (p >= FirstCut)
break;
if (n % p == 0)
return found_factor(n, p, factors);
}
}
if (n >= UVCONST(8796393022207)) { factors[0] = n; return 1; }
Bred = B / (Tune * Tune * Tune);
B2 = B*B;
kN = 0;
if (!sqr_tab_init) make_sqr_tab();
sqrtn = sqrt(n);
for (k = 1; k <= Bred; k++) {
if (k&1) { inc = 4; r = (k+n) % 4; }
else { inc = 2; r = 1; }
kN += n;
if (kN >= UVCONST(1152921504606846976)) { factors[0] = n; return 1; }
kN4 = kN*4;
x = (k < SQR_TAB_SIZE) ? sqrtn * sqr_tab[k] : sqrt((double)kN);
a = x;
if ((UV)a * (UV)a == kN)
return found_factor(n, gcd_ui(a,n), factors);
x *= 2;
a = x + 0.9999999665; /* Magic constant */
b = a % inc;
b = a + (inc+r-b) % inc;
c = (UV)b*(UV)b - kN4;
U = x + B2/(2*x);
for (a = b; a <= U; c += inc*(a+a+inc), a += inc) {
/* Check for perfect square */
b = c & 127;
if (!((b*0x8bc40d7d) & (b*0xa1e2f5d1) & 0x14020a)) {
b = (uint32_t) sqrt(c);
if (c == b*b) {
B2 = gcd_ui(a+b, n);
return found_factor(n, B2, factors);
}
}
}
}
if (do_trial) {
if (B > 65535) B = 65535;
/* trial divide from primes[ip] to B. We could:
* 1) use table of 6542 shorts for the primes.
* 2) use a wheel
* 3) let trial_factor handle it
*/
if (ip >= NPRIMES_SMALL) ip = NPRIMES_SMALL-1;
return trial_factor(n, factors, primes_small[ip], B);
}
factors[0] = n;
return 1;
}
static UV dlp_trial(UV a, UV g, UV p, UV maxrounds) {
UV k, t;
if (maxrounds > p) maxrounds = p;
#if USE_MONTMATH
if (p&1) {
const uint64_t npi = mont_inverse(p), mont1 = mont_get1(p);
g = mont_geta(g, p);
a = mont_geta(a, p);
for (t = g, k = 1; k < maxrounds; k++) {
if (t == a)
return k;
t = mont_mulmod(t, g, p);
if (t == g) break; /* Stop at cycle */
}
} else
#endif
{
for (t = g, k = 1; k < maxrounds; k++) {
if (t == a)
return k;
t = mulmod(t, g, p);
if (t == g) break; /* Stop at cycle */
}
}
return 0;
}
/******************************************************************************/
/* DLP - Pollard Rho */
/******************************************************************************/
/* Compare with Pomerance paper (dartmouth dtalk4):
* Type I/II/III = our case 1, 0, 2.
* x_i = u, a_i = v, b_i = w
*
* Also see Bai/Brent 2008 for many ideas to speed this up.
* https://maths-people.anu.edu.au/~brent/pd/rpb231.pdf
* E.g. Teske adding-walk, Brent's cycle algo, Teske modified cycle
*/
#define pollard_rho_cycle(u,v,w,p,n,a,g) \
switch (u % 3) { \
case 0: u = mulmod(u,u,p); v = mulmod(v,2,n); w = mulmod(w,2,n); break;\
case 1: u = mulmod(u,a,p); v = addmod(v,1,n); break;\
case 2: u = mulmod(u,g,p); w = addmod(w,1,n); break;\
}
typedef struct prho_state_t {
UV u;
UV v;
UV w;
UV U;
UV V;
UV W;
UV round;
int failed;
int verbose;
} prho_state_t;
static UV dlp_prho_uvw(UV a, UV g, UV p, UV n, UV rounds, prho_state_t *s) {
UV i, k = 0;
UV u=s->u, v=s->v, w=s->w;
UV U=s->U, V=s->V, W=s->W;
int const verbose = s->verbose;
if (s->failed) return 0;
if (s->round + rounds > n) rounds = n - s->round;
for (i = 1; i <= rounds; i++) {
pollard_rho_cycle(u,v,w,p,n,a,g); /* xi, ai, bi */
pollard_rho_cycle(U,V,W,p,n,a,g);
pollard_rho_cycle(U,V,W,p,n,a,g); /* x2i, a2i, b2i */
if (verbose > 3) printf( "%3"UVuf" %4"UVuf" %3"UVuf" %3"UVuf" %4"UVuf" %3"UVuf" %3"UVuf"\n", i, u, v, w, U, V, W );
if (u == U) {
UV r1, r2, G, G2;
r1 = submod(v, V, n);
if (r1 == 0) {
if (verbose) printf("DLP Rho failure, r=0\n");
s->failed = 1;
k = 0;
break;
}
r2 = submod(W, w, n);
G = gcd_ui(r1,n);
G2 = gcd_ui(G,r2);
k = divmod(r2/G2, r1/G2, n/G2);
if (G > 1) {
if (powmod(g,k,p) == a) {
if (verbose > 2) printf(" common GCD %"UVuf"\n", G2);
} else {
UV m, l = divmod(r2, r1, n/G);
for (m = 0; m < G; m++) {
k = addmod(l, mulmod(m,(n/G),n), n);
if (powmod(g,k,p) == a) break;
}
if (m<G && verbose > 2) printf(" GCD %"UVuf", found with m=%"UVuf"\n", G, m);
}
}
if (powmod(g,k,p) != a) {
if (verbose > 2) printf("r1 = %"UVuf" r2 = %"UVuf" k = %"UVuf"\n", r1, r2, k);
if (verbose) printf("Incorrect DLP Rho solution: %"UVuf"\n", k);
s->failed = 1;
k = 0;
}
break;
}
}
s->round += i-1;
if (verbose && k) printf("DLP Rho solution found after %"UVuf" steps\n", s->round + 1);
s->u = u; s->v = v; s->w = w; s->U = U; s->V = V; s->W = W;
return k;
}
#if 0
static UV dlp_prho(UV a, UV g, UV p, UV n, UV maxrounds) {
#ifdef DEBUG
int const verbose = _XS_get_verbose()
#else
int const verbose = 0;
#endif
prho_state_t s = {1, 0, 0, 1, 0, 0, 0, 0, verbose};
return dlp_prho_uvw(a, g, p, n, maxrounds, &s);
}
#endif
/******************************************************************************/
/* DLP - BSGS */
/******************************************************************************/
typedef struct bsgs_hash_t {
UV M; /* The baby step index */
UV V; /* The powmod value */
struct bsgs_hash_t* next;
} bsgs_hash_t;
/****************************************/
/* Simple and limited pool allocation */
#define BSGS_ENTRIES_PER_PAGE 8000
typedef struct bsgs_page_top_t {
struct bsgs_page_t* first;
bsgs_hash_t** table;
UV size;
int nused;
int npages;
} bsgs_page_top_t;
typedef struct bsgs_page_t {
bsgs_hash_t entries[BSGS_ENTRIES_PER_PAGE];
struct bsgs_page_t* next;
} bsgs_page_t;
static bsgs_hash_t* get_entry(bsgs_page_top_t* top) {
if (top->nused == 0 || top->nused >= BSGS_ENTRIES_PER_PAGE) {
bsgs_page_t* newpage;
Newz(0, newpage, 1, bsgs_page_t);
newpage->next = top->first;
top->first = newpage;
top->nused = 0;
top->npages++;
}
return top->first->entries + top->nused++;
}
static void destroy_pages(bsgs_page_top_t* top) {
bsgs_page_t* head = top->first;
while (head != 0) {
bsgs_page_t* next = head->next;
Safefree(head);
head = next;
}
top->first = 0;
}
/****************************************/
static void bsgs_hash_put(bsgs_page_top_t* pagetop, UV v, UV i) {
UV idx = v % pagetop->size;
bsgs_hash_t** table = pagetop->table;
bsgs_hash_t* entry = table[idx];
while (entry && entry->V != v)
entry = entry->next;
if (!entry) {
entry = get_entry(pagetop);
entry->M = i;
entry->V = v;
entry->next = table[idx];
table[idx] = entry;
}
}
static UV bsgs_hash_get(bsgs_page_top_t* pagetop, UV v) {
bsgs_hash_t* entry = pagetop->table[v % pagetop->size];
while (entry && entry->V != v)
entry = entry->next;
return (entry) ? entry->M : 0;
}
static UV bsgs_hash_put_get(bsgs_page_top_t* pagetop, UV v, UV i) {
UV idx = v % pagetop->size;
bsgs_hash_t** table = pagetop->table;
bsgs_hash_t* entry = table[idx];
while (entry && entry->V != v)
entry = entry->next;
if (entry)
return entry->M;
entry = get_entry(pagetop);
entry->M = i;
entry->V = v;
entry->next = table[idx];
table[idx] = entry;
return 0;
}
static UV dlp_bsgs(UV a, UV g, UV p, UV n, UV maxent, int race_rho) {
bsgs_page_top_t PAGES;
UV i, m, maxm, hashmap_count;
UV aa, S, gm, T, gs_i, bs_i;
UV result = 0;
#ifdef DEBUG
int const verbose = _XS_get_verbose();
#else
int const verbose = 0;
#endif
prho_state_t rho_state = {1, 0, 0, 1, 0, 0, 0, 0, verbose};
if (n <= 2) return 0; /* Shouldn't be here with gorder this low */
if (race_rho) {
result = dlp_prho_uvw(a, g, p, n, 10000, &rho_state);
if (result) {
if (verbose) printf("rho found solution in BSGS step 0\n");
return result;
}
}
if (a == 0) return 0; /* We don't handle this case */
maxm = isqrt(n);
m = (maxent > maxm) ? maxm : maxent;
hashmap_count = (m < 65537) ? 65537 :
(m > 40000000) ? 40000003 :
next_prime(m); /* Ave depth around 2 */
/* Create table. Size: 8*hashmap_count bytes. */
PAGES.size = hashmap_count;
PAGES.first = 0;
PAGES.nused = 0;
PAGES.npages = 0;
Newz(0, PAGES.table, hashmap_count, bsgs_hash_t*);
aa = mulmod(a,a,p);
S = a;
gm = powmod(g, m, p);
T = gm;
gs_i = 0;
bs_i = 0;
bsgs_hash_put(&PAGES, S, 0); /* First baby step */
S = mulmod(S, g, p);
/* Interleaved Baby Step Giant Step */
for (i = 1; i <= m; i++) {
gs_i = bsgs_hash_put_get(&PAGES, S, i);
if (gs_i) { bs_i = i; break; }
S = mulmod(S, g, p);
if (S == aa) { /* We discovered the solution! */
if (verbose) printf(" dlp bsgs: solution at BS step %"UVuf"\n", i+1);
result = i+1;
break;
}
bs_i = bsgs_hash_put_get(&PAGES, T, i);
if (bs_i) { gs_i = i; break; }
T = mulmod(T, gm, p);
if (race_rho && (i % 2048) == 0) {
result = dlp_prho_uvw(a, g, p, n, 100000, &rho_state);
if (result) {
if (verbose) printf("rho found solution in BSGS step %"UVuf"\n", i);
break;
}
}
}
if (!result) {
/* Extend Giant Step search */
if (!(gs_i || bs_i)) {
UV b = (p+m-1)/m;
if (m < maxm && b > 8*m) b = 8*m;
for (i = m+1; i < b; i++) {
bs_i = bsgs_hash_get(&PAGES, T);
if (bs_i) { gs_i = i; break; }
T = mulmod(T, gm, p);
if (race_rho && (i % 2048) == 0) {
result = dlp_prho_uvw(a, g, p, n, 100000, &rho_state);
if (result) {
if (verbose) printf("rho found solution in BSGS step %"UVuf"\n", i);
break;
}
}
}
}
if (gs_i || bs_i) {
result = submod(mulmod(gs_i, m, p), bs_i, p);
}
}
if (verbose) printf(" dlp bsgs using %d pages (%.1fMB+%.1fMB) for hash\n", PAGES.npages, ((double)PAGES.npages * sizeof(bsgs_page_t)) / (1024*1024), ((double)hashmap_count * sizeof(bsgs_hash_t*)) / (1024*1024));
destroy_pages(&PAGES);
Safefree(PAGES.table);
if (result != 0 && powmod(g,result,p) != a) {
if (verbose) printf("Incorrect DLP BSGS solution: %"UVuf"\n", result);
result = 0;
}
if (race_rho && result == 0) {
result = dlp_prho_uvw(a, g, p, n, 2000000000U, &rho_state);
}
return result;
}
/* Find smallest k where a = g^k mod p */
#define DLP_TRIAL_NUM 10000
static UV znlog_solve(UV a, UV g, UV p, UV n) {
UV k, sqrtn;
const int verbose = _XS_get_verbose();
if (a >= p) a %= p;
if (g >= p) g %= p;
if (a == 1 || g == 0 || p <= 2)
return 0;
if (verbose > 1 && n != p-1) printf(" g=%"UVuf" p=%"UVuf", order %"UVuf"\n", g, p, n);
/* printf(" solving znlog(%"UVuf",%"UVuf",%"UVuf") n=%"UVuf"\n", a, g, p, n); */
if (n == 0 || n <= DLP_TRIAL_NUM) {
k = dlp_trial(a, g, p, DLP_TRIAL_NUM);
if (verbose) printf(" dlp trial 10k %s\n", (k!=0 || p <= DLP_TRIAL_NUM) ? "success" : "failure");
if (k != 0 || (n > 0 && n <= DLP_TRIAL_NUM)) return k;
}
{ /* Existence checks */
UV aorder, gorder = n;
if (gorder != 0 && powmod(a, gorder, p) != 1) return 0;
aorder = znorder(a,p);
if (aorder == 0 && gorder != 0) return 0;
if (aorder != 0 && gorder % aorder != 0) return 0;
}
sqrtn = (n == 0) ? 0 : isqrt(n);
if (n == 0) n = p-1;
{
UV maxent = (sqrtn > 0) ? sqrtn+1 : 100000;
k = dlp_bsgs(a, g, p, n, maxent/2, /* race rho */ 1);
if (verbose) printf(" dlp bsgs %"UVuf"k %s\n", maxent/1000, k!=0 ? "success" : "failure");
if (k != 0) return k;
if (sqrtn > 0 && sqrtn < maxent) return 0;
}
if (verbose) printf(" dlp doing exhaustive trial\n");
k = dlp_trial(a, g, p, p);
return k;
}
/* Silver-Pohlig-Hellman */
static UV znlog_ph(UV a, UV g, UV p, UV p1) {
UV fac[MPU_MAX_FACTORS+1];
UV exp[MPU_MAX_FACTORS+1];
int i, nfactors;
UV x, j;
if (p1 == 0) return 0; /* TODO: Should we plow on with p1=p-1? */
nfactors = factor_exp(p1, fac, exp);
if (nfactors == 1)
return znlog_solve(a, g, p, p1);
for (i = 0; i < nfactors; i++) {
UV pi, delta, gamma;
pi = fac[i]; for (j = 1; j < exp[i]; j++) pi *= fac[i];
delta = powmod(a,p1/pi,p);
gamma = powmod(g,p1/pi,p);
/* printf(" solving znlog(%"UVuf",%"UVuf",%"UVuf")\n", delta, gamma, p); */
fac[i] = znlog_solve( delta, gamma, p, znorder(gamma,p) );
exp[i] = pi;
}
x = chinese(fac, exp, nfactors, &i);
if (i == 1 && powmod(g, x, p) == a)
return x;
return 0;
}
/* Find smallest k where a = g^k mod p */
UV znlog(UV a, UV g, UV p) {
UV k, gorder, aorder;
const int verbose = _XS_get_verbose();
if (a >= p) a %= p;
if (g >= p) g %= p;
if (a == 1 || g == 0 || p <= 2)
return 0;
/* TODO: We call znorder with the same p many times. We should have a
* method for znorder given {phi,nfactors,fac,exp} */
gorder = znorder(g,p);
if (gorder != 0 && powmod(a, gorder, p) != 1) return 0;
aorder = znorder(a,p);
if (aorder == 0 && gorder != 0) return 0;
if (aorder != 0 && gorder % aorder != 0) return 0;
/* TODO: Come up with a better solution for a=0 */
if (a == 0 || p < DLP_TRIAL_NUM || (gorder > 0 && gorder < DLP_TRIAL_NUM)) {
if (verbose > 1) printf(" dlp trial znlog(%"UVuf",%"UVuf",%"UVuf")\n",a,g,p);
k = dlp_trial(a, g, p, p);
return k;
}
if (!is_prob_prime(gorder)) {
k = znlog_ph(a, g, p, gorder);
if (verbose) printf(" dlp PH %s\n", k!=0 ? "success" : "failure");
if (k != 0) return k;
}
return znlog_solve(a, g, p, gorder);
}
/* Compile with:
* gcc -O3 -fomit-frame-pointer -march=native -Wall -DFACTOR_STANDALONE -DSTANDALONE factor.c util.c sieve.c cache.c primality.c lmo.c -lm
*/
#ifdef FACTOR_STANDALONE
#include <errno.h>
int main(int argc, char *argv[])
{
UV n;
UV factors[MPU_MAX_FACTORS+1];
int nfactors, i, a;
if (argc <= 1) { printf("usage: %s <n>\n", argv[0]); return(1); }
for (a = 1; a < argc; a++) {
n = strtoul(argv[a], 0, 10);
if (n == ULONG_MAX && errno == ERANGE) { printf("Argument larger than ULONG_MAX\n"); return(-1); }
nfactors = factor(n, factors);
printf("%"UVuf":", n);
for (i = 0; i < nfactors; i++)
printf(" %"UVuf"", factors[i]);
printf("\n");
}
return(0);
}
#endif