[libmath-prime-util-perl] 08/13: Add table sieve, fix off-by-one in Lehmer, change sieve/advanced transition in Lehmer
Partha P. Mukherjee
ppm-guest at moszumanska.debian.org
Thu May 21 18:47:42 UTC 2015
This is an automated email from the git hooks/post-receive script.
ppm-guest pushed a commit to annotated tag v0.24
in repository libmath-prime-util-perl.
commit 1bbbfaf45191314bf674518967ca357d1fa29e16
Author: Dana Jacobsen <dana at acm.org>
Date: Fri Mar 8 16:03:58 2013 -0800
Add table sieve, fix off-by-one in Lehmer, change sieve/advanced transition in Lehmer
---
Changes | 3 ++-
lehmer.c | 8 +++----
util.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 83 insertions(+), 5 deletions(-)
diff --git a/Changes b/Changes
index df6c7f3..963c245 100644
--- a/Changes
+++ b/Changes
@@ -6,7 +6,8 @@ Revision history for Perl extension Math::Prime::Util.
- euler_phi on a range wasn't working right with some ranges.
- - More XS prime count improvements to speed and space.
+ - More XS prime count improvements to speed and space. Add some tables
+ to the sieve count so it runs a bit faster. Transition from sieve later.
- PP prime count for 10^9 and larger is ~2x faster and uses much less
memory. Similar impact for nth_prime 10^8 or larger.
diff --git a/lehmer.c b/lehmer.c
index bf972a6..d5f8376 100644
--- a/lehmer.c
+++ b/lehmer.c
@@ -4,7 +4,7 @@
#include <math.h>
/* Below this size, just sieve. */
-#define SIEVE_LIMIT 1000000
+#define SIEVE_LIMIT 60000000
/*****************************************************************************
*
@@ -553,7 +553,7 @@ UV _XS_meissel_pi(UV n)
if (verbose > 0) printf("phi(%lu,%lu) = %lu. sum = %lu\n", n, a, sum - ((b+a-2) * (b-a+1) / 2), sum);
TIMING_END_PRINT("phi(x,a)")
- lastprime = b*SIEVE_MULT;
+ lastprime = b*SIEVE_MULT+1;
if (verbose > 0) printf("meissel %lu stage 3: %lu small primes\n", n, lastprime);
TIMING_START;
primes = generate_small_primes(lastprime);
@@ -616,7 +616,7 @@ UV _XS_lehmer_pi(UV n)
/* We get an array of the first b primes. This is used in stage 4. If we
* get more than necessary, we can use them to speed up some.
*/
- lastprime = b*SIEVE_MULT;
+ lastprime = b*SIEVE_MULT+1;
if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastprime);
TIMING_START;
primes = generate_small_primes(lastprime);
@@ -680,7 +680,7 @@ UV _XS_LMO_pi(UV n)
b = _XS_lehmer_pi(n12);
TIMING_END_PRINT("stage 1")
- lastprime = b*SIEVE_MULT;
+ lastprime = b*SIEVE_MULT+1;
if (verbose > 0) printf("LMO %lu stage 2: %lu small primes\n", n, lastprime);
TIMING_START;
primes = generate_small_primes(lastprime);
diff --git a/util.c b/util.c
index 368fc24..123349b 100644
--- a/util.c
+++ b/util.c
@@ -412,6 +412,59 @@ static const unsigned char prime_count_small[] =
16,16,16,16,16,16,17,17,18,18,18,18,18,18,19};
#define NPRIME_COUNT_SMALL (sizeof(prime_count_small)/sizeof(prime_count_small[0]))
+#define USE_PC_TABLES 1
+#if USE_PC_TABLES
+/* These tables let us have fast answers up to 3000M for the cost of ~1.4k of
+ * static data/code. We can get a 4 to 100x speedup here. We don't want to
+ * push this idea too far because Lehmer's method should be faster. */
+/* mpu '$step=30_000; $pc=prime_count(5); print "$pc\n", join(",", map { $spc=$pc; $pc=prime_count($_*$step); $pc-$spc; } 1..200), "\n"' */
+static const unsigned short step_counts_30k[] = /* starts at 7 */
+ {3242,2812,2656,2588,2547,2494,2465,2414,2421,2355,2407,2353,2310,2323,2316,
+ 2299,2286,2281,2247,2279,2243,2223,2251,2214,2209,2230,2215,2207,2205,2179,
+ 2200,2144,2159,2193,2164,2136,2180,2152,2162,2174,2113,2131,2150,2101,2111,
+ 2146,2115,2123,2119,2108,2124,2097,2075,2089,2094,2119,2084,2065,2069,2101,
+ 2094,2083,2089,2076,2088,2027,2109,2073,2061,2033,2079,2078,2036,2025,2058,
+ 2083,2037,2005,2048,2048,2024,2045,2027,2025,2039,2049,2022,2034,2046,2032,
+ 2019,2000,2014,2069,2042,1980,2021,2014,1995,2017,1992,1985,2045,2007,1990,
+ 2008,2052,2033,1988,1984,2010,1943,2024,2005,2027,1937,1955,1956,1993,1976};
+#define NSTEP_COUNTS_30K (sizeof(step_counts_30k)/sizeof(step_counts_30k[0]))
+
+/* mpu '$step=300_000; $pc=prime_count(10*$step); print "$pc\n", join(",", map { $spc=$pc; $pc=prime_count($_*$step); $pc-$spc; } 11..100), "\n"' */
+static const unsigned short step_counts_300k[] = /* starts at 3M */
+ {20084,19826,19885,19703,19634,19491,19532,19391,19244,19243,19224,19086,
+ 19124,19036,18942,18893,18870,18853,18837,18775,18688,18674,18594,18525,
+ 18639,18545,18553,18424,18508,18421,18375,18366,18391,18209,18239,18298,
+ 18209,18294,18125,18138,18147,18115,18126,18021,18085,18068,18094,17963,
+ 18041,18003,17900,17881,17917,17888,17880,17852,17892,17779,17823,17764,
+ 17806,17762,17780,17716,17633,17758,17746,17678,17687,17613,17709,17628,
+ 17634,17556,17528,17598,17604,17532,17606,17548,17493,17576,17456,17468,
+ 17555,17452,17407,17472,17415,17500,17508,17418,17463,17240,17345,17351,
+ 17380,17394,17379,17330,17322,17335,17354,17113,17210,17231,17238,17305,
+ 17268,17219,17281,17235,17119,17292,17161,17212,17166,17277,17137,17260,
+ 17228,17197,17154,17097,17195,17136,17067,17058,17041,17045,17187,17034,
+ 17029,17037,17090,16985,17054,17017,17106,17001,17095,17125,17027,16948,
+ 16969,17031,16916,17031,16905,16937,16881,16952,16919,16938,17028,16963,
+ 16902,16922,16944,16901,16847,16969,16900,16876,16841,16874,16894,16861,
+ 16761,16886,16778,16820,16727,16921,16817,16845,16847,16824,16844,16809,
+ 16859,16783,16713,16752,16762,16857,16760,16626,16784,16784,16718,16745,
+ 16871,16635,16714,16630,16779,16709,16660,16730,16715,16724};
+#define NSTEP_COUNTS_300K (sizeof(step_counts_300k)/sizeof(step_counts_300k[0]))
+
+static const unsigned int step_counts_30m[] = /* starts at 30M */
+ {1704256,1654839,1624694,1602748,1585989,1571241,1559918,1549840,1540941,
+ 1533150,1525813,1519922,1513269,1508559,1503386,1497828,1494129,1489905,
+ 1486417,1482526,1478941,1475577,1472301,1469133,1466295,1464711,1461223,
+ 1458478,1455327,1454218,1451883,1449393,1447612,1445029,1443285,1442268,
+ 1438511,1437688,1435603,1433623,1432638,1431158,1429158,1427934,1426191,
+ 1424449,1423146,1421898,1421628,1419519,1417646,1416274,1414828,1414474,
+ 1412536,1412147,1410149,1409474,1408847,1406619,1405863,1404699,1403820,
+ 1402802,1402215,1401459,1399972,1398687,1397968,1397392,1396025,1395311,
+ 1394081,1393614,1393702,1391745,1390950,1389856,1389245,1388381,1387557,
+ 1387087,1386285,1386089,1385355,1383659,1383030,1382174,1382128,1380556,
+ 1379940,1379988,1379181,1378300,1378033,1376974,1376282,1375646,1374445};
+#define NSTEP_COUNTS_30M (sizeof(step_counts_30m)/sizeof(step_counts_30m[0]))
+#endif
+
UV _XS_prime_count(UV low, UV high)
{
const unsigned char* cache_sieve;
@@ -429,6 +482,30 @@ UV _XS_prime_count(UV low, UV high)
if (low > high) return count;
+#if USE_PC_TABLES
+ if (low == 7 && high >= 30000) {
+ UV i = 0;
+ if (high < (NSTEP_COUNTS_30K+1) * UVCONST(30000)) {
+ while (i < NSTEP_COUNTS_30K && high >= (i+1) * 30000) {
+ count += step_counts_30k[i++];
+ }
+ low = i * 30000;
+ } else if (high < (NSTEP_COUNTS_300K+1) * UVCONST(300000)) {
+ count = 216816;
+ while (i < NSTEP_COUNTS_300K && high >= (i+11) * UVCONST(300000)) {
+ count += step_counts_300k[i++];
+ }
+ low = (i+10) * 300000;
+ } else {
+ count = 1857859;
+ while (i < NSTEP_COUNTS_30M && high >= (i+2) * UVCONST(30000000)) {
+ count += step_counts_30m[i++];
+ }
+ low = (UV)(i+1) * UVCONST(30000000);
+ }
+ }
+#endif
+
low_d = low/30;
high_d = high/30;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git
More information about the Pkg-perl-cvs-commits
mailing list