[libmath-prime-util-perl] 08/13: Add table sieve, fix off-by-one in Lehmer, change sieve/advanced transition in Lehmer

Partha P. Mukherjee ppm-guest at moszumanska.debian.org
Thu May 21 18:47:42 UTC 2015


This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.24
in repository libmath-prime-util-perl.

commit 1bbbfaf45191314bf674518967ca357d1fa29e16
Author: Dana Jacobsen <dana at acm.org>
Date:   Fri Mar 8 16:03:58 2013 -0800

    Add table sieve, fix off-by-one in Lehmer, change sieve/advanced transition in Lehmer
---
 Changes  |  3 ++-
 lehmer.c |  8 +++----
 util.c   | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/Changes b/Changes
index df6c7f3..963c245 100644
--- a/Changes
+++ b/Changes
@@ -6,7 +6,8 @@ Revision history for Perl extension Math::Prime::Util.
 
     - euler_phi on a range wasn't working right with some ranges.
 
-    - More XS prime count improvements to speed and space.
+    - More XS prime count improvements to speed and space.  Add some tables
+      to the sieve count so it runs a bit faster.  Transition from sieve later.
 
     - PP prime count for 10^9 and larger is ~2x faster and uses much less
       memory.  Similar impact for nth_prime 10^8 or larger.
diff --git a/lehmer.c b/lehmer.c
index bf972a6..d5f8376 100644
--- a/lehmer.c
+++ b/lehmer.c
@@ -4,7 +4,7 @@
 #include <math.h>
 
 /* Below this size, just sieve. */
-#define SIEVE_LIMIT  1000000
+#define SIEVE_LIMIT  60000000
 
 /*****************************************************************************
  *
@@ -553,7 +553,7 @@ UV _XS_meissel_pi(UV n)
   if (verbose > 0) printf("phi(%lu,%lu) = %lu.  sum = %lu\n", n, a, sum - ((b+a-2) * (b-a+1) / 2), sum);
   TIMING_END_PRINT("phi(x,a)")
 
-  lastprime = b*SIEVE_MULT;
+  lastprime = b*SIEVE_MULT+1;
   if (verbose > 0) printf("meissel %lu stage 3: %lu small primes\n", n, lastprime);
   TIMING_START;
   primes = generate_small_primes(lastprime);
@@ -616,7 +616,7 @@ UV _XS_lehmer_pi(UV n)
   /* We get an array of the first b primes.  This is used in stage 4.  If we
    * get more than necessary, we can use them to speed up some.
    */
-  lastprime = b*SIEVE_MULT;
+  lastprime = b*SIEVE_MULT+1;
   if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastprime);
   TIMING_START;
   primes = generate_small_primes(lastprime);
@@ -680,7 +680,7 @@ UV _XS_LMO_pi(UV n)
   b = _XS_lehmer_pi(n12);
   TIMING_END_PRINT("stage 1")
 
-  lastprime = b*SIEVE_MULT;
+  lastprime = b*SIEVE_MULT+1;
   if (verbose > 0) printf("LMO %lu stage 2: %lu small primes\n", n, lastprime);
   TIMING_START;
   primes = generate_small_primes(lastprime);
diff --git a/util.c b/util.c
index 368fc24..123349b 100644
--- a/util.c
+++ b/util.c
@@ -412,6 +412,59 @@ static const unsigned char prime_count_small[] =
    16,16,16,16,16,16,17,17,18,18,18,18,18,18,19};
 #define NPRIME_COUNT_SMALL  (sizeof(prime_count_small)/sizeof(prime_count_small[0]))
 
+#define USE_PC_TABLES 1
+#if USE_PC_TABLES
+/* These tables let us have fast answers up to 3000M for the cost of ~1.4k of
+ * static data/code.  We can get a 4 to 100x speedup here.  We don't want to
+ * push this idea too far because Lehmer's method should be faster. */
+/* mpu '$step=30_000; $pc=prime_count(5); print "$pc\n", join(",", map { $spc=$pc; $pc=prime_count($_*$step); $pc-$spc; } 1..200), "\n"' */
+static const unsigned short step_counts_30k[] =  /* starts at 7 */
+  {3242,2812,2656,2588,2547,2494,2465,2414,2421,2355,2407,2353,2310,2323,2316,
+   2299,2286,2281,2247,2279,2243,2223,2251,2214,2209,2230,2215,2207,2205,2179,
+   2200,2144,2159,2193,2164,2136,2180,2152,2162,2174,2113,2131,2150,2101,2111,
+   2146,2115,2123,2119,2108,2124,2097,2075,2089,2094,2119,2084,2065,2069,2101,
+   2094,2083,2089,2076,2088,2027,2109,2073,2061,2033,2079,2078,2036,2025,2058,
+   2083,2037,2005,2048,2048,2024,2045,2027,2025,2039,2049,2022,2034,2046,2032,
+   2019,2000,2014,2069,2042,1980,2021,2014,1995,2017,1992,1985,2045,2007,1990,
+   2008,2052,2033,1988,1984,2010,1943,2024,2005,2027,1937,1955,1956,1993,1976};
+#define NSTEP_COUNTS_30K  (sizeof(step_counts_30k)/sizeof(step_counts_30k[0]))
+
+/* mpu '$step=300_000; $pc=prime_count(10*$step); print "$pc\n", join(",", map { $spc=$pc; $pc=prime_count($_*$step); $pc-$spc; } 11..100), "\n"' */
+static const unsigned short step_counts_300k[] =  /* starts at 3M */
+  {20084,19826,19885,19703,19634,19491,19532,19391,19244,19243,19224,19086,
+   19124,19036,18942,18893,18870,18853,18837,18775,18688,18674,18594,18525,
+   18639,18545,18553,18424,18508,18421,18375,18366,18391,18209,18239,18298,
+   18209,18294,18125,18138,18147,18115,18126,18021,18085,18068,18094,17963,
+   18041,18003,17900,17881,17917,17888,17880,17852,17892,17779,17823,17764,
+   17806,17762,17780,17716,17633,17758,17746,17678,17687,17613,17709,17628,
+   17634,17556,17528,17598,17604,17532,17606,17548,17493,17576,17456,17468,
+   17555,17452,17407,17472,17415,17500,17508,17418,17463,17240,17345,17351,
+   17380,17394,17379,17330,17322,17335,17354,17113,17210,17231,17238,17305,
+   17268,17219,17281,17235,17119,17292,17161,17212,17166,17277,17137,17260,
+   17228,17197,17154,17097,17195,17136,17067,17058,17041,17045,17187,17034,
+   17029,17037,17090,16985,17054,17017,17106,17001,17095,17125,17027,16948,
+   16969,17031,16916,17031,16905,16937,16881,16952,16919,16938,17028,16963,
+   16902,16922,16944,16901,16847,16969,16900,16876,16841,16874,16894,16861,
+   16761,16886,16778,16820,16727,16921,16817,16845,16847,16824,16844,16809,
+   16859,16783,16713,16752,16762,16857,16760,16626,16784,16784,16718,16745,
+   16871,16635,16714,16630,16779,16709,16660,16730,16715,16724};
+#define NSTEP_COUNTS_300K (sizeof(step_counts_300k)/sizeof(step_counts_300k[0]))
+
+static const unsigned int step_counts_30m[] =  /* starts at 30M */
+  {1704256,1654839,1624694,1602748,1585989,1571241,1559918,1549840,1540941,
+   1533150,1525813,1519922,1513269,1508559,1503386,1497828,1494129,1489905,
+   1486417,1482526,1478941,1475577,1472301,1469133,1466295,1464711,1461223,
+   1458478,1455327,1454218,1451883,1449393,1447612,1445029,1443285,1442268,
+   1438511,1437688,1435603,1433623,1432638,1431158,1429158,1427934,1426191,
+   1424449,1423146,1421898,1421628,1419519,1417646,1416274,1414828,1414474,
+   1412536,1412147,1410149,1409474,1408847,1406619,1405863,1404699,1403820,
+   1402802,1402215,1401459,1399972,1398687,1397968,1397392,1396025,1395311,
+   1394081,1393614,1393702,1391745,1390950,1389856,1389245,1388381,1387557,
+   1387087,1386285,1386089,1385355,1383659,1383030,1382174,1382128,1380556,
+   1379940,1379988,1379181,1378300,1378033,1376974,1376282,1375646,1374445};
+#define NSTEP_COUNTS_30M  (sizeof(step_counts_30m)/sizeof(step_counts_30m[0]))
+#endif
+
 UV _XS_prime_count(UV low, UV high)
 {
   const unsigned char* cache_sieve;
@@ -429,6 +482,30 @@ UV _XS_prime_count(UV low, UV high)
 
   if (low > high)  return count;
 
+#if USE_PC_TABLES
+  if (low == 7 && high >= 30000) {
+    UV i = 0;
+    if (high < (NSTEP_COUNTS_30K+1) * UVCONST(30000)) {
+      while (i < NSTEP_COUNTS_30K && high >= (i+1) * 30000) {
+        count += step_counts_30k[i++];
+      }
+      low = i * 30000;
+    } else if (high < (NSTEP_COUNTS_300K+1) * UVCONST(300000)) {
+      count = 216816;
+      while (i < NSTEP_COUNTS_300K && high >= (i+11) * UVCONST(300000)) {
+        count += step_counts_300k[i++];
+      }
+      low = (i+10) * 300000;
+    } else {
+      count = 1857859;
+      while (i < NSTEP_COUNTS_30M && high >= (i+2) * UVCONST(30000000)) {
+        count += step_counts_30m[i++];
+      }
+      low = (UV)(i+1) * UVCONST(30000000);
+    }
+  }
+#endif
+
   low_d = low/30;
   high_d = high/30;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git



More information about the Pkg-perl-cvs-commits mailing list