[SCM] Fast arithmetic with dense matrices over F_{2^e} branch, upstream, updated. 9faf6ece9a183a703670566609063ab274b1c544

Mon Sep 10 12:24:25 UTC 2012

The following commit has been merged in the upstream branch:
commit a66b031e5bb3ab961f82d3f1fc158c4ceab3597a
Author: Martin Albrecht <martinralbrecht at googlemail.com>
Date:   Sat Aug 11 15:33:44 2012 +0100

    added gf2e_mul() and started moving functions over

diff --git a/Makefile.am b/Makefile.am
index f1ad325..c66ede2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,7 +22,8 @@ libm4rie_la_SOURCES = m4rie/gf2e.c \
 
 pkgincludesubdir = $(includedir)/m4rie
 
-pkgincludesub_HEADERS = m4rie/gf2e.h \
+pkgincludesub_HEADERS = m4rie/gf2x.h \
+	m4rie/gf2e.h \
 	m4rie/mzed.h \
 	m4rie/m4rie.h \
 	m4rie/m4ri_functions.h \
diff --git a/src/Doxyfile b/src/Doxyfile
index 1adbde9..5ee41e2 100644
--- a/src/Doxyfile
+++ b/src/Doxyfile
@@ -179,7 +179,7 @@ TAB_SIZE               = 4
 # will result in a user-defined paragraph with heading "Side Effects:".
 # You can put \n's in the value part of an alias to insert newlines.
 
-ALIASES                = "GF2E=\f$\mathbb{F}_{2^e}\f$" "GF2=\f$\mathbb{F}_2\f$" " e=\f$e\f$" "GF4=\f$\mathbb{F}_{2^2}\f$" "GF8=\f$\mathbb{F}_{2^3}\f$" "GF16=\f$\mathbb{F}_{2^4}\f$" "GF32=\f$\mathbb{F}_{2^5}\f$" "GF64=\f$\mathbb{F}_{2^6}\f$" "GF128=\f$\mathbb{F}_{2^7}\f$" "GF256=\f$\mathbb{F}_{2^8}\f$" "GF512=\f$\mathbb{F}_{2^9}\f$" "GF1024=\f$\mathbb{F}_{2^10}\f$"
+ALIASES                = "GF2E=\f$\mathbb{F}_{2^e}\f$" "GF2=\f$\mathbb{F}_2\f$" "GF2X=\f$\mathbb{F}_2[x]\f$" " e=\f$e\f$" "GF4=\f$\mathbb{F}_{2^2}\f$" "GF8=\f$\mathbb{F}_{2^3}\f$" "GF16=\f$\mathbb{F}_{2^4}\f$" "GF32=\f$\mathbb{F}_{2^5}\f$" "GF64=\f$\mathbb{F}_{2^6}\f$" "GF128=\f$\mathbb{F}_{2^7}\f$" "GF256=\f$\mathbb{F}_{2^8}\f$" "GF512=\f$\mathbb{F}_{2^9}\f$" "GF1024=\f$\mathbb{F}_{2^10}\f$"
 
 # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
 # sources only. Doxygen will then generate output that is more tailored for C.
diff --git a/src/gf2e.h b/src/gf2e.h
index 9b0b700..496650d 100644
--- a/src/gf2e.h
+++ b/src/gf2e.h
@@ -9,109 +9,31 @@
 #ifndef M4RIE_GF2E_H
 #define M4RIE_GF2E_H
 
+/******************************************************************************
+*
+*            M4RIE: Linear Algebra over GF(2^e)
+*
+*    Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht at googlemail.com>
+*
+*  Distributed under the terms of the GNU General Public License (GEL)
+*  version 2 or higher.
+*
+*    This code is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*    General Public License for more details.
+*
+*  The full text of the GPL is available at:
+*
+*                  http://www.gnu.org/licenses/
+******************************************************************************/
+
 #include <m4ri/m4ri.h>
+#include <m4rie/gf2x.h>
 
 #define M4RIE_MAX_DEGREE 10
 
 /**
- * \brief Multiplication in GF(2)[x] where a,b have a degree smaller than d <= 16.
- */
-
-static inline word gf2x_mul(const word a, const word b, unsigned int d) {
-  /* double check that integer multiplication is indeed not horribly slow here */
-  word res = 0;
-
-  switch(d) {
-  case 16: res ^= ((a&(1<<15))>>15) * (b<<15);
-  case 15: res ^= ((a&(1<<14))>>14) * (b<<14);
-  case 14: res ^= ((a&(1<<13))>>13) * (b<<13);
-  case 13: res ^= ((a&(1<<12))>>12) * (b<<12);
-  case 12: res ^= ((a&(1<<11))>>11) * (b<<11);
-  case 11: res ^= ((a&(1<<10))>>10) * (b<<10);
-  case 10: res ^= ((a&(1<< 9))>> 9) * (b<< 9);
-  case  9: res ^= ((a&(1<< 8))>> 8) * (b<< 8);
-  case  8: res ^= ((a&(1<< 7))>> 7) * (b<< 7);
-  case  7: res ^= ((a&(1<< 6))>> 6) * (b<< 6);
-  case  6: res ^= ((a&(1<< 5))>> 5) * (b<< 5);
-  case  5: res ^= ((a&(1<< 4))>> 4) * (b<< 4);
-  case  4: res ^= ((a&(1<< 3))>> 3) * (b<< 3);
-  case  3: res ^= ((a&(1<< 2))>> 2) * (b<< 2);
-  case  2: res ^= ((a&(1<< 1))>> 1) * (b<< 1);
-  case  1: res ^= ((a&(1<< 0))>> 0) * (b<< 0);
-    break;
-  default:
-    m4ri_die("degree %d not supported.",d);
-  }
-  return res;
-}
-
-/**
- * \brief Degree of elements in GF(2)[x].
- *
- */
-
-static inline unsigned int gf2x_deg(word a) {
-  unsigned int degree = 0;
-  if( (a & 0xffffffff00000000ULL) != 0) { degree += 32; a>>=32; }
-  if( (a &         0xffff0000ULL) != 0) { degree += 16; a>>=16; }
-  if( (a &             0xff00ULL) != 0) { degree +=  8; a>>= 8; }
-  if( (a &               0xf0ULL) != 0) { degree +=  4; a>>= 4; }
-  if( (a &                0xcULL) != 0) { degree +=  2; a>>= 2; }
-  if( (a &                0x2ULL) != 0) { degree +=  1; a>>= 1; }
-  return degree;
-}
-
-/**
- * \brief Division in GF(2)[x].
- */
-
-static inline word gf2x_div(word a, word b) {
-  word res = 0;
-  while(a >= b) {
-    unsigned int diff = gf2x_deg(a) - gf2x_deg(b);
-    res |= __M4RI_TWOPOW(diff);
-    a ^= b<<diff;
-  }
-  return res;
-}
-
-/**
- * \brief Remainders in GF(2)[x].
- */
-
-static inline word gf2x_mod(word a, word b) {
-  word res = 0;
-  while(a >= b) {
-    unsigned int diff = gf2x_deg(a) - gf2x_deg(b);
-    res |= __M4RI_TWOPOW(diff);
-    a ^= b<<diff;
-  }
-  return a;
-}
-
-/**
- * \brief a^(-1) % b.
- */
-
-static inline word gf2x_invmod(word a, word b, unsigned int d) {
-  word x = 0;
-  word lastx = 1;
-  word y = 1;
-  word lasty = 0;
-
-  word tmp = 0;
-
-  while (b != 0) {
-    word quotient = gf2x_div(a,b);
-    tmp = b; b = gf2x_mod(a, b); a = tmp;
-    tmp = x; x = lastx ^ gf2x_mul(quotient, x, d); lastx = tmp;
-    tmp = y; y = lasty ^ gf2x_mul(quotient, y, d); lasty = tmp;
-  }
-  return lastx;
-}
-
-
-/**
  * \brief \GF2E
  */
 
@@ -135,6 +57,25 @@ typedef struct {
 gf2e *gf2e_init(const word minpoly);
 
 /**
+ * Generate gf2e::pow_gen.
+ *
+ * \param ff Finite field.
+ */
+
+static inline void gf2e_make_pow_gen(gf2e *ff) {
+  unsigned int n = 2*ff->degree-1;
+  word *m = (word*)m4ri_mm_malloc( n * sizeof(word));
+  for(unsigned int i=0; i<n; i++) {
+    m[i] = 1<<i;
+    for(unsigned int j=i; j>=ff->degree; j--) {
+      if (m[i] & 1<<j)
+        m[i] ^= ff->minpoly<<(j - ff->degree);
+    }
+  }
+  ff->pow_gen = m;
+}
+
+/**
  * Free ff
  *
  * \param ff Finite field.
@@ -142,11 +83,22 @@ gf2e *gf2e_init(const word minpoly);
 
 void gf2e_free(gf2e *ff);
 
+/**
+ * \brief a^(-1) % minpoly
+ */
+
 static inline word gf2e_inv(const gf2e *ff, word a) {
   return gf2x_invmod(a, ff->minpoly, ff->degree);
 }
 
 /**
+ * \brief a*b in \GF2E
+ */
+static inline word gf2e_mul(const gf2e *ff, const word a, const word b) {
+  return ff->mul[a][b];
+}
+
+/**
  * Return the width used for storing elements of ff
  *
  * \param ff Finite field.
@@ -180,26 +132,6 @@ static inline size_t gf2e_degree_to_w(const gf2e *ff) {
 }
 
 /**
- * Generate gf2e::pow_gen.
- *
- * \param ff Finite field.
- */
-
-static inline void gf2e_make_pow_gen(gf2e *ff) {
-  unsigned int n = 2*ff->degree-1;
-  word *m = (word*)m4ri_mm_malloc( n * sizeof(word));
-  for(unsigned int i=0; i<n; i++) {
-    m[i] = 1<<i;
-    for(unsigned int j=i; j>=ff->degree; j--) {
-      if (m[i] & 1<<j)
-        m[i] ^= ff->minpoly<<(j - ff->degree);
-    }
-  }
-  ff->pow_gen = m;
-}
-
-
-/**
  * Compute all multiples by a of vectors fitting into 16 bits.
  *
  * \param ff Finite field.
@@ -211,7 +143,6 @@ static inline word *gf2e_t16_init(const gf2e *ff, const word a) {
 
   const unsigned int w = gf2e_degree_to_w(ff);
   const word mask_w = (1<<w)-1;
-  const word *x = ff->mul[a];
 
   /**
    * @todo: this is a bit of overkill, we could do better
@@ -219,17 +150,17 @@ static inline word *gf2e_t16_init(const gf2e *ff, const word a) {
   for(word i=0; i<1<<16; i++) {
     switch(w) {
     case 2:
-      mul[i]  = x[(i&mask_w)] | x[((i>>2)&mask_w)]<<2 | x[((i>>4)&mask_w)]<<4 | x[((i>>6)&mask_w)]<<6;
-      mul[i] |= x[((i>>8)&mask_w)]<<8 | x[((i>>10)&mask_w)]<<10 | x[((i>>12)&mask_w)]<<12 | x[((i>>14)&mask_w)]<<14;
+      mul[i]  = gf2e_mul(ff, a, ((i>>0)&mask_w))<<0 | gf2e_mul(ff, a, ((i>> 2)&mask_w))<< 2 | gf2e_mul(ff, a, ((i>> 4)&mask_w))<< 4 | gf2e_mul(ff, a, ((i>> 6)&mask_w))<< 6;
+      mul[i] |= gf2e_mul(ff, a, ((i>>8)&mask_w))<<8 | gf2e_mul(ff, a, ((i>>10)&mask_w))<<10 | gf2e_mul(ff, a, ((i>>12)&mask_w))<<12 | gf2e_mul(ff, a, ((i>>14)&mask_w))<<14;
       break;
     case 4:
-      mul[i]  = x[(i&mask_w)] | x[((i>>4)&mask_w)]<<4 | x[((i>>8)&mask_w)]<<8 | x[((i>>12)&mask_w)]<<12;
+      mul[i]  = gf2e_mul(ff, a, (i&mask_w)) | gf2e_mul(ff, a, ((i>>4)&mask_w))<<4 | gf2e_mul(ff, a, ((i>>8)&mask_w))<<8 | gf2e_mul(ff, a, ((i>>12)&mask_w))<<12;
       break;
     case 8:
-      mul[i]  = x[(i&mask_w)] | x[((i>>8)&mask_w)]<<8;
+      mul[i]  = gf2e_mul(ff, a, (i&mask_w)) | gf2e_mul(ff, a, ((i>>8)&mask_w))<<8;
       break;
     case 16:
-      mul[i]  = x[(i&mask_w)];
+      mul[i]  = gf2e_mul(ff, a, (i&mask_w));
       break;
     };
   }
diff --git a/src/gf2x.h b/src/gf2x.h
new file mode 100644
index 0000000..7a38ec0
--- /dev/null
+++ b/src/gf2x.h
@@ -0,0 +1,169 @@
+/**
+ * \file gf2x.h
+ *
+ * \brief \GF2X for degrees < 64
+ *
+ * \author Martin Albrecht <martinralbrecht at googlemail.com>
+ *
+ * \warning Do not rely on these functions for high performance, they are not fully optimised.
+ */
+
+#ifndef M4RIE_GF2X_H
+#define M4RIE_GF2X_H
+
+/******************************************************************************
+*
+*            M4RIE: Linear Algebra over GF(2^e)
+*
+*    Copyright (C) 2012 Martin Albrecht <martinralbrecht at googlemail.com>
+*
+*  Distributed under the terms of the GNU General Public License (GEL)
+*  version 2 or higher.
+*
+*    This code is distributed in the hope that it will be useful,
+*    but WITHOUT ANY WARRANTY; without even the implied warranty of
+*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+*    General Public License for more details.
+*
+*  The full text of the GPL is available at:
+*
+*                  http://www.gnu.org/licenses/
+******************************************************************************/
+
+#include <m4ri/m4ri.h>
+
+#define __M4RIE_1tF(X) ~((X)-1) /**< Maps 1 to word with all ones and 0 to 0. */
+
+/**
+ * \brief a*b in \GF2X with deg(a) and deg(b) < d.
+ */
+
+static inline word gf2x_mul(const word a, const word b, unsigned int d) {
+  word res = 0;
+
+  switch(d) {
+  case 32: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(31)) >>31) & (b<<31);
+  case 31: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(30)) >>30) & (b<<30);
+  case 30: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(29)) >>29) & (b<<29);
+  case 29: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(28)) >>28) & (b<<28);
+  case 28: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(27)) >>27) & (b<<27);
+  case 27: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(26)) >>26) & (b<<26);
+  case 26: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(25)) >>25) & (b<<25);
+  case 25: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(24)) >>24) & (b<<24);
+  case 24: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(23)) >>23) & (b<<23);
+  case 23: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(22)) >>22) & (b<<22);
+  case 22: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(21)) >>21) & (b<<21);
+  case 21: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(20)) >>20) & (b<<20);
+  case 20: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(19)) >>19) & (b<<19);
+  case 19: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(18)) >>18) & (b<<18);
+  case 18: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(17)) >>17) & (b<<17);
+  case 17: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(16)) >>16) & (b<<16);
+  case 16: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(15)) >>15) & (b<<15);
+  case 15: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(14)) >>14) & (b<<14);
+  case 14: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(13)) >>13) & (b<<13);
+  case 13: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(12)) >>12) & (b<<12);
+  case 12: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(11)) >>11) & (b<<11);
+  case 11: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(10)) >>10) & (b<<10);
+  case 10: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 9)) >> 9) & (b<< 9);
+  case  9: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 8)) >> 8) & (b<< 8);
+  case  8: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 7)) >> 7) & (b<< 7);
+  case  7: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 6)) >> 6) & (b<< 6);
+  case  6: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 5)) >> 5) & (b<< 5);
+  case  5: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 4)) >> 4) & (b<< 4);
+  case  4: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 3)) >> 3) & (b<< 3);
+  case  3: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 2)) >> 2) & (b<< 2);
+  case  2: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 1)) >> 1) & (b<< 1);
+  case  1: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 0)) >> 0) & (b<< 0);
+    break;
+  default:
+    m4ri_die("degree %d too big.\n",d);
+  }
+  return res;
+}
+/**
+ * \brief deg(a) in \GF2X.
+ *
+ * \param a Polynomial of degree <= 64.
+ */
+
+static inline int gf2x_deg(word a) {
+  int degree = 0;
+  if( (a & 0xffffffff00000000ULL) != 0) { degree += 32; a>>=32; }
+  if( (a &         0xffff0000ULL) != 0) { degree += 16; a>>=16; }
+  if( (a &             0xff00ULL) != 0) { degree +=  8; a>>= 8; }
+  if( (a &               0xf0ULL) != 0) { degree +=  4; a>>= 4; }
+  if( (a &                0xcULL) != 0) { degree +=  2; a>>= 2; }
+  if( (a &                0x2ULL) != 0) { degree +=  1; a>>= 1; }
+  return degree;
+}
+
+/**
+ * \brief a / b in \GF2X.
+ */
+
+static inline word gf2x_div(word a, word b) {
+  word res = 0;
+  word mask = 0;
+  const int deg_b = gf2x_deg(b);
+  for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) {
+    mask = __M4RIE_1tF(a>>deg_a);
+    res |= mask & __M4RI_TWOPOW(deg_a - deg_b);
+    a ^=  mask & b<<(deg_a - deg_b);
+  }
+  return res;
+}
+
+/**
+ * \brief a mod b in \GF2X.
+ */
+
+static inline word gf2x_mod(word a, word b) {
+  const int deg_b = gf2x_deg(b);
+  for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) {
+    a ^= __M4RIE_1tF(a>>deg_a) & b<<(deg_a - deg_b);
+  }
+  return a;
+}
+
+/**
+ * \brief a / b and a mod b in \GF2X.
+ */
+
+static inline word gf2x_divmod(word a, word b, word *rem) {
+  word res = 0;
+  word mask = 0;
+  const int deg_b = gf2x_deg(b);
+  for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) {
+    mask = __M4RIE_1tF(a>>deg_a);
+    res |= mask & __M4RI_TWOPOW(deg_a - deg_b);
+    a ^=  mask & b<<(deg_a - deg_b);
+  }
+  *rem = a;
+  return res;
+}
+
+
+/**
+ * \brief a^(-1) % b with deg(a), deg(b) <= d.
+ */
+
+static inline word gf2x_invmod(word a, word b, unsigned int d) {
+  word x = 0;
+  word lastx = 1;
+  word y = 1;
+  word lasty = 0;
+
+  word rem;
+  word quo;
+  word tmp = 0;
+
+  while (b != 0) {
+    quo = gf2x_divmod(a,b, &rem);
+    a = b; b = rem;
+    tmp = x; x = lastx ^ gf2x_mul(quo, x, d); lastx = tmp;
+    tmp = y; y = lasty ^ gf2x_mul(quo, y, d); lasty = tmp;
+  }
+  return lastx;
+}
+
+#endif //M4RIE_GF2X_H
diff --git a/src/mzed.c b/src/mzed.c
index fdd61eb..5b2ec5c 100644
--- a/src/mzed.c
+++ b/src/mzed.c
@@ -134,7 +134,7 @@ mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B) {
   for (rci_t i=0; i<C->nrows; ++i) {
     for (rci_t j=0; j<C->ncols; ++j) {
       for (rci_t k=0; k<A->ncols; ++k) {
-        mzed_add_elem(C, i, j, ff->mul[mzed_read_elem(A,i, k)][mzed_read_elem(B, k, j)]);
+        mzed_add_elem(C, i, j, gf2e_mul(ff, mzed_read_elem(A,i, k), mzed_read_elem(B, k, j)));
       }
     }
   }

-- 
Fast arithmetic with dense matrices over F_{2^e}