[SCM] Fast arithmetic with dense matrices over F_{2^e} branch, upstream, updated. 9faf6ece9a183a703670566609063ab274b1c544
Martin Albrecht
martinralbrecht at googlemail.com
Mon Sep 10 12:24:25 UTC 2012
The following commit has been merged in the upstream branch:
commit a66b031e5bb3ab961f82d3f1fc158c4ceab3597a
Author: Martin Albrecht <martinralbrecht at googlemail.com>
Date: Sat Aug 11 15:33:44 2012 +0100
added gf2e_mul() and started moving functions over
diff --git a/Makefile.am b/Makefile.am
index f1ad325..c66ede2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,7 +22,8 @@ libm4rie_la_SOURCES = m4rie/gf2e.c \
pkgincludesubdir = $(includedir)/m4rie
-pkgincludesub_HEADERS = m4rie/gf2e.h \
+pkgincludesub_HEADERS = m4rie/gf2x.h \
+ m4rie/gf2e.h \
m4rie/mzed.h \
m4rie/m4rie.h \
m4rie/m4ri_functions.h \
diff --git a/src/Doxyfile b/src/Doxyfile
index 1adbde9..5ee41e2 100644
--- a/src/Doxyfile
+++ b/src/Doxyfile
@@ -179,7 +179,7 @@ TAB_SIZE = 4
# will result in a user-defined paragraph with heading "Side Effects:".
# You can put \n's in the value part of an alias to insert newlines.
-ALIASES = "GF2E=\f$\mathbb{F}_{2^e}\f$" "GF2=\f$\mathbb{F}_2\f$" " e=\f$e\f$" "GF4=\f$\mathbb{F}_{2^2}\f$" "GF8=\f$\mathbb{F}_{2^3}\f$" "GF16=\f$\mathbb{F}_{2^4}\f$" "GF32=\f$\mathbb{F}_{2^5}\f$" "GF64=\f$\mathbb{F}_{2^6}\f$" "GF128=\f$\mathbb{F}_{2^7}\f$" "GF256=\f$\mathbb{F}_{2^8}\f$" "GF512=\f$\mathbb{F}_{2^9}\f$" "GF1024=\f$\mathbb{F}_{2^10}\f$"
+ALIASES = "GF2E=\f$\mathbb{F}_{2^e}\f$" "GF2=\f$\mathbb{F}_2\f$" "GF2X=\f$\mathbb{F}_2[x]\f$" " e=\f$e\f$" "GF4=\f$\mathbb{F}_{2^2}\f$" "GF8=\f$\mathbb{F}_{2^3}\f$" "GF16=\f$\mathbb{F}_{2^4}\f$" "GF32=\f$\mathbb{F}_{2^5}\f$" "GF64=\f$\mathbb{F}_{2^6}\f$" "GF128=\f$\mathbb{F}_{2^7}\f$" "GF256=\f$\mathbb{F}_{2^8}\f$" "GF512=\f$\mathbb{F}_{2^9}\f$" "GF1024=\f$\mathbb{F}_{2^10}\f$"
# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
# sources only. Doxygen will then generate output that is more tailored for C.
diff --git a/src/gf2e.h b/src/gf2e.h
index 9b0b700..496650d 100644
--- a/src/gf2e.h
+++ b/src/gf2e.h
@@ -9,109 +9,31 @@
#ifndef M4RIE_GF2E_H
#define M4RIE_GF2E_H
+/******************************************************************************
+*
+* M4RIE: Linear Algebra over GF(2^e)
+*
+* Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht at googlemail.com>
+*
+* Distributed under the terms of the GNU General Public License (GEL)
+* version 2 or higher.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* The full text of the GPL is available at:
+*
+* http://www.gnu.org/licenses/
+******************************************************************************/
+
#include <m4ri/m4ri.h>
+#include <m4rie/gf2x.h>
#define M4RIE_MAX_DEGREE 10
/**
- * \brief Multiplication in GF(2)[x] where a,b have a degree smaller than d <= 16.
- */
-
-static inline word gf2x_mul(const word a, const word b, unsigned int d) {
- /* double check that integer multiplication is indeed not horribly slow here */
- word res = 0;
-
- switch(d) {
- case 16: res ^= ((a&(1<<15))>>15) * (b<<15);
- case 15: res ^= ((a&(1<<14))>>14) * (b<<14);
- case 14: res ^= ((a&(1<<13))>>13) * (b<<13);
- case 13: res ^= ((a&(1<<12))>>12) * (b<<12);
- case 12: res ^= ((a&(1<<11))>>11) * (b<<11);
- case 11: res ^= ((a&(1<<10))>>10) * (b<<10);
- case 10: res ^= ((a&(1<< 9))>> 9) * (b<< 9);
- case 9: res ^= ((a&(1<< 8))>> 8) * (b<< 8);
- case 8: res ^= ((a&(1<< 7))>> 7) * (b<< 7);
- case 7: res ^= ((a&(1<< 6))>> 6) * (b<< 6);
- case 6: res ^= ((a&(1<< 5))>> 5) * (b<< 5);
- case 5: res ^= ((a&(1<< 4))>> 4) * (b<< 4);
- case 4: res ^= ((a&(1<< 3))>> 3) * (b<< 3);
- case 3: res ^= ((a&(1<< 2))>> 2) * (b<< 2);
- case 2: res ^= ((a&(1<< 1))>> 1) * (b<< 1);
- case 1: res ^= ((a&(1<< 0))>> 0) * (b<< 0);
- break;
- default:
- m4ri_die("degree %d not supported.",d);
- }
- return res;
-}
-
-/**
- * \brief Degree of elements in GF(2)[x].
- *
- */
-
-static inline unsigned int gf2x_deg(word a) {
- unsigned int degree = 0;
- if( (a & 0xffffffff00000000ULL) != 0) { degree += 32; a>>=32; }
- if( (a & 0xffff0000ULL) != 0) { degree += 16; a>>=16; }
- if( (a & 0xff00ULL) != 0) { degree += 8; a>>= 8; }
- if( (a & 0xf0ULL) != 0) { degree += 4; a>>= 4; }
- if( (a & 0xcULL) != 0) { degree += 2; a>>= 2; }
- if( (a & 0x2ULL) != 0) { degree += 1; a>>= 1; }
- return degree;
-}
-
-/**
- * \brief Division in GF(2)[x].
- */
-
-static inline word gf2x_div(word a, word b) {
- word res = 0;
- while(a >= b) {
- unsigned int diff = gf2x_deg(a) - gf2x_deg(b);
- res |= __M4RI_TWOPOW(diff);
- a ^= b<<diff;
- }
- return res;
-}
-
-/**
- * \brief Remainders in GF(2)[x].
- */
-
-static inline word gf2x_mod(word a, word b) {
- word res = 0;
- while(a >= b) {
- unsigned int diff = gf2x_deg(a) - gf2x_deg(b);
- res |= __M4RI_TWOPOW(diff);
- a ^= b<<diff;
- }
- return a;
-}
-
-/**
- * \brief a^(-1) % b.
- */
-
-static inline word gf2x_invmod(word a, word b, unsigned int d) {
- word x = 0;
- word lastx = 1;
- word y = 1;
- word lasty = 0;
-
- word tmp = 0;
-
- while (b != 0) {
- word quotient = gf2x_div(a,b);
- tmp = b; b = gf2x_mod(a, b); a = tmp;
- tmp = x; x = lastx ^ gf2x_mul(quotient, x, d); lastx = tmp;
- tmp = y; y = lasty ^ gf2x_mul(quotient, y, d); lasty = tmp;
- }
- return lastx;
-}
-
-
-/**
* \brief \GF2E
*/
@@ -135,6 +57,25 @@ typedef struct {
gf2e *gf2e_init(const word minpoly);
/**
+ * Generate gf2e::pow_gen.
+ *
+ * \param ff Finite field.
+ */
+
+static inline void gf2e_make_pow_gen(gf2e *ff) {
+ unsigned int n = 2*ff->degree-1;
+ word *m = (word*)m4ri_mm_malloc( n * sizeof(word));
+ for(unsigned int i=0; i<n; i++) {
+ m[i] = 1<<i;
+ for(unsigned int j=i; j>=ff->degree; j--) {
+ if (m[i] & 1<<j)
+ m[i] ^= ff->minpoly<<(j - ff->degree);
+ }
+ }
+ ff->pow_gen = m;
+}
+
+/**
* Free ff
*
* \param ff Finite field.
@@ -142,11 +83,22 @@ gf2e *gf2e_init(const word minpoly);
void gf2e_free(gf2e *ff);
+/**
+ * \brief a^(-1) % minpoly
+ */
+
static inline word gf2e_inv(const gf2e *ff, word a) {
return gf2x_invmod(a, ff->minpoly, ff->degree);
}
/**
+ * \brief a*b in \GF2E
+ */
+static inline word gf2e_mul(const gf2e *ff, const word a, const word b) {
+ return ff->mul[a][b];
+}
+
+/**
* Return the width used for storing elements of ff
*
* \param ff Finite field.
@@ -180,26 +132,6 @@ static inline size_t gf2e_degree_to_w(const gf2e *ff) {
}
/**
- * Generate gf2e::pow_gen.
- *
- * \param ff Finite field.
- */
-
-static inline void gf2e_make_pow_gen(gf2e *ff) {
- unsigned int n = 2*ff->degree-1;
- word *m = (word*)m4ri_mm_malloc( n * sizeof(word));
- for(unsigned int i=0; i<n; i++) {
- m[i] = 1<<i;
- for(unsigned int j=i; j>=ff->degree; j--) {
- if (m[i] & 1<<j)
- m[i] ^= ff->minpoly<<(j - ff->degree);
- }
- }
- ff->pow_gen = m;
-}
-
-
-/**
* Compute all multiples by a of vectors fitting into 16 bits.
*
* \param ff Finite field.
@@ -211,7 +143,6 @@ static inline word *gf2e_t16_init(const gf2e *ff, const word a) {
const unsigned int w = gf2e_degree_to_w(ff);
const word mask_w = (1<<w)-1;
- const word *x = ff->mul[a];
/**
* @todo: this is a bit of overkill, we could do better
@@ -219,17 +150,17 @@ static inline word *gf2e_t16_init(const gf2e *ff, const word a) {
for(word i=0; i<1<<16; i++) {
switch(w) {
case 2:
- mul[i] = x[(i&mask_w)] | x[((i>>2)&mask_w)]<<2 | x[((i>>4)&mask_w)]<<4 | x[((i>>6)&mask_w)]<<6;
- mul[i] |= x[((i>>8)&mask_w)]<<8 | x[((i>>10)&mask_w)]<<10 | x[((i>>12)&mask_w)]<<12 | x[((i>>14)&mask_w)]<<14;
+ mul[i] = gf2e_mul(ff, a, ((i>>0)&mask_w))<<0 | gf2e_mul(ff, a, ((i>> 2)&mask_w))<< 2 | gf2e_mul(ff, a, ((i>> 4)&mask_w))<< 4 | gf2e_mul(ff, a, ((i>> 6)&mask_w))<< 6;
+ mul[i] |= gf2e_mul(ff, a, ((i>>8)&mask_w))<<8 | gf2e_mul(ff, a, ((i>>10)&mask_w))<<10 | gf2e_mul(ff, a, ((i>>12)&mask_w))<<12 | gf2e_mul(ff, a, ((i>>14)&mask_w))<<14;
break;
case 4:
- mul[i] = x[(i&mask_w)] | x[((i>>4)&mask_w)]<<4 | x[((i>>8)&mask_w)]<<8 | x[((i>>12)&mask_w)]<<12;
+ mul[i] = gf2e_mul(ff, a, (i&mask_w)) | gf2e_mul(ff, a, ((i>>4)&mask_w))<<4 | gf2e_mul(ff, a, ((i>>8)&mask_w))<<8 | gf2e_mul(ff, a, ((i>>12)&mask_w))<<12;
break;
case 8:
- mul[i] = x[(i&mask_w)] | x[((i>>8)&mask_w)]<<8;
+ mul[i] = gf2e_mul(ff, a, (i&mask_w)) | gf2e_mul(ff, a, ((i>>8)&mask_w))<<8;
break;
case 16:
- mul[i] = x[(i&mask_w)];
+ mul[i] = gf2e_mul(ff, a, (i&mask_w));
break;
};
}
diff --git a/src/gf2x.h b/src/gf2x.h
new file mode 100644
index 0000000..7a38ec0
--- /dev/null
+++ b/src/gf2x.h
@@ -0,0 +1,169 @@
+/**
+ * \file gf2x.h
+ *
+ * \brief \GF2X for degrees < 64
+ *
+ * \author Martin Albrecht <martinralbrecht at googlemail.com>
+ *
+ * \warning Do not rely on these functions for high performance, they are not fully optimised.
+ */
+
+#ifndef M4RIE_GF2X_H
+#define M4RIE_GF2X_H
+
+/******************************************************************************
+*
+* M4RIE: Linear Algebra over GF(2^e)
+*
+* Copyright (C) 2012 Martin Albrecht <martinralbrecht at googlemail.com>
+*
+* Distributed under the terms of the GNU General Public License (GEL)
+* version 2 or higher.
+*
+* This code is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* General Public License for more details.
+*
+* The full text of the GPL is available at:
+*
+* http://www.gnu.org/licenses/
+******************************************************************************/
+
+#include <m4ri/m4ri.h>
+
+#define __M4RIE_1tF(X) ~((X)-1) /**< Maps 1 to word with all ones and 0 to 0. */
+
+/**
+ * \brief a*b in \GF2X with deg(a) and deg(b) < d.
+ */
+
+static inline word gf2x_mul(const word a, const word b, unsigned int d) {
+ word res = 0;
+
+ switch(d) {
+ case 32: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(31)) >>31) & (b<<31);
+ case 31: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(30)) >>30) & (b<<30);
+ case 30: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(29)) >>29) & (b<<29);
+ case 29: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(28)) >>28) & (b<<28);
+ case 28: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(27)) >>27) & (b<<27);
+ case 27: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(26)) >>26) & (b<<26);
+ case 26: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(25)) >>25) & (b<<25);
+ case 25: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(24)) >>24) & (b<<24);
+ case 24: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(23)) >>23) & (b<<23);
+ case 23: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(22)) >>22) & (b<<22);
+ case 22: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(21)) >>21) & (b<<21);
+ case 21: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(20)) >>20) & (b<<20);
+ case 20: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(19)) >>19) & (b<<19);
+ case 19: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(18)) >>18) & (b<<18);
+ case 18: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(17)) >>17) & (b<<17);
+ case 17: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(16)) >>16) & (b<<16);
+ case 16: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(15)) >>15) & (b<<15);
+ case 15: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(14)) >>14) & (b<<14);
+ case 14: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(13)) >>13) & (b<<13);
+ case 13: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(12)) >>12) & (b<<12);
+ case 12: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(11)) >>11) & (b<<11);
+ case 11: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW(10)) >>10) & (b<<10);
+ case 10: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 9)) >> 9) & (b<< 9);
+ case 9: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 8)) >> 8) & (b<< 8);
+ case 8: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 7)) >> 7) & (b<< 7);
+ case 7: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 6)) >> 6) & (b<< 6);
+ case 6: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 5)) >> 5) & (b<< 5);
+ case 5: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 4)) >> 4) & (b<< 4);
+ case 4: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 3)) >> 3) & (b<< 3);
+ case 3: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 2)) >> 2) & (b<< 2);
+ case 2: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 1)) >> 1) & (b<< 1);
+ case 1: res ^= __M4RIE_1tF((a & __M4RI_TWOPOW( 0)) >> 0) & (b<< 0);
+ break;
+ default:
+ m4ri_die("degree %d too big.\n",d);
+ }
+ return res;
+}
+/**
+ * \brief deg(a) in \GF2X.
+ *
+ * \param a Polynomial of degree <= 64.
+ */
+
+static inline int gf2x_deg(word a) {
+ int degree = 0;
+ if( (a & 0xffffffff00000000ULL) != 0) { degree += 32; a>>=32; }
+ if( (a & 0xffff0000ULL) != 0) { degree += 16; a>>=16; }
+ if( (a & 0xff00ULL) != 0) { degree += 8; a>>= 8; }
+ if( (a & 0xf0ULL) != 0) { degree += 4; a>>= 4; }
+ if( (a & 0xcULL) != 0) { degree += 2; a>>= 2; }
+ if( (a & 0x2ULL) != 0) { degree += 1; a>>= 1; }
+ return degree;
+}
+
+/**
+ * \brief a / b in \GF2X.
+ */
+
+static inline word gf2x_div(word a, word b) {
+ word res = 0;
+ word mask = 0;
+ const int deg_b = gf2x_deg(b);
+ for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) {
+ mask = __M4RIE_1tF(a>>deg_a);
+ res |= mask & __M4RI_TWOPOW(deg_a - deg_b);
+ a ^= mask & b<<(deg_a - deg_b);
+ }
+ return res;
+}
+
+/**
+ * \brief a mod b in \GF2X.
+ */
+
+static inline word gf2x_mod(word a, word b) {
+ const int deg_b = gf2x_deg(b);
+ for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) {
+ a ^= __M4RIE_1tF(a>>deg_a) & b<<(deg_a - deg_b);
+ }
+ return a;
+}
+
+/**
+ * \brief a / b and a mod b in \GF2X.
+ */
+
+static inline word gf2x_divmod(word a, word b, word *rem) {
+ word res = 0;
+ word mask = 0;
+ const int deg_b = gf2x_deg(b);
+ for(int deg_a=gf2x_deg(a); deg_a>=deg_b; deg_a--) {
+ mask = __M4RIE_1tF(a>>deg_a);
+ res |= mask & __M4RI_TWOPOW(deg_a - deg_b);
+ a ^= mask & b<<(deg_a - deg_b);
+ }
+ *rem = a;
+ return res;
+}
+
+
+/**
+ * \brief a^(-1) % b with deg(a), deg(b) <= d.
+ */
+
+static inline word gf2x_invmod(word a, word b, unsigned int d) {
+ word x = 0;
+ word lastx = 1;
+ word y = 1;
+ word lasty = 0;
+
+ word rem;
+ word quo;
+ word tmp = 0;
+
+ while (b != 0) {
+ quo = gf2x_divmod(a,b, &rem);
+ a = b; b = rem;
+ tmp = x; x = lastx ^ gf2x_mul(quo, x, d); lastx = tmp;
+ tmp = y; y = lasty ^ gf2x_mul(quo, y, d); lasty = tmp;
+ }
+ return lastx;
+}
+
+#endif //M4RIE_GF2X_H
diff --git a/src/mzed.c b/src/mzed.c
index fdd61eb..5b2ec5c 100644
--- a/src/mzed.c
+++ b/src/mzed.c
@@ -134,7 +134,7 @@ mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B) {
for (rci_t i=0; i<C->nrows; ++i) {
for (rci_t j=0; j<C->ncols; ++j) {
for (rci_t k=0; k<A->ncols; ++k) {
- mzed_add_elem(C, i, j, ff->mul[mzed_read_elem(A,i, k)][mzed_read_elem(B, k, j)]);
+ mzed_add_elem(C, i, j, gf2e_mul(ff, mzed_read_elem(A,i, k), mzed_read_elem(B, k, j)));
}
}
}
--
Fast arithmetic with dense matrices over F_{2^e}
More information about the debian-science-commits
mailing list