[sdpb] 92/233: Added parallelized version of Rgemm (matrix multiplication) for use in Rgetrf (LU decomposition); another 10-15% speedup
Tobias Hansen
thansen at moszumanska.debian.org
Thu Mar 9 04:06:23 UTC 2017
This is an automated email from the git hooks/post-receive script.
thansen pushed a commit to branch master
in repository sdpb.
commit 3d997da7721352809f79c500af9d069be859c403
Author: David Simmons-Duffin <dsd at minerva.sns.ias.edu>
Date: Sat Oct 18 21:59:15 2014 -0400
Added parallelized version of Rgemm (matrix multiplication) for use in Rgetrf (LU decomposition); another 10-15% speedup
---
src/mpack/RgemmParallel.cpp | 233 ++++++++++++++++++++++++++++++++++++++++++++
src/mpack/Rgetrf.cpp | 15 ++-
src/mpack/mblas_gmp.h | 3 +
3 files changed, 250 insertions(+), 1 deletion(-)
diff --git a/src/mpack/RgemmParallel.cpp b/src/mpack/RgemmParallel.cpp
new file mode 100644
index 0000000..7d584c8
--- /dev/null
+++ b/src/mpack/RgemmParallel.cpp
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2008-2010
+ * Nakata, Maho
+ * All rights reserved.
+ *
+ * $Id: Rgemm.cpp,v 1.7 2010/08/07 05:50:10 nakatamaho Exp $
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/*
+Copyright (c) 1992-2007 The University of Tennessee. All rights reserved.
+ *
+ * $Id: Rgemm.cpp,v 1.7 2010/08/07 05:50:10 nakatamaho Exp $
+
+$COPYRIGHT$
+
+Additional copyrights may follow
+
+$HEADER$
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer listed
+ in this license in the documentation and/or other materials
+ provided with the distribution.
+
+- Neither the name of the copyright holders nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+Based on http://www.netlib.org/blas/dgemm.f
+Rgemm performs one of the matrix-matrix operations
+ C := alpha*op(A)*op(B) + beta*C,
+where op(X) is one of
+ op(X) = X or op(X) = X',
+alpha and beta are scalars, and A, B and C are matrices, with op( A )
+an m by k matrix, op(B) a k by n matrix and C an m by n matrix.
+*/
+
+#include <mblas.h>
+#include "omp.h"
+
+void RgemmParallel(const char *transa, const char *transb, INTEGER m, INTEGER n, INTEGER k, REAL alpha, REAL * A, INTEGER lda, REAL * B,
+ INTEGER ldb, REAL beta, REAL * C, INTEGER ldc)
+{
+ INTEGER nota, notb, nrowa, ncola, nrowb, info;
+ REAL Zero = 0.0, One = 1.0;
+
+ nota = Mlsame(transa, "N");
+ notb = Mlsame(transb, "N");
+ if (nota) {
+ nrowa = m;
+ ncola = k;
+ } else {
+ nrowa = k;
+ ncola = m;
+ }
+ if (notb) {
+ nrowb = k;
+ } else {
+ nrowb = n;
+ }
+//Test the input parameters.
+ info = 0;
+ if (!nota && (!Mlsame(transa, "C")) && (!Mlsame(transa, "T")))
+ info = 1;
+ else if (!notb && (!Mlsame(transb, "C")) && (!Mlsame(transb, "T")))
+ info = 2;
+ else if (m < 0)
+ info = 3;
+ else if (n < 0)
+ info = 4;
+ else if (k < 0)
+ info = 5;
+ else if (lda < max((INTEGER) 1, nrowa))
+ info = 8;
+ else if (ldb < max((INTEGER) 1, nrowb))
+ info = 10;
+ else if (ldc < max((INTEGER) 1, m))
+ info = 13;
+ if (info != 0) {
+ Mxerbla("Rgemm ", info);
+ return;
+ }
+//Quick return if possible.
+ if ((m == 0) || (n == 0) || (((alpha == Zero) || (k == 0)) && (beta == One)))
+ return;
+
+//And when alpha == 0.0
+ if (alpha == Zero) {
+ if (beta == Zero) {
+ #pragma omp parallel for schedule(static)
+ for (INTEGER j = 0; j < n; j++) {
+ for (INTEGER i = 0; i < m; i++) {
+ C[i + j * ldc] = Zero;
+ }
+ }
+ } else {
+ #pragma omp parallel for schedule(static)
+ for (INTEGER j = 0; j < n; j++) {
+ for (INTEGER i = 0; i < m; i++) {
+ C[i + j * ldc] = beta * C[i + j * ldc];
+ }
+ }
+ }
+ return;
+ }
+//Start the operations.
+ if (notb) {
+ if (nota) {
+//Form C := alpha*A*B + beta*C.
+ #pragma omp parallel for schedule(static)
+ for (INTEGER j = 0; j < n; j++) {
+ REAL temp;
+ if (beta == Zero) {
+ for (INTEGER i = 0; i < m; i++) {
+ C[i + j * ldc] = Zero;
+ }
+ } else if (beta != One) {
+ for (INTEGER i = 0; i < m; i++) {
+ C[i + j * ldc] = beta * C[i + j * ldc];
+ }
+ }
+ for (INTEGER l = 0; l < k; l++) {
+ if (B[l + j * ldb] != Zero) {
+ temp = alpha * B[l + j * ldb];
+ for (INTEGER i = 0; i < m; i++) {
+ C[i + j * ldc] = C[i + j * ldc] + temp * A[i + l * lda];
+ }
+ }
+ }
+ }
+ } else {
+//Form C := alpha*A'*B + beta*C.
+ #pragma omp parallel for schedule(static)
+ for (INTEGER j = 0; j < n; j++) {
+ REAL temp;
+ for (INTEGER i = 0; i < m; i++) {
+ temp = Zero;
+ for (INTEGER l = 0; l < k; l++) {
+ temp = temp + A[l + i * lda] * B[l + j * ldb];
+ }
+ if (beta == Zero)
+ C[i + j * ldc] = alpha * temp;
+ else
+ C[i + j * ldc] = alpha * temp + beta * C[i + j * ldc];
+ }
+ }
+ }
+ } else {
+ if (nota) {
+//Form C := alpha*A*B' + beta*C.
+ #pragma omp parallel for schedule(static)
+ for (INTEGER j = 0; j < n; j++) {
+ REAL temp;
+ if (beta == Zero) {
+ for (INTEGER i = 0; i < m; i++) {
+ C[i + j * ldc] = Zero;
+ }
+ } else if (beta != One) {
+ for (INTEGER i = 0; i < m; i++) {
+ C[i + j * ldc] = beta * C[i + j * ldc];
+ }
+ }
+ for (INTEGER l = 0; l < k; l++) {
+ if (B[j + l * ldb] != Zero) {
+ temp = alpha * B[j + l * ldb];
+ for (INTEGER i = 0; i < m; i++) {
+ C[i + j * ldc] = C[i + j * ldc] + temp * A[i + l * lda];
+ }
+ }
+ }
+ }
+ } else {
+//Form C := alpha*A'*B' + beta*C.
+ #pragma omp parallel for schedule(static)
+ for (INTEGER j = 0; j < n; j++) {
+ REAL temp;
+ for (INTEGER i = 0; i < m; i++) {
+ temp = Zero;
+ for (INTEGER l = 0; l < k; l++) {
+ temp = temp + A[l + i * lda] * B[j + l * ldb];
+ }
+ if (beta == Zero)
+ C[i + j * ldc] = alpha * temp;
+ else
+ C[i + j * ldc] = alpha * temp + beta * C[i + j * ldc];
+ }
+ }
+ }
+ }
+ return;
+}
diff --git a/src/mpack/Rgetrf.cpp b/src/mpack/Rgetrf.cpp
index a359c90..7d8d0ee 100644
--- a/src/mpack/Rgetrf.cpp
+++ b/src/mpack/Rgetrf.cpp
@@ -67,6 +67,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <mblas.h>
#include <mlapack.h>
+#include "../Timers.h"
void Rgetrf(INTEGER m, INTEGER n, REAL * A, INTEGER lda, INTEGER * ipiv, INTEGER * info)
{
@@ -94,14 +95,18 @@ void Rgetrf(INTEGER m, INTEGER n, REAL * A, INTEGER lda, INTEGER * ipiv, INTEGER
nb = iMlaenv(1, "Rgetrf", " ", m, n, -1, -1);
if (nb <= 1 || nb >= min(m, n)) {
//Use unblocked code.
+ timers["Rgetf2 unblocked"].resume();
Rgetf2(m, n, A, lda, ipiv, info);
+ timers["Rgetf2 unblocked"].stop();
} else {
//Use blocked code.
for (j = 1; j <= min(m, n); j = j + nb) {
jb = min(min(m, n) - j + 1, nb);
//Factor diagonal and subdiagonal blocks and test for exact
//singularity.
+ timers["Rgetf2 blocked"].resume();
Rgetf2(m - j + 1, jb, &A[(j - 1) + (j - 1) * lda], lda, &ipiv[j - 1], &iinfo);
+ timers["Rgetf2 blocked"].stop();
//Adjust INFO and the pivot indices.
if (*info == 0 && iinfo > 0) {
*info = iinfo + j - 1;
@@ -110,16 +115,24 @@ void Rgetrf(INTEGER m, INTEGER n, REAL * A, INTEGER lda, INTEGER * ipiv, INTEGER
ipiv[i - 1] = j - 1 + ipiv[i - 1];
}
//Apply interchanges to columns 1:J-one
+ timers["Rlaswp columns"].resume();
Rlaswp(j - 1, A, lda, j, j + jb - 1, ipiv, 1);
+ timers["Rlaswp columns"].stop();
if (j + jb <= n) {
//Apply interchanges to columns J+JB:N.
+ timers["Rlaswp columns2"].resume();
Rlaswp(n - j - jb + 1, &A[0 + (j + jb - 1) * lda], lda, j, j + jb - 1, ipiv, 1);
+ timers["Rlaswp columns2"].stop();
//Compute block row of U.
+ timers["Rtrsm"].resume();
Rtrsm("Left", "Lower", "No transpose", "Unit", jb, n - j - jb + 1, One, &A[(j - 1) + (j - 1) * lda], lda, &A[(j - 1) + (j + jb - 1) * lda], lda);
+ timers["Rtrsm"].stop();
if (j + jb <= m) {
//Update trailing submatrix.
- Rgemm("No transpose", "No transpose", m - j - jb + 1,
+ timers["Rgemm"].resume();
+ RgemmParallel("No transpose", "No transpose", m - j - jb + 1,
n - j - jb + 1, jb, -One, &A[(j + jb - 1) + (j - 1) * lda], lda, &A[(j - 1) + (j + jb - 1) * lda], lda, One, &A[(j + jb - 1) + (j + jb - 1) * lda], lda);
+ timers["Rgemm"].stop();
}
}
}
diff --git a/src/mpack/mblas_gmp.h b/src/mpack/mblas_gmp.h
index 253c18a..53bdf1b 100644
--- a/src/mpack/mblas_gmp.h
+++ b/src/mpack/mblas_gmp.h
@@ -84,6 +84,9 @@ void Rtrsm(const char *side, const char *uplo, const char *transa,
void Rgemm(const char *transa, const char *transb, mpackint m, mpackint n,
mpackint k, mpf_class alpha, mpf_class * A, mpackint lda, mpf_class * B,
mpackint ldb, mpf_class beta, mpf_class * C, mpackint ldc);
+void RgemmParallel(const char *transa, const char *transb, mpackint m, mpackint n,
+ mpackint k, mpf_class alpha, mpf_class * A, mpackint lda, mpf_class * B,
+ mpackint ldb, mpf_class beta, mpf_class * C, mpackint ldc);
void Rsyr2k(const char *uplo, const char *trans, mpackint n, mpackint k,
mpf_class alpha, mpf_class * A, mpackint lda, mpf_class * B, mpackint ldb,
mpf_class beta, mpf_class * C, mpackint ldc);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/sdpb.git
More information about the debian-science-commits
mailing list