[superlu] 05/11: Imported Upstream version 3.0+20070106
Nico Schlömer
nschloe-guest at moszumanska.debian.org
Tue May 17 19:22:58 UTC 2016
This is an automated email from the git hooks/post-receive script.
nschloe-guest pushed a commit to branch master
in repository superlu.
commit 5e31076dda557ee3dc0735e83c786670d3087ce4
Author: Nico Schlömer <nico.schloemer at gmail.com>
Date: Tue May 17 21:22:24 2016 +0200
Imported Upstream version 3.0+20070106
---
CBLAS/Cnames.h | 1 -
CBLAS/Makefile | 2 +-
CBLAS/cmyblas2.c | 183 --
CBLAS/dmyblas2.c | 225 ---
CBLAS/f2c.h | 2 +-
CBLAS/slu_Cnames.h | 1 +
CBLAS/smyblas2.c | 225 ---
CBLAS/zmyblas2.c | 183 --
EXAMPLE/Makefile | 64 +-
EXAMPLE/clinsol.c | 2 +-
EXAMPLE/clinsol1.c | 2 +-
EXAMPLE/clinsolx.c | 2 +-
EXAMPLE/clinsolx1.c | 2 +-
EXAMPLE/clinsolx2.c | 2 +-
EXAMPLE/dlinsol.c | 2 +-
EXAMPLE/dlinsol1.c | 2 +-
EXAMPLE/dlinsolx.c | 2 +-
EXAMPLE/dlinsolx1.c | 2 +-
EXAMPLE/dlinsolx2.c | 2 +-
EXAMPLE/dreadtriple.c | 118 ++
EXAMPLE/slinsol.c | 2 +-
EXAMPLE/slinsol1.c | 2 +-
EXAMPLE/slinsolx.c | 2 +-
EXAMPLE/slinsolx1.c | 2 +-
EXAMPLE/slinsolx2.c | 2 +-
EXAMPLE/sp_ienv.c | 2 +
EXAMPLE/superlu.c | 2 +-
EXAMPLE/zlinsol.c | 2 +-
EXAMPLE/zlinsol1.c | 2 +-
EXAMPLE/zlinsolx.c | 2 +-
EXAMPLE/zlinsolx1.c | 2 +-
EXAMPLE/zlinsolx2.c | 2 +-
EXAMPLE/zreadtriple.c | 90 +
FORTRAN/Makefile | 3 +-
FORTRAN/{c_fortran_dgssv.c => c_fortran_cgssv.c} | 30 +-
FORTRAN/c_fortran_dgssv.c | 17 +-
.../{c_fortran_dgssv.c => c_fortran_dgssv.c.bak} | 2 +-
FORTRAN/{c_fortran_dgssv.c => c_fortran_sgssv.c} | 30 +-
FORTRAN/{c_fortran_dgssv.c => c_fortran_zgssv.c} | 28 +-
FORTRAN/f77_main.f | 4 +-
FORTRAN/f77exm.out | 9 +
INSTALL/Makefile | 6 +-
MAKE_INC/make.alpha | 4 +-
MAKE_INC/make.cray | 4 +-
MAKE_INC/make.hppa | 4 +-
MAKE_INC/make.inc | 6 +-
MAKE_INC/make.linux | 6 +-
MAKE_INC/make.rs6k | 4 +-
MAKE_INC/make.sgi | 6 +-
MAKE_INC/make.solaris | 8 +-
MAKE_INC/make.sp | 4 +-
MAKE_INC/make.sun4 | 8 +-
MATLAB/mexlusolve.c | 2 +-
MATLAB/mexsuperlu.c | 2 +-
README | 2 +-
SRC/Makefile | 68 +-
SRC/ccolumn_bmod.c | 2 +-
SRC/ccolumn_dfs.c | 3 +-
SRC/ccopy_to_ucol.c | 4 +-
SRC/cgscon.c | 2 +-
SRC/cgsequ.c | 4 +-
SRC/cgsrfs.c | 2 +-
SRC/cgssv.c | 3 +-
SRC/cgssvx.c | 4 +-
SRC/cgstrf.c | 6 +-
SRC/cgstrs.c | 2 +-
SRC/cgstrs.c.bak | 339 ----
SRC/clacon.c | 5 +-
SRC/clangs.c | 4 +-
SRC/claqgs.c | 4 +-
SRC/cmemory.c | 24 +-
SRC/cmyblas2.c | 3 +-
SRC/colamd.c | 1961 ++++++++++++++------
SRC/colamd.h | 215 ++-
SRC/cpanel_bmod.c | 2 +-
SRC/cpanel_dfs.c | 4 +-
SRC/cpivotL.c | 2 +-
SRC/cpivotgrowth.c | 4 +-
SRC/cpruneL.c | 4 +-
SRC/creadhb.c | 3 +-
SRC/csnode_bmod.c | 2 +-
SRC/csnode_dfs.c | 4 +-
SRC/csp_blas2.c | 14 +-
SRC/csp_blas2.c.bak | 479 -----
SRC/csp_blas3.c | 4 +-
SRC/cutil.c | 14 +-
SRC/dcolumn_bmod.c | 2 +-
SRC/dcolumn_dfs.c | 3 +-
SRC/dcomplex.c | 7 +-
SRC/dcopy_to_ucol.c | 4 +-
SRC/dgscon.c | 2 +-
SRC/dgsequ.c | 4 +-
SRC/dgsrfs.c | 2 +-
SRC/dgssv.c | 3 +-
SRC/dgssvx.c | 2 +-
SRC/dgstrf.c | 6 +-
SRC/dgstrs.c | 2 +-
SRC/dgstrs.c.bak | 334 ----
SRC/dgstrsL.c | 4 +-
SRC/dlacon.c | 3 +-
SRC/dlamch.c | 2 +
SRC/dlangs.c | 4 +-
SRC/dlaqgs.c | 3 +-
SRC/dmemory.c | 24 +-
SRC/dmyblas2.c | 1 -
SRC/dpanel_bmod.c | 2 +-
SRC/dpanel_dfs.c | 4 +-
SRC/dpivotL.c | 2 +-
SRC/dpivotgrowth.c | 4 +-
SRC/dpruneL.c | 4 +-
SRC/dreadhb.c | 3 +-
SRC/dsnode_bmod.c | 2 +-
SRC/dsnode_dfs.c | 4 +-
SRC/dsp_blas2.c | 2 +-
SRC/dsp_blas2.c.bak | 469 -----
SRC/dsp_blas3.c | 4 +-
SRC/dutil.c | 11 +-
SRC/dzsum1.c | 3 +-
SRC/get_perm_c.c | 14 +-
SRC/heap_relax_snode.c | 2 +-
SRC/icmax1.c | 3 +-
SRC/izmax1.c | 16 +-
SRC/lsame.c | 2 +
SRC/memory.c | 5 +-
SRC/{colamd.c => old_colamd.c} | 0
SRC/{colamd.h => old_colamd.h} | 0
SRC/relax_snode.c | 2 +-
SRC/scolumn_bmod.c | 2 +-
SRC/scolumn_dfs.c | 3 +-
SRC/scomplex.c | 7 +-
SRC/scopy_to_ucol.c | 4 +-
SRC/scsum1.c | 3 +-
SRC/sgscon.c | 2 +-
SRC/sgsequ.c | 4 +-
SRC/sgsrfs.c | 2 +-
SRC/sgssv.c | 3 +-
SRC/sgssvx.c | 2 +-
SRC/sgstrf.c | 6 +-
SRC/sgstrs.c | 2 +-
SRC/sgstrs.c.bak | 334 ----
SRC/slacon.c | 3 +-
SRC/slamch.c | 2 +
SRC/slangs.c | 4 +-
SRC/slaqgs.c | 4 +-
SRC/{Cnames.h => slu_Cnames.h} | 57 +
SRC/{csp_defs.h => slu_cdefs.h} | 6 +-
SRC/{dcomplex.h => slu_dcomplex.h} | 0
SRC/{dsp_defs.h => slu_ddefs.h} | 4 +-
SRC/{scomplex.h => slu_scomplex.h} | 0
SRC/{ssp_defs.h => slu_sdefs.h} | 4 +-
SRC/{util.h => slu_util.h} | 8 +-
SRC/{zsp_defs.h => slu_zdefs.h} | 6 +-
SRC/smemory.c | 24 +-
SRC/smyblas2.c | 1 -
SRC/sp_coletree.c | 2 +-
SRC/sp_ienv.c | 2 +
SRC/sp_preorder.c | 2 +-
SRC/spanel_bmod.c | 2 +-
SRC/spanel_dfs.c | 4 +-
SRC/spivotL.c | 2 +-
SRC/spivotgrowth.c | 4 +-
SRC/spruneL.c | 4 +-
SRC/sreadhb.c | 3 +-
SRC/ssnode_bmod.c | 2 +-
SRC/ssnode_dfs.c | 4 +-
SRC/ssp_blas2.c | 2 +-
SRC/ssp_blas2.c.bak | 469 -----
SRC/ssp_blas3.c | 4 +-
SRC/sutil.c | 11 +-
SRC/util.c | 21 +-
SRC/xerbla.c | 3 +
SRC/zcolumn_bmod.c | 2 +-
SRC/zcolumn_dfs.c | 3 +-
SRC/zcopy_to_ucol.c | 4 +-
SRC/zgscon.c | 2 +-
SRC/zgsequ.c | 4 +-
SRC/zgsrfs.c | 2 +-
SRC/zgssv.c | 3 +-
SRC/zgssvx.c | 4 +-
SRC/zgstrf.c | 6 +-
SRC/zgstrs.c | 2 +-
SRC/zgstrs.c.bak | 339 ----
SRC/zlacon.c | 5 +-
SRC/zlangs.c | 4 +-
SRC/zlaqgs.c | 4 +-
SRC/zmemory.c | 24 +-
SRC/{zmemory.c => zmemory.c.bak} | 14 +-
SRC/zmyblas2.c | 3 +-
SRC/zpanel_bmod.c | 2 +-
SRC/zpanel_dfs.c | 4 +-
SRC/zpivotL.c | 2 +-
SRC/zpivotgrowth.c | 4 +-
SRC/zpruneL.c | 4 +-
SRC/zreadhb.c | 3 +-
SRC/zsnode_bmod.c | 2 +-
SRC/zsnode_dfs.c | 4 +-
SRC/zsp_blas2.c | 14 +-
SRC/zsp_blas2.c.bak | 479 -----
SRC/zsp_blas3.c | 4 +-
SRC/zutil.c | 14 +-
TESTING/MATGEN/Cnames.h | 1 -
TESTING/MATGEN/clatb4.c | 1 +
TESTING/MATGEN/dlatb4.c | 1 +
TESTING/MATGEN/f2c.h | 2 +-
TESTING/MATGEN/slatb4.c | 1 +
TESTING/MATGEN/slu_Cnames.h | 1 +
TESTING/MATGEN/zlatb4.c | 1 +
TESTING/Makefile | 5 +-
TESTING/cdrive.c | 9 +-
TESTING/ddrive.c | 7 +-
TESTING/sdrive.c | 9 +-
TESTING/sp_cconvert.c | 4 +-
TESTING/sp_cget01.c | 4 +-
TESTING/sp_cget02.c | 2 +-
TESTING/sp_cget04.c | 4 +-
TESTING/sp_cget07.c | 2 +-
TESTING/sp_dconvert.c | 4 +-
TESTING/sp_dget01.c | 4 +-
TESTING/sp_dget02.c | 2 +-
TESTING/sp_dget04.c | 4 +-
TESTING/sp_dget07.c | 2 +-
TESTING/sp_ienv.c | 2 +
TESTING/sp_sconvert.c | 4 +-
TESTING/sp_sget01.c | 4 +-
TESTING/sp_sget02.c | 2 +-
TESTING/sp_sget04.c | 4 +-
TESTING/sp_sget07.c | 2 +-
TESTING/sp_zconvert.c | 4 +-
TESTING/sp_zget01.c | 4 +-
TESTING/sp_zget02.c | 2 +-
TESTING/sp_zget04.c | 4 +-
TESTING/sp_zget07.c | 2 +-
TESTING/zdrive.c | 7 +-
make.inc | 12 +-
234 files changed, 2408 insertions(+), 5202 deletions(-)
diff --git a/CBLAS/Cnames.h b/CBLAS/Cnames.h
deleted file mode 120000
index dd3a080..0000000
--- a/CBLAS/Cnames.h
+++ /dev/null
@@ -1 +0,0 @@
-../SRC/Cnames.h
\ No newline at end of file
diff --git a/CBLAS/Makefile b/CBLAS/Makefile
index a6fdc77..76d368c 100644
--- a/CBLAS/Makefile
+++ b/CBLAS/Makefile
@@ -83,4 +83,4 @@ complex16: $(ZBLAS1) $(ZBLAS2) $(ZBLAS3)
$(CC) $(CFLAGS) $(CDEFS) -I$(HEADER) -c $< $(VERBOSE)
clean:
- rm -f *.o ../blas$(PLAT).a
+ rm -f *.o ../libblas.a
diff --git a/CBLAS/cmyblas2.c b/CBLAS/cmyblas2.c
deleted file mode 100644
index 74fdbca..0000000
--- a/CBLAS/cmyblas2.c
+++ /dev/null
@@ -1,183 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
- * File name: cmyblas2.c
- * Purpose:
- * Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- * This is only used when the system lacks an efficient BLAS library.
- */
-#include "scomplex.h"
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
- * The solution will be returned in the rhs vector.
- */
-void clsolve ( int ldm, int ncol, complex *M, complex *rhs )
-{
- int k;
- complex x0, x1, x2, x3, temp;
- complex *M0;
- complex *Mki0, *Mki1, *Mki2, *Mki3;
- register int firstcol = 0;
-
- M0 = &M[0];
-
-
- while ( firstcol < ncol - 3 ) { /* Do 4 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
- Mki2 = Mki1 + ldm + 1;
- Mki3 = Mki2 + ldm + 1;
-
- x0 = rhs[firstcol];
- cc_mult(&temp, &x0, Mki0); Mki0++;
- c_sub(&x1, &rhs[firstcol+1], &temp);
- cc_mult(&temp, &x0, Mki0); Mki0++;
- c_sub(&x2, &rhs[firstcol+2], &temp);
- cc_mult(&temp, &x1, Mki1); Mki1++;
- c_sub(&x2, &x2, &temp);
- cc_mult(&temp, &x0, Mki0); Mki0++;
- c_sub(&x3, &rhs[firstcol+3], &temp);
- cc_mult(&temp, &x1, Mki1); Mki1++;
- c_sub(&x3, &x3, &temp);
- cc_mult(&temp, &x2, Mki2); Mki2++;
- c_sub(&x3, &x3, &temp);
-
- rhs[++firstcol] = x1;
- rhs[++firstcol] = x2;
- rhs[++firstcol] = x3;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++) {
- cc_mult(&temp, &x0, Mki0); Mki0++;
- c_sub(&rhs[k], &rhs[k], &temp);
- cc_mult(&temp, &x1, Mki1); Mki1++;
- c_sub(&rhs[k], &rhs[k], &temp);
- cc_mult(&temp, &x2, Mki2); Mki2++;
- c_sub(&rhs[k], &rhs[k], &temp);
- cc_mult(&temp, &x3, Mki3); Mki3++;
- c_sub(&rhs[k], &rhs[k], &temp);
- }
-
- M0 += 4 * ldm + 4;
- }
-
- if ( firstcol < ncol - 1 ) { /* Do 2 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
-
- x0 = rhs[firstcol];
- cc_mult(&temp, &x0, Mki0); Mki0++;
- c_sub(&x1, &rhs[firstcol+1], &temp);
-
- rhs[++firstcol] = x1;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++) {
- cc_mult(&temp, &x0, Mki0); Mki0++;
- c_sub(&rhs[k], &rhs[k], &temp);
- cc_mult(&temp, &x1, Mki1); Mki1++;
- c_sub(&rhs[k], &rhs[k], &temp);
- }
- }
-
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-cusolve ( ldm, ncol, M, rhs )
-int ldm; /* in */
-int ncol; /* in */
-complex *M; /* in */
-complex *rhs; /* modified */
-{
- complex xj, temp;
- int jcol, j, irow;
-
- jcol = ncol - 1;
-
- for (j = 0; j < ncol; j++) {
-
- c_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
- rhs[jcol] = xj;
-
- for (irow = 0; irow < jcol; irow++) {
- cc_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
- c_sub(&rhs[irow], &rhs[irow], &temp);
- }
-
- jcol--;
-
- }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void cmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm; /* in -- leading dimension of M */
-int nrow; /* in */
-int ncol; /* in */
-complex *M; /* in */
-complex *vec; /* in */
-complex *Mxvec; /* in/out */
-{
- complex vi0, vi1, vi2, vi3;
- complex *M0, temp;
- complex *Mki0, *Mki1, *Mki2, *Mki3;
- register int firstcol = 0;
- int k;
-
- M0 = &M[0];
-
- while ( firstcol < ncol - 3 ) { /* Do 4 columns */
- Mki0 = M0;
- Mki1 = Mki0 + ldm;
- Mki2 = Mki1 + ldm;
- Mki3 = Mki2 + ldm;
-
- vi0 = vec[firstcol++];
- vi1 = vec[firstcol++];
- vi2 = vec[firstcol++];
- vi3 = vec[firstcol++];
- for (k = 0; k < nrow; k++) {
- cc_mult(&temp, &vi0, Mki0); Mki0++;
- c_add(&Mxvec[k], &Mxvec[k], &temp);
- cc_mult(&temp, &vi1, Mki1); Mki1++;
- c_add(&Mxvec[k], &Mxvec[k], &temp);
- cc_mult(&temp, &vi2, Mki2); Mki2++;
- c_add(&Mxvec[k], &Mxvec[k], &temp);
- cc_mult(&temp, &vi3, Mki3); Mki3++;
- c_add(&Mxvec[k], &Mxvec[k], &temp);
- }
-
- M0 += 4 * ldm;
- }
-
- while ( firstcol < ncol ) { /* Do 1 column */
- Mki0 = M0;
- vi0 = vec[firstcol++];
- for (k = 0; k < nrow; k++) {
- cc_mult(&temp, &vi0, Mki0); Mki0++;
- c_add(&Mxvec[k], &Mxvec[k], &temp);
- }
- M0 += ldm;
- }
-
-}
-
diff --git a/CBLAS/dmyblas2.c b/CBLAS/dmyblas2.c
deleted file mode 100644
index e6bbdd1..0000000
--- a/CBLAS/dmyblas2.c
+++ /dev/null
@@ -1,225 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
- * File name: dmyblas2.c
- * Purpose:
- * Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- * This is only used when the system lacks an efficient BLAS library.
- */
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
- * The solution will be returned in the rhs vector.
- */
-void dlsolve ( int ldm, int ncol, double *M, double *rhs )
-{
- int k;
- double x0, x1, x2, x3, x4, x5, x6, x7;
- double *M0;
- register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
- register int firstcol = 0;
-
- M0 = &M[0];
-
- while ( firstcol < ncol - 7 ) { /* Do 8 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
- Mki2 = Mki1 + ldm + 1;
- Mki3 = Mki2 + ldm + 1;
- Mki4 = Mki3 + ldm + 1;
- Mki5 = Mki4 + ldm + 1;
- Mki6 = Mki5 + ldm + 1;
- Mki7 = Mki6 + ldm + 1;
-
- x0 = rhs[firstcol];
- x1 = rhs[firstcol+1] - x0 * *Mki0++;
- x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
- x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
- x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- - x3 * *Mki3++;
- x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- - x3 * *Mki3++ - x4 * *Mki4++;
- x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
- x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
- - x6 * *Mki6++;
-
- rhs[++firstcol] = x1;
- rhs[++firstcol] = x2;
- rhs[++firstcol] = x3;
- rhs[++firstcol] = x4;
- rhs[++firstcol] = x5;
- rhs[++firstcol] = x6;
- rhs[++firstcol] = x7;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++)
- rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
- - x2 * *Mki2++ - x3 * *Mki3++
- - x4 * *Mki4++ - x5 * *Mki5++
- - x6 * *Mki6++ - x7 * *Mki7++;
-
- M0 += 8 * ldm + 8;
- }
-
- while ( firstcol < ncol - 3 ) { /* Do 4 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
- Mki2 = Mki1 + ldm + 1;
- Mki3 = Mki2 + ldm + 1;
-
- x0 = rhs[firstcol];
- x1 = rhs[firstcol+1] - x0 * *Mki0++;
- x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
- x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-
- rhs[++firstcol] = x1;
- rhs[++firstcol] = x2;
- rhs[++firstcol] = x3;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++)
- rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
- - x2 * *Mki2++ - x3 * *Mki3++;
-
- M0 += 4 * ldm + 4;
- }
-
- if ( firstcol < ncol - 1 ) { /* Do 2 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
-
- x0 = rhs[firstcol];
- x1 = rhs[firstcol+1] - x0 * *Mki0++;
-
- rhs[++firstcol] = x1;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++)
- rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
-
- }
-
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-dusolve ( ldm, ncol, M, rhs )
-int ldm; /* in */
-int ncol; /* in */
-double *M; /* in */
-double *rhs; /* modified */
-{
- double xj;
- int jcol, j, irow;
-
- jcol = ncol - 1;
-
- for (j = 0; j < ncol; j++) {
-
- xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */
- rhs[jcol] = xj;
-
- for (irow = 0; irow < jcol; irow++)
- rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */
-
- jcol--;
-
- }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void dmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-
-int ldm; /* in -- leading dimension of M */
-int nrow; /* in */
-int ncol; /* in */
-double *M; /* in */
-double *vec; /* in */
-double *Mxvec; /* in/out */
-
-{
- double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
- double *M0;
- register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
- register int firstcol = 0;
- int k;
-
- M0 = &M[0];
- while ( firstcol < ncol - 7 ) { /* Do 8 columns */
-
- Mki0 = M0;
- Mki1 = Mki0 + ldm;
- Mki2 = Mki1 + ldm;
- Mki3 = Mki2 + ldm;
- Mki4 = Mki3 + ldm;
- Mki5 = Mki4 + ldm;
- Mki6 = Mki5 + ldm;
- Mki7 = Mki6 + ldm;
-
- vi0 = vec[firstcol++];
- vi1 = vec[firstcol++];
- vi2 = vec[firstcol++];
- vi3 = vec[firstcol++];
- vi4 = vec[firstcol++];
- vi5 = vec[firstcol++];
- vi6 = vec[firstcol++];
- vi7 = vec[firstcol++];
-
- for (k = 0; k < nrow; k++)
- Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
- + vi2 * *Mki2++ + vi3 * *Mki3++
- + vi4 * *Mki4++ + vi5 * *Mki5++
- + vi6 * *Mki6++ + vi7 * *Mki7++;
-
- M0 += 8 * ldm;
- }
-
- while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-
- Mki0 = M0;
- Mki1 = Mki0 + ldm;
- Mki2 = Mki1 + ldm;
- Mki3 = Mki2 + ldm;
-
- vi0 = vec[firstcol++];
- vi1 = vec[firstcol++];
- vi2 = vec[firstcol++];
- vi3 = vec[firstcol++];
- for (k = 0; k < nrow; k++)
- Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
- + vi2 * *Mki2++ + vi3 * *Mki3++ ;
-
- M0 += 4 * ldm;
- }
-
- while ( firstcol < ncol ) { /* Do 1 column */
-
- Mki0 = M0;
- vi0 = vec[firstcol++];
- for (k = 0; k < nrow; k++)
- Mxvec[k] += vi0 * *Mki0++;
-
- M0 += ldm;
- }
-
-}
-
diff --git a/CBLAS/f2c.h b/CBLAS/f2c.h
index caa33e1..3116864 100644
--- a/CBLAS/f2c.h
+++ b/CBLAS/f2c.h
@@ -4,7 +4,7 @@
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
-#include "Cnames.h"
+#include "slu_Cnames.h"
#ifndef F2C_INCLUDE
#define F2C_INCLUDE
diff --git a/CBLAS/slu_Cnames.h b/CBLAS/slu_Cnames.h
new file mode 120000
index 0000000..62b2820
--- /dev/null
+++ b/CBLAS/slu_Cnames.h
@@ -0,0 +1 @@
+../SRC/slu_Cnames.h
\ No newline at end of file
diff --git a/CBLAS/smyblas2.c b/CBLAS/smyblas2.c
deleted file mode 100644
index 729e17f..0000000
--- a/CBLAS/smyblas2.c
+++ /dev/null
@@ -1,225 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
- * File name: smyblas2.c
- * Purpose:
- * Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- * This is only used when the system lacks an efficient BLAS library.
- */
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
- * The solution will be returned in the rhs vector.
- */
-void slsolve ( int ldm, int ncol, float *M, float *rhs )
-{
- int k;
- float x0, x1, x2, x3, x4, x5, x6, x7;
- float *M0;
- register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
- register int firstcol = 0;
-
- M0 = &M[0];
-
- while ( firstcol < ncol - 7 ) { /* Do 8 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
- Mki2 = Mki1 + ldm + 1;
- Mki3 = Mki2 + ldm + 1;
- Mki4 = Mki3 + ldm + 1;
- Mki5 = Mki4 + ldm + 1;
- Mki6 = Mki5 + ldm + 1;
- Mki7 = Mki6 + ldm + 1;
-
- x0 = rhs[firstcol];
- x1 = rhs[firstcol+1] - x0 * *Mki0++;
- x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
- x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
- x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- - x3 * *Mki3++;
- x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- - x3 * *Mki3++ - x4 * *Mki4++;
- x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
- x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
- - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
- - x6 * *Mki6++;
-
- rhs[++firstcol] = x1;
- rhs[++firstcol] = x2;
- rhs[++firstcol] = x3;
- rhs[++firstcol] = x4;
- rhs[++firstcol] = x5;
- rhs[++firstcol] = x6;
- rhs[++firstcol] = x7;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++)
- rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
- - x2 * *Mki2++ - x3 * *Mki3++
- - x4 * *Mki4++ - x5 * *Mki5++
- - x6 * *Mki6++ - x7 * *Mki7++;
-
- M0 += 8 * ldm + 8;
- }
-
- while ( firstcol < ncol - 3 ) { /* Do 4 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
- Mki2 = Mki1 + ldm + 1;
- Mki3 = Mki2 + ldm + 1;
-
- x0 = rhs[firstcol];
- x1 = rhs[firstcol+1] - x0 * *Mki0++;
- x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
- x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-
- rhs[++firstcol] = x1;
- rhs[++firstcol] = x2;
- rhs[++firstcol] = x3;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++)
- rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
- - x2 * *Mki2++ - x3 * *Mki3++;
-
- M0 += 4 * ldm + 4;
- }
-
- if ( firstcol < ncol - 1 ) { /* Do 2 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
-
- x0 = rhs[firstcol];
- x1 = rhs[firstcol+1] - x0 * *Mki0++;
-
- rhs[++firstcol] = x1;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++)
- rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
-
- }
-
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-susolve ( ldm, ncol, M, rhs )
-int ldm; /* in */
-int ncol; /* in */
-float *M; /* in */
-float *rhs; /* modified */
-{
- float xj;
- int jcol, j, irow;
-
- jcol = ncol - 1;
-
- for (j = 0; j < ncol; j++) {
-
- xj = rhs[jcol] / M[jcol + jcol*ldm]; /* M(jcol, jcol) */
- rhs[jcol] = xj;
-
- for (irow = 0; irow < jcol; irow++)
- rhs[irow] -= xj * M[irow + jcol*ldm]; /* M(irow, jcol) */
-
- jcol--;
-
- }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-
-int ldm; /* in -- leading dimension of M */
-int nrow; /* in */
-int ncol; /* in */
-float *M; /* in */
-float *vec; /* in */
-float *Mxvec; /* in/out */
-
-{
- float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
- float *M0;
- register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
- register int firstcol = 0;
- int k;
-
- M0 = &M[0];
- while ( firstcol < ncol - 7 ) { /* Do 8 columns */
-
- Mki0 = M0;
- Mki1 = Mki0 + ldm;
- Mki2 = Mki1 + ldm;
- Mki3 = Mki2 + ldm;
- Mki4 = Mki3 + ldm;
- Mki5 = Mki4 + ldm;
- Mki6 = Mki5 + ldm;
- Mki7 = Mki6 + ldm;
-
- vi0 = vec[firstcol++];
- vi1 = vec[firstcol++];
- vi2 = vec[firstcol++];
- vi3 = vec[firstcol++];
- vi4 = vec[firstcol++];
- vi5 = vec[firstcol++];
- vi6 = vec[firstcol++];
- vi7 = vec[firstcol++];
-
- for (k = 0; k < nrow; k++)
- Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
- + vi2 * *Mki2++ + vi3 * *Mki3++
- + vi4 * *Mki4++ + vi5 * *Mki5++
- + vi6 * *Mki6++ + vi7 * *Mki7++;
-
- M0 += 8 * ldm;
- }
-
- while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-
- Mki0 = M0;
- Mki1 = Mki0 + ldm;
- Mki2 = Mki1 + ldm;
- Mki3 = Mki2 + ldm;
-
- vi0 = vec[firstcol++];
- vi1 = vec[firstcol++];
- vi2 = vec[firstcol++];
- vi3 = vec[firstcol++];
- for (k = 0; k < nrow; k++)
- Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
- + vi2 * *Mki2++ + vi3 * *Mki3++ ;
-
- M0 += 4 * ldm;
- }
-
- while ( firstcol < ncol ) { /* Do 1 column */
-
- Mki0 = M0;
- vi0 = vec[firstcol++];
- for (k = 0; k < nrow; k++)
- Mxvec[k] += vi0 * *Mki0++;
-
- M0 += ldm;
- }
-
-}
-
diff --git a/CBLAS/zmyblas2.c b/CBLAS/zmyblas2.c
deleted file mode 100644
index 59450cd..0000000
--- a/CBLAS/zmyblas2.c
+++ /dev/null
@@ -1,183 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
- * File name: zmyblas2.c
- * Purpose:
- * Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- * This is only used when the system lacks an efficient BLAS library.
- */
-#include "dcomplex.h"
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol).
- * The solution will be returned in the rhs vector.
- */
-void zlsolve ( int ldm, int ncol, doublecomplex *M, doublecomplex *rhs )
-{
- int k;
- doublecomplex x0, x1, x2, x3, temp;
- doublecomplex *M0;
- doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
- register int firstcol = 0;
-
- M0 = &M[0];
-
-
- while ( firstcol < ncol - 3 ) { /* Do 4 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
- Mki2 = Mki1 + ldm + 1;
- Mki3 = Mki2 + ldm + 1;
-
- x0 = rhs[firstcol];
- zz_mult(&temp, &x0, Mki0); Mki0++;
- z_sub(&x1, &rhs[firstcol+1], &temp);
- zz_mult(&temp, &x0, Mki0); Mki0++;
- z_sub(&x2, &rhs[firstcol+2], &temp);
- zz_mult(&temp, &x1, Mki1); Mki1++;
- z_sub(&x2, &x2, &temp);
- zz_mult(&temp, &x0, Mki0); Mki0++;
- z_sub(&x3, &rhs[firstcol+3], &temp);
- zz_mult(&temp, &x1, Mki1); Mki1++;
- z_sub(&x3, &x3, &temp);
- zz_mult(&temp, &x2, Mki2); Mki2++;
- z_sub(&x3, &x3, &temp);
-
- rhs[++firstcol] = x1;
- rhs[++firstcol] = x2;
- rhs[++firstcol] = x3;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++) {
- zz_mult(&temp, &x0, Mki0); Mki0++;
- z_sub(&rhs[k], &rhs[k], &temp);
- zz_mult(&temp, &x1, Mki1); Mki1++;
- z_sub(&rhs[k], &rhs[k], &temp);
- zz_mult(&temp, &x2, Mki2); Mki2++;
- z_sub(&rhs[k], &rhs[k], &temp);
- zz_mult(&temp, &x3, Mki3); Mki3++;
- z_sub(&rhs[k], &rhs[k], &temp);
- }
-
- M0 += 4 * ldm + 4;
- }
-
- if ( firstcol < ncol - 1 ) { /* Do 2 columns */
- Mki0 = M0 + 1;
- Mki1 = Mki0 + ldm + 1;
-
- x0 = rhs[firstcol];
- zz_mult(&temp, &x0, Mki0); Mki0++;
- z_sub(&x1, &rhs[firstcol+1], &temp);
-
- rhs[++firstcol] = x1;
- ++firstcol;
-
- for (k = firstcol; k < ncol; k++) {
- zz_mult(&temp, &x0, Mki0); Mki0++;
- z_sub(&rhs[k], &rhs[k], &temp);
- zz_mult(&temp, &x1, Mki1); Mki1++;
- z_sub(&rhs[k], &rhs[k], &temp);
- }
- }
-
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-zusolve ( ldm, ncol, M, rhs )
-int ldm; /* in */
-int ncol; /* in */
-doublecomplex *M; /* in */
-doublecomplex *rhs; /* modified */
-{
- doublecomplex xj, temp;
- int jcol, j, irow;
-
- jcol = ncol - 1;
-
- for (j = 0; j < ncol; j++) {
-
- z_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
- rhs[jcol] = xj;
-
- for (irow = 0; irow < jcol; irow++) {
- zz_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
- z_sub(&rhs[irow], &rhs[irow], &temp);
- }
-
- jcol--;
-
- }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void zmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm; /* in -- leading dimension of M */
-int nrow; /* in */
-int ncol; /* in */
-doublecomplex *M; /* in */
-doublecomplex *vec; /* in */
-doublecomplex *Mxvec; /* in/out */
-{
- doublecomplex vi0, vi1, vi2, vi3;
- doublecomplex *M0, temp;
- doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
- register int firstcol = 0;
- int k;
-
- M0 = &M[0];
-
- while ( firstcol < ncol - 3 ) { /* Do 4 columns */
- Mki0 = M0;
- Mki1 = Mki0 + ldm;
- Mki2 = Mki1 + ldm;
- Mki3 = Mki2 + ldm;
-
- vi0 = vec[firstcol++];
- vi1 = vec[firstcol++];
- vi2 = vec[firstcol++];
- vi3 = vec[firstcol++];
- for (k = 0; k < nrow; k++) {
- zz_mult(&temp, &vi0, Mki0); Mki0++;
- z_add(&Mxvec[k], &Mxvec[k], &temp);
- zz_mult(&temp, &vi1, Mki1); Mki1++;
- z_add(&Mxvec[k], &Mxvec[k], &temp);
- zz_mult(&temp, &vi2, Mki2); Mki2++;
- z_add(&Mxvec[k], &Mxvec[k], &temp);
- zz_mult(&temp, &vi3, Mki3); Mki3++;
- z_add(&Mxvec[k], &Mxvec[k], &temp);
- }
-
- M0 += 4 * ldm;
- }
-
- while ( firstcol < ncol ) { /* Do 1 column */
- Mki0 = M0;
- vi0 = vec[firstcol++];
- for (k = 0; k < nrow; k++) {
- zz_mult(&temp, &vi0, Mki0); Mki0++;
- z_add(&Mxvec[k], &Mxvec[k], &temp);
- }
- M0 += ldm;
- }
-
-}
-
diff --git a/EXAMPLE/Makefile b/EXAMPLE/Makefile
index f3275cc..241af0d 100644
--- a/EXAMPLE/Makefile
+++ b/EXAMPLE/Makefile
@@ -33,6 +33,7 @@ include ../make.inc
#######################################################################
HEADER = ../SRC
+LIBS = ../$(SUPERLULIB) $(BLASLIB) -lm
SLINEXM = slinsol.o
SLINEXM1 = slinsol1.o
@@ -68,88 +69,67 @@ complex: clinsol clinsol1 clinsolx clinsolx1 clinsolx2
complex16: zlinsol zlinsol1 zlinsolx zlinsolx1 zlinsolx2
slinsol: $(SLINEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(SLINEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(SLINEXM) $(LIBS) -o $@
slinsol1: $(SLINEXM1) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(SLINEXM1) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(SLINEXM1) $(LIBS) -o $@
slinsolx: $(SLINXEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(SLINXEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(SLINXEXM) $(LIBS) -o $@
slinsolx1: $(SLINXEXM1) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(SLINXEXM1) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(SLINXEXM1) $(LIBS) -o $@
slinsolx2: $(SLINXEXM2) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(SLINXEXM2) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(SLINXEXM2) $(LIBS) -o $@
dlinsol: $(DLINEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(DLINEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(DLINEXM) $(LIBS) -o $@
dlinsol1: $(DLINEXM1) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(DLINEXM1) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(DLINEXM1) $(LIBS) -o $@
dlinsolx: $(DLINXEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(DLINXEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(DLINXEXM) $(LIBS) -o $@
dlinsolx1: $(DLINXEXM1) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(DLINXEXM1) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(DLINXEXM1) $(LIBS) -o $@
dlinsolx2: $(DLINXEXM2) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(DLINXEXM2) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(DLINXEXM2) $(LIBS) -o $@
superlu: $(SUPERLUEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(SUPERLUEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(SUPERLUEXM) $(LIBS) -o $@
clinsol: $(CLINEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(CLINEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(CLINEXM) $(LIBS) -o $@
clinsol1: $(CLINEXM1) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(CLINEXM1) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(CLINEXM1) $(LIBS) -o $@
clinsolx: $(CLINXEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(CLINXEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(CLINXEXM) $(LIBS) -o $@
clinsolx1: $(CLINXEXM1) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(CLINXEXM1) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(CLINXEXM1) $(LIBS) -o $@
clinsolx2: $(CLINXEXM2) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(CLINXEXM2) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(CLINXEXM2) $(LIBS) -o $@
zlinsol: $(ZLINEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(ZLINEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(ZLINEXM) $(LIBS) -o $@
zlinsol1: $(ZLINEXM1) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(ZLINEXM1) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(ZLINEXM1) $(LIBS) -o $@
zlinsolx: $(ZLINXEXM) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(ZLINXEXM) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(ZLINXEXM) $(LIBS) -o $@
zlinsolx1: $(ZLINXEXM1) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(ZLINXEXM1) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(ZLINXEXM1) $(LIBS) -o $@
zlinsolx2: $(ZLINXEXM2) ../$(SUPERLULIB)
- $(LOADER) $(LOADOPTS) $(ZLINXEXM2) \
- ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+ $(LOADER) $(LOADOPTS) $(ZLINXEXM2) $(LIBS) -o $@
.c.o:
$(CC) $(CFLAGS) -I$(HEADER) -c $< $(VERBOSE)
diff --git a/EXAMPLE/clinsol.c b/EXAMPLE/clinsol.c
index 567bffe..93e43c7 100644
--- a/EXAMPLE/clinsol.c
+++ b/EXAMPLE/clinsol.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/clinsol1.c b/EXAMPLE/clinsol1.c
index f72c9c8..db765c3 100644
--- a/EXAMPLE/clinsol1.c
+++ b/EXAMPLE/clinsol1.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/clinsolx.c b/EXAMPLE/clinsolx.c
index 340cf9d..91147af 100644
--- a/EXAMPLE/clinsolx.c
+++ b/EXAMPLE/clinsolx.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/clinsolx1.c b/EXAMPLE/clinsolx1.c
index f5283dd..a1953f7 100644
--- a/EXAMPLE/clinsolx1.c
+++ b/EXAMPLE/clinsolx1.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/clinsolx2.c b/EXAMPLE/clinsolx2.c
index 522f2d5..4946e97 100644
--- a/EXAMPLE/clinsolx2.c
+++ b/EXAMPLE/clinsolx2.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/dlinsol.c b/EXAMPLE/dlinsol.c
index f5c5244..ca81d26 100644
--- a/EXAMPLE/dlinsol.c
+++ b/EXAMPLE/dlinsol.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/dlinsol1.c b/EXAMPLE/dlinsol1.c
index 85b4f06..87793de 100644
--- a/EXAMPLE/dlinsol1.c
+++ b/EXAMPLE/dlinsol1.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/dlinsolx.c b/EXAMPLE/dlinsolx.c
index 8fc4eed..12c9b37 100644
--- a/EXAMPLE/dlinsolx.c
+++ b/EXAMPLE/dlinsolx.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/dlinsolx1.c b/EXAMPLE/dlinsolx1.c
index 157404d..7f727b7 100644
--- a/EXAMPLE/dlinsolx1.c
+++ b/EXAMPLE/dlinsolx1.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/dlinsolx2.c b/EXAMPLE/dlinsolx2.c
index 5fe99a9..4824421 100644
--- a/EXAMPLE/dlinsolx2.c
+++ b/EXAMPLE/dlinsolx2.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/dreadtriple.c b/EXAMPLE/dreadtriple.c
new file mode 100644
index 0000000..b8768fa
--- /dev/null
+++ b/EXAMPLE/dreadtriple.c
@@ -0,0 +1,118 @@
+#include <stdio.h>
+#include "dsp_defs.h"
+#include "util.h"
+
+
+void
+dreadtriple(int *m, int *n, int *nonz,
+ double **nzval, int **rowind, int **colptr)
+{
+/*
+ * Output parameters
+ * =================
+ * (a,asub,xa): asub[*] contains the row subscripts of nonzeros
+ * in columns of matrix A; a[*] the numerical values;
+ * column i of A is given by a[k],k=xa[i],...,xa[i+1]-1.
+ *
+ */
+ int i, j, k, jsize, lasta, nnz, nz;
+ double *a, *val;
+ int *asub, *xa, *row, *col;
+
+ /* Matrix format:
+ * First line: #rows (= #cols; the matrix is square), #non-zero
+ * Triplet in the rest of lines:
+ * row, col, value
+ */
+
+ scanf("%d%d", n, nonz);
+ *m = *n;
+ printf("m %d, n %d, nonz %d\n", *m, *n, *nonz);
+ dallocateA(*n, *nonz, nzval, rowind, colptr); /* Allocate storage */
+ a = *nzval;
+ asub = *rowind;
+ xa = *colptr;
+
+ val = (double *) SUPERLU_MALLOC(*nonz * sizeof(double));
+ row = (int *) SUPERLU_MALLOC(*nonz * sizeof(int));
+ col = (int *) SUPERLU_MALLOC(*nonz * sizeof(int));
+
+ for (j = 0; j < *n; ++j) xa[j] = 0;
+
+ /* Read into the triplet array from a file */
+ for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) {
+ scanf("%d%d%lf\n", &row[nz], &col[nz], &val[nz]);
+ /* Change to 0-based indexing. */
+#if 0
+ --row[nz];
+ --col[nz];
+#endif
+ if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n
+ /*|| val[nz] == 0.*/) {
+ fprintf(stderr, "nz %d, (%d, %d) = %e out of bound, removed\n",
+ nz, row[nz], col[nz], val[nz]);
+ exit(-1);
+ } else {
+ ++xa[col[nz]];
+ ++nz;
+ }
+ }
+
+ *nonz = nz;
+
+ /* Initialize the array of column pointers */
+ k = 0;
+ jsize = xa[0];
+ xa[0] = 0;
+ for (j = 1; j < *n; ++j) {
+ k += jsize;
+ jsize = xa[j];
+ xa[j] = k;
+ }
+
+ /* Copy the triplets into the column oriented storage */
+ for (nz = 0; nz < *nonz; ++nz) {
+ j = col[nz];
+ k = xa[j];
+ asub[k] = row[nz];
+ a[k] = val[nz];
+ ++xa[j];
+ }
+
+ /* Reset the column pointers to the beginning of each column */
+ for (j = *n; j > 0; --j)
+ xa[j] = xa[j-1];
+ xa[0] = 0;
+
+ SUPERLU_FREE(val);
+ SUPERLU_FREE(row);
+ SUPERLU_FREE(col);
+
+#ifdef CHK_INPUT
+ for (i = 0; i < *n; i++) {
+ printf("Col %d, xa %d\n", i, xa[i]);
+ for (k = xa[i]; k < xa[i+1]; k++)
+ printf("%d\t%16.10f\n", asub[k], a[k]);
+ }
+#endif
+
+}
+
+
+void dreadrhs(int m, double *b)
+{
+ FILE *fp, *fopen();
+ int i, j;
+
+ if ( !(fp = fopen("b.dat", "r")) ) {
+ fprintf(stderr, "dreadrhs: file does not exist\n");
+ exit(-1);
+ }
+ for (i = 0; i < m; ++i)
+ fscanf(fp, "%lf\n", &b[i]);
+ /*fscanf(fp, "%d%lf\n", &j, &b[i]);*/
+ /* readpair_(j, &b[i]);*/
+ fclose(fp);
+}
+
+
diff --git a/EXAMPLE/slinsol.c b/EXAMPLE/slinsol.c
index e63bfdf..9c53764 100644
--- a/EXAMPLE/slinsol.c
+++ b/EXAMPLE/slinsol.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/slinsol1.c b/EXAMPLE/slinsol1.c
index a736a09..07d6d30 100644
--- a/EXAMPLE/slinsol1.c
+++ b/EXAMPLE/slinsol1.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/slinsolx.c b/EXAMPLE/slinsolx.c
index d73cc6d..b3d2111 100644
--- a/EXAMPLE/slinsolx.c
+++ b/EXAMPLE/slinsolx.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/slinsolx1.c b/EXAMPLE/slinsolx1.c
index deb650f..f6e76ee 100644
--- a/EXAMPLE/slinsolx1.c
+++ b/EXAMPLE/slinsolx1.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/slinsolx2.c b/EXAMPLE/slinsolx2.c
index e0acad7..f9a2265 100644
--- a/EXAMPLE/slinsolx2.c
+++ b/EXAMPLE/slinsolx2.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/sp_ienv.c b/EXAMPLE/sp_ienv.c
index 052d860..7b0f93b 100644
--- a/EXAMPLE/sp_ienv.c
+++ b/EXAMPLE/sp_ienv.c
@@ -9,6 +9,8 @@
* File name: sp_ienv.c
* History: Modified from lapack routine ILAENV
*/
+#include "slu_Cnames.h"
+
int
sp_ienv(int ispec)
{
diff --git a/EXAMPLE/superlu.c b/EXAMPLE/superlu.c
index 6b88a63..39d9582 100644
--- a/EXAMPLE/superlu.c
+++ b/EXAMPLE/superlu.c
@@ -5,7 +5,7 @@
* November 15, 1997
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/zlinsol.c b/EXAMPLE/zlinsol.c
index d3a75c4..3993113 100644
--- a/EXAMPLE/zlinsol.c
+++ b/EXAMPLE/zlinsol.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/zlinsol1.c b/EXAMPLE/zlinsol1.c
index 1747397..ed9ec31 100644
--- a/EXAMPLE/zlinsol1.c
+++ b/EXAMPLE/zlinsol1.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/zlinsolx.c b/EXAMPLE/zlinsolx.c
index 364f62b..a89f8e0 100644
--- a/EXAMPLE/zlinsolx.c
+++ b/EXAMPLE/zlinsolx.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/zlinsolx1.c b/EXAMPLE/zlinsolx1.c
index 48f6261..e75ee28 100644
--- a/EXAMPLE/zlinsolx1.c
+++ b/EXAMPLE/zlinsolx1.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/zlinsolx2.c b/EXAMPLE/zlinsolx2.c
index ce7f2aa..1fce9d4 100644
--- a/EXAMPLE/zlinsolx2.c
+++ b/EXAMPLE/zlinsolx2.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
main(int argc, char *argv[])
{
diff --git a/EXAMPLE/zreadtriple.c b/EXAMPLE/zreadtriple.c
new file mode 100644
index 0000000..ee0ac57
--- /dev/null
+++ b/EXAMPLE/zreadtriple.c
@@ -0,0 +1,90 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "zsp_defs.h"
+#include "util.h"
+
+
+void
+zreadtriple(int *m, int *n, int *nonz,
+ doublecomplex **nzval, int **rowind, int **colptr)
+{
+/*
+ * Output parameters
+ * =================
+ * (a,asub,xa): asub[*] contains the row subscripts of nonzeros
+ * in columns of matrix A; a[*] the numerical values;
+ * column i of A is given by a[k],k=xa[i],...,xa[i+1]-1.
+ *
+ */
+ int i, j, k, jsize, nz, lasta;
+ doublecomplex *a, *val;
+ int *asub, *xa, *row, *col;
+
+ /* Matrix format:
+ * First line: #rows, #cols, #non-zero
+ * Triplet in the rest of lines:
+ * row, col, real part of value, imaginary part of value
+ */
+
+ scanf("%d%d%d", m, n, nonz);
+#ifdef DEBUG
+ printf("zreadtriple(): *m %d, *n %d, *nonz, %d\n", *m, *n, *nonz);
+#endif
+ zallocateA(*n, *nonz, nzval, rowind, colptr); /* Allocate storage */
+ a = *nzval;
+ asub = *rowind;
+ xa = *colptr;
+
+ val = (doublecomplex *) SUPERLU_MALLOC(*nonz * sizeof(doublecomplex));
+ row = (int *) SUPERLU_MALLOC(*nonz * sizeof(int));
+ col = (int *) SUPERLU_MALLOC(*nonz * sizeof(int));
+
+ /* Read into the triplet array from a file */
+ for (i = 0; i < *n+1; ++i) xa[i] = 0;
+ for (nz = 0; nz < *nonz; ++nz) {
+ scanf("%d%d%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i);
+ if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n) {
+ fprintf(stderr, "(%d, %d) out of bound!\n", row[nz], col[nz]);
+ exit (-1);
+ }
+ ++xa[col[nz]]; /* Count number of nonzeros in each column */
+ }
+
+ /* Initialize the array of column pointers */
+ k = 0;
+ jsize = xa[0];
+ xa[0] = 0;
+ for (j = 1; j < *n; ++j) {
+ k += jsize;
+ jsize = xa[j];
+ xa[j] = k;
+ }
+
+ /* Copy the triplets into the column oriented storage */
+ for (nz = 0; nz < *nonz; ++nz) {
+ j = col[nz];
+ k = xa[j];
+ asub[k] = row[nz];
+ a[k] = val[nz];
+ ++xa[j];
+ }
+
+ /* Reset the column pointers to the beginning of each column */
+ for (j = *n; j > 0; --j)
+ xa[j] = xa[j-1];
+ xa[0] = 0;
+
+ SUPERLU_FREE(val);
+ SUPERLU_FREE(row);
+ SUPERLU_FREE(col);
+
+#ifdef CHK_INPUT
+ for (i = 0; i < *n; i++) {
+ printf("Col %d, xa %d\n", i, xa[i]);
+ for (k = xa[i]; k < xa[i+1]; k++)
+ printf("%d\t%16.10f\n", asub[k], a[k]);
+ }
+#endif
+
+}
diff --git a/FORTRAN/Makefile b/FORTRAN/Makefile
index 98abda6..0b171d2 100644
--- a/FORTRAN/Makefile
+++ b/FORTRAN/Makefile
@@ -6,13 +6,14 @@ include ../make.inc
#######################################################################
HEADER = ../SRC
+LIBS = ../$(SUPERLULIB) $(BLASLIB) -lm
F77EXM = f77_main.o hbcode1.o c_fortran_dgssv.o
all: f77exm
f77exm: $(F77EXM) ../$(SUPERLULIB)
- $(FORTRAN) $(F77EXM) ../$(SUPERLULIB) $(BLASLIB) -o $@
+ $(FORTRAN) $(LOADOPTS) $(F77EXM) $(LIBS) -o $@
c_fortran_zgssv.o: c_fortran_zgssv.c
$(CC) $(CFLAGS) $(CDEFS) -I$(HEADER) -c $< $(VERBOSE)
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_cgssv.c
similarity index 86%
copy from FORTRAN/c_fortran_dgssv.c
copy to FORTRAN/c_fortran_cgssv.c
index 22dd066..219d922 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_cgssv.c
@@ -1,3 +1,4 @@
+
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -6,7 +7,7 @@
*
*/
-#include "dsp_defs.h"
+#include "slu_cdefs.h"
#define HANDLE_SIZE 8
/* kind of integer to hold a pointer. Use int.
@@ -21,8 +22,9 @@ typedef struct {
} factors_t;
void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
- int *rowind, int *colptr, double *b, int *ldb,
+c_fortran_cgssv_(int *iopt, int *n, int *nnz, int *nrhs,
+ complex *values, int *rowind, int *colptr,
+ complex *b, int *ldb,
fptr *f_factors, /* a handle containing the address
pointing to the factored matrices */
int *info)
@@ -53,7 +55,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
NCformat *Ustore;
int i, panel_size, permc_spec, relax;
trans_t trans;
- double drop_tol = 0.0;
+ float drop_tol = 0.0;
mem_usage_t mem_usage;
superlu_options_t options;
SuperLUStat_t stat;
@@ -73,8 +75,8 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
for (i = 0; i < *nnz; ++i) --rowind[i];
for (i = 0; i <= *n; ++i) --colptr[i];
- dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
- SLU_NC, SLU_D, SLU_GE);
+ cCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
+ SLU_NC, SLU_C, SLU_GE);
L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[].");
@@ -88,7 +90,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
* permc_spec = 2: minimum degree on structure of A'+A
* permc_spec = 3: approximate minimum degree for unsymmetric matrices
*/
- permc_spec = 3;
+ permc_spec = options.ColPerm;
get_perm_c(permc_spec, &A, perm_c);
sp_preorder(&options, &A, perm_c, etree, &AC);
@@ -96,7 +98,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
panel_size = sp_ienv(1);
relax = sp_ienv(2);
- dgstrf(&options, &AC, drop_tol, relax, panel_size,
+ cgstrf(&options, &AC, drop_tol, relax, panel_size,
etree, NULL, 0, perm_c, perm_r, L, U, &stat, info);
if ( *info == 0 ) {
@@ -105,14 +107,14 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
- dQuerySpace(L, U, &mem_usage);
+ cQuerySpace(L, U, &mem_usage);
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
mem_usage.expansions);
} else {
- printf("dgstrf() error returns INFO= %d\n", *info);
+ printf("cgstrf() error returns INFO= %d\n", *info);
if ( *info <= *n ) { /* factorization completes */
- dQuerySpace(L, U, &mem_usage);
+ cQuerySpace(L, U, &mem_usage);
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
mem_usage.expansions);
@@ -148,10 +150,10 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
perm_c = LUfactors->perm_c;
perm_r = LUfactors->perm_r;
- dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE);
+ cCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_C, SLU_GE);
/* Solve the system A*X=B, overwriting B with X. */
- dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
+ cgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
Destroy_SuperMatrix_Store(&B);
StatFree(&stat);
@@ -167,7 +169,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
SUPERLU_FREE (LUfactors->U);
SUPERLU_FREE (LUfactors);
} else {
- fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt);
+ fprintf(stderr,"Invalid iopt=%d passed to c_fortran_cgssv()\n",*iopt);
exit(-1);
}
}
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_dgssv.c
index 22dd066..9d824f0 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_dgssv.c
@@ -1,3 +1,4 @@
+
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -6,12 +7,13 @@
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
#define HANDLE_SIZE 8
-/* kind of integer to hold a pointer. Use int.
- This might need to be changed on 64-bit systems. */
-typedef int fptr; /* 32-bit by default */
+
+/* kind of integer to hold a pointer. Use 'long int'
+ so it works on 64-bit systems. */
+typedef long int fptr; /* 64 bit */
typedef struct {
SuperMatrix *L;
@@ -21,8 +23,9 @@ typedef struct {
} factors_t;
void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
- int *rowind, int *colptr, double *b, int *ldb,
+c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs,
+ double *values, int *rowind, int *colptr,
+ double *b, int *ldb,
fptr *f_factors, /* a handle containing the address
pointing to the factored matrices */
int *info)
@@ -88,7 +91,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
* permc_spec = 2: minimum degree on structure of A'+A
* permc_spec = 3: approximate minimum degree for unsymmetric matrices
*/
- permc_spec = 3;
+ permc_spec = options.ColPerm;
get_perm_c(permc_spec, &A, perm_c);
sp_preorder(&options, &A, perm_c, etree, &AC);
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_dgssv.c.bak
similarity index 99%
copy from FORTRAN/c_fortran_dgssv.c
copy to FORTRAN/c_fortran_dgssv.c.bak
index 22dd066..284abef 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_dgssv.c.bak
@@ -88,7 +88,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
* permc_spec = 2: minimum degree on structure of A'+A
* permc_spec = 3: approximate minimum degree for unsymmetric matrices
*/
- permc_spec = 3;
+ permc_spec = options.ColPerm;
get_perm_c(permc_spec, &A, perm_c);
sp_preorder(&options, &A, perm_c, etree, &AC);
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_sgssv.c
similarity index 86%
copy from FORTRAN/c_fortran_dgssv.c
copy to FORTRAN/c_fortran_sgssv.c
index 22dd066..1fa08f1 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_sgssv.c
@@ -1,3 +1,4 @@
+
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -6,7 +7,7 @@
*
*/
-#include "dsp_defs.h"
+#include "slu_sdefs.h"
#define HANDLE_SIZE 8
/* kind of integer to hold a pointer. Use int.
@@ -21,8 +22,9 @@ typedef struct {
} factors_t;
void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
- int *rowind, int *colptr, double *b, int *ldb,
+c_fortran_sgssv_(int *iopt, int *n, int *nnz, int *nrhs,
+ float *values, int *rowind, int *colptr,
+ float *b, int *ldb,
fptr *f_factors, /* a handle containing the address
pointing to the factored matrices */
int *info)
@@ -53,7 +55,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
NCformat *Ustore;
int i, panel_size, permc_spec, relax;
trans_t trans;
- double drop_tol = 0.0;
+ float drop_tol = 0.0;
mem_usage_t mem_usage;
superlu_options_t options;
SuperLUStat_t stat;
@@ -73,8 +75,8 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
for (i = 0; i < *nnz; ++i) --rowind[i];
for (i = 0; i <= *n; ++i) --colptr[i];
- dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
- SLU_NC, SLU_D, SLU_GE);
+ sCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
+ SLU_NC, SLU_S, SLU_GE);
L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[].");
@@ -88,7 +90,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
* permc_spec = 2: minimum degree on structure of A'+A
* permc_spec = 3: approximate minimum degree for unsymmetric matrices
*/
- permc_spec = 3;
+ permc_spec = options.ColPerm;
get_perm_c(permc_spec, &A, perm_c);
sp_preorder(&options, &A, perm_c, etree, &AC);
@@ -96,7 +98,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
panel_size = sp_ienv(1);
relax = sp_ienv(2);
- dgstrf(&options, &AC, drop_tol, relax, panel_size,
+ sgstrf(&options, &AC, drop_tol, relax, panel_size,
etree, NULL, 0, perm_c, perm_r, L, U, &stat, info);
if ( *info == 0 ) {
@@ -105,14 +107,14 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
- dQuerySpace(L, U, &mem_usage);
+ sQuerySpace(L, U, &mem_usage);
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
mem_usage.expansions);
} else {
- printf("dgstrf() error returns INFO= %d\n", *info);
+ printf("sgstrf() error returns INFO= %d\n", *info);
if ( *info <= *n ) { /* factorization completes */
- dQuerySpace(L, U, &mem_usage);
+ sQuerySpace(L, U, &mem_usage);
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
mem_usage.expansions);
@@ -148,10 +150,10 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
perm_c = LUfactors->perm_c;
perm_r = LUfactors->perm_r;
- dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE);
+ sCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_S, SLU_GE);
/* Solve the system A*X=B, overwriting B with X. */
- dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
+ sgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
Destroy_SuperMatrix_Store(&B);
StatFree(&stat);
@@ -167,7 +169,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
SUPERLU_FREE (LUfactors->U);
SUPERLU_FREE (LUfactors);
} else {
- fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt);
+ fprintf(stderr,"Invalid iopt=%d passed to c_fortran_sgssv()\n",*iopt);
exit(-1);
}
}
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_zgssv.c
similarity index 87%
copy from FORTRAN/c_fortran_dgssv.c
copy to FORTRAN/c_fortran_zgssv.c
index 22dd066..b7cf074 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_zgssv.c
@@ -1,3 +1,4 @@
+
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -6,7 +7,7 @@
*
*/
-#include "dsp_defs.h"
+#include "slu_zdefs.h"
#define HANDLE_SIZE 8
/* kind of integer to hold a pointer. Use int.
@@ -21,8 +22,9 @@ typedef struct {
} factors_t;
void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
- int *rowind, int *colptr, double *b, int *ldb,
+c_fortran_zgssv_(int *iopt, int *n, int *nnz, int *nrhs,
+ doublecomplex *values, int *rowind, int *colptr,
+ doublecomplex *b, int *ldb,
fptr *f_factors, /* a handle containing the address
pointing to the factored matrices */
int *info)
@@ -73,8 +75,8 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
for (i = 0; i < *nnz; ++i) --rowind[i];
for (i = 0; i <= *n; ++i) --colptr[i];
- dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
- SLU_NC, SLU_D, SLU_GE);
+ zCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
+ SLU_NC, SLU_Z, SLU_GE);
L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[].");
@@ -88,7 +90,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
* permc_spec = 2: minimum degree on structure of A'+A
* permc_spec = 3: approximate minimum degree for unsymmetric matrices
*/
- permc_spec = 3;
+ permc_spec = options.ColPerm;
get_perm_c(permc_spec, &A, perm_c);
sp_preorder(&options, &A, perm_c, etree, &AC);
@@ -96,7 +98,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
panel_size = sp_ienv(1);
relax = sp_ienv(2);
- dgstrf(&options, &AC, drop_tol, relax, panel_size,
+ zgstrf(&options, &AC, drop_tol, relax, panel_size,
etree, NULL, 0, perm_c, perm_r, L, U, &stat, info);
if ( *info == 0 ) {
@@ -105,14 +107,14 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
- dQuerySpace(L, U, &mem_usage);
+ zQuerySpace(L, U, &mem_usage);
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
mem_usage.expansions);
} else {
- printf("dgstrf() error returns INFO= %d\n", *info);
+ printf("zgstrf() error returns INFO= %d\n", *info);
if ( *info <= *n ) { /* factorization completes */
- dQuerySpace(L, U, &mem_usage);
+ zQuerySpace(L, U, &mem_usage);
printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
mem_usage.expansions);
@@ -148,10 +150,10 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
perm_c = LUfactors->perm_c;
perm_r = LUfactors->perm_r;
- dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE);
+ zCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_Z, SLU_GE);
/* Solve the system A*X=B, overwriting B with X. */
- dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
+ zgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
Destroy_SuperMatrix_Store(&B);
StatFree(&stat);
@@ -167,7 +169,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
SUPERLU_FREE (LUfactors->U);
SUPERLU_FREE (LUfactors);
} else {
- fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt);
+ fprintf(stderr,"Invalid iopt=%d passed to c_fortran_zgssv()\n",*iopt);
exit(-1);
}
}
diff --git a/FORTRAN/f77_main.f b/FORTRAN/f77_main.f
index 28efb22..997f88e 100644
--- a/FORTRAN/f77_main.f
+++ b/FORTRAN/f77_main.f
@@ -3,8 +3,8 @@
parameter ( maxn = 10000, maxnz = 100000 )
integer rowind(maxnz), colptr(maxn)
real*8 values(maxnz), b(maxn)
- integer n, nnz, nrhs, ldb, info
- integer factors, iopt
+ integer n, nnz, nrhs, ldb, info, iopt
+ integer*8 factors
*
call hbcode1(n, n, nnz, values, rowind, colptr)
*
diff --git a/FORTRAN/f77exm.out b/FORTRAN/f77exm.out
new file mode 100644
index 0000000..186fa25
--- /dev/null
+++ b/FORTRAN/f77exm.out
@@ -0,0 +1,9 @@
+No of nonzeros in factor L = 835
+No of nonzeros in factor U = 978
+No of nonzeros in L+U = 1813
+L\U MB 0.020 total MB needed 0.040 expansions 0
+ Factorization succeeded
+ Solve succeeded
+ 188.45681574593 133.96798695468 -470.23879928609 -278.80339526911
+ 19.917307361526 272.77268232866 -247.80663474720 -313.99765880983
+ -91.277211882061 99.759496460021
diff --git a/INSTALL/Makefile b/INSTALL/Makefile
index a8f77ec..a75e370 100644
--- a/INSTALL/Makefile
+++ b/INSTALL/Makefile
@@ -3,13 +3,13 @@ include ../make.inc
all: testdlamch testslamch testtimer install.out
testdlamch: dlamch.o lsame.o dlamchtst.o
- $(LOADER) -o testdlamch dlamch.o lsame.o dlamchtst.o
+ $(LOADER) $(LOADOPTS) -o testdlamch dlamch.o lsame.o dlamchtst.o
testslamch: slamch.o lsame.o slamchtst.o
- $(LOADER) -o testslamch slamch.o lsame.o slamchtst.o
+ $(LOADER) $(LOADOPTS) -o testslamch slamch.o lsame.o slamchtst.o
testtimer: superlu_timer.o timertst.o
- $(LOADER) -o testtimer superlu_timer.o timertst.o
+ $(LOADER) $(LOADOPTS) -o testtimer superlu_timer.o timertst.o
install.out: install.csh
@echo Testing machines parameters and timer
diff --git a/MAKE_INC/make.alpha b/MAKE_INC/make.alpha
index f354d12..a19aa2c 100644
--- a/MAKE_INC/make.alpha
+++ b/MAKE_INC/make.alpha
@@ -21,8 +21,8 @@ PLAT = _alpha
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
BLASDEF = -DUSE_VENDOR_BLAS
BLASLIB = -ldxml
diff --git a/MAKE_INC/make.cray b/MAKE_INC/make.cray
index d8841ed..e2d5846 100644
--- a/MAKE_INC/make.cray
+++ b/MAKE_INC/make.cray
@@ -19,8 +19,8 @@ PLAT = _cray
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
#
#
BLASDEF = -DUSE_VENDOR_BLAS
diff --git a/MAKE_INC/make.hppa b/MAKE_INC/make.hppa
index 04e9d34..c7c1484 100644
--- a/MAKE_INC/make.hppa
+++ b/MAKE_INC/make.hppa
@@ -21,8 +21,8 @@ PLAT = _hppa
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
BLASDEF = -DUSE_VENDOR_BLAS
BLASLIB = -lblas -lcl
diff --git a/MAKE_INC/make.inc b/MAKE_INC/make.inc
index 83ffb02..707a52c 100644
--- a/MAKE_INC/make.inc
+++ b/MAKE_INC/make.inc
@@ -21,9 +21,9 @@ PLAT = _linux
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
-BLASLIB = ../blas$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
+BLASLIB = ../libblas.a
#
# The archiver and the flag(s) to use when building archive (library)
diff --git a/MAKE_INC/make.linux b/MAKE_INC/make.linux
index 0d17a92..50bdda9 100644
--- a/MAKE_INC/make.linux
+++ b/MAKE_INC/make.linux
@@ -21,9 +21,9 @@ PLAT = _linux
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
-BLASLIB = ../blas$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
+BLASLIB = ../libblas.a
#
# The archiver and the flag(s) to use when building archive (library)
diff --git a/MAKE_INC/make.rs6k b/MAKE_INC/make.rs6k
index 389b9cc..f68151c 100644
--- a/MAKE_INC/make.rs6k
+++ b/MAKE_INC/make.rs6k
@@ -21,8 +21,8 @@ PLAT = _rs6k
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
#
# If you don't have ESSL, you can use the following blaslib instead:
# BLASLIB = -lblas -lxlf -lxlf90
diff --git a/MAKE_INC/make.sgi b/MAKE_INC/make.sgi
index 606b5aa..61e2197 100644
--- a/MAKE_INC/make.sgi
+++ b/MAKE_INC/make.sgi
@@ -21,9 +21,9 @@ PLAT = _sgi
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
-BLASLIB = ../blas$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
+BLASLIB = ../libblas.a
#
# The archiver and the flag(s) to use when building archive (library)
diff --git a/MAKE_INC/make.solaris b/MAKE_INC/make.solaris
index feaad46..2f864b3 100644
--- a/MAKE_INC/make.solaris
+++ b/MAKE_INC/make.solaris
@@ -21,9 +21,9 @@ PLAT = _solaris
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
-BLASLIB = ../blas$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
+BLASLIB = ../libblas.a
#
# The archiver and the flag(s) to use when building archive (library)
@@ -48,4 +48,4 @@ CDEFS = -DAdd_
#
# The directory in which Matlab is installed
#
-MATLAB = /usr/sww/matlab
+MATLAB = /usr/sww/pkg/matlab
diff --git a/MAKE_INC/make.sp b/MAKE_INC/make.sp
index 03417e3..a6d08f7 100644
--- a/MAKE_INC/make.sp
+++ b/MAKE_INC/make.sp
@@ -21,8 +21,8 @@ PLAT = _sp
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
#
# If you don't have ESSL, you can use the following blaslib instead:
# BLASLIB = -lblas -lxlf -lxlf90
diff --git a/MAKE_INC/make.sun4 b/MAKE_INC/make.sun4
index 7ed5661..b15fa2b 100644
--- a/MAKE_INC/make.sun4
+++ b/MAKE_INC/make.sun4
@@ -21,9 +21,9 @@ PLAT = _sun4
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
-BLASLIB = ../blas$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
+BLASLIB = ../libblas.a
#
# The archiver and the flag(s) to use when building archive (library)
@@ -50,7 +50,7 @@ CDEFS = -DAdd_
#
# The directory in which Matlab is installed
#
-MATLAB = /usr/sww/matlab
+MATLAB = /usr/sww/pkg/matlab
diff --git a/MATLAB/mexlusolve.c b/MATLAB/mexlusolve.c
index f7ceeb2..6db98a5 100644
--- a/MATLAB/mexlusolve.c
+++ b/MATLAB/mexlusolve.c
@@ -7,7 +7,7 @@
*/
#include <stdio.h>
#include "mex.h"
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
#ifdef V5
#define MatlabMatrix mxArray
diff --git a/MATLAB/mexsuperlu.c b/MATLAB/mexsuperlu.c
index 0da4a9e..90d4a96 100644
--- a/MATLAB/mexsuperlu.c
+++ b/MATLAB/mexsuperlu.c
@@ -7,7 +7,7 @@
*/
#include <stdio.h>
#include "mex.h"
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
#ifdef V5
diff --git a/README b/README
index f0f010f..102c476 100644
--- a/README
+++ b/README
@@ -107,7 +107,7 @@ on your system setup:
to make the BLAS library from the routines in the CBLAS/ subdirectory.
3. C preprocessor definition CDEFS.
- In the header file SRC/Cnames.h, we use macros to determine how
+ In the header file SRC/slu_Cnames.h, we use macros to determine how
C routines should be named so that they are callable by Fortran.
(Some vendor-supplied BLAS libraries do not have C interface. So the
re-naming is needed in order for the SuperLU BLAS calls (in C) to
diff --git a/SRC/Makefile b/SRC/Makefile
index 30840c0..83200a7 100644
--- a/SRC/Makefile
+++ b/SRC/Makefile
@@ -5,9 +5,14 @@ include ../make.inc
# This is the makefile to create a library for SuperLU.
# The files are organized as follows:
#
-# ALLAUX -- Auxiliary routines called from all precisions
-# SCLAUX -- Auxiliary routines called from both real and complex
-# DZLAUX -- Auxiliary routines called from both double precision
+# ALLAUX -- Auxiliary routines called from all precisions of SuperLU
+# LAAUX -- LAPACK auxiliary routines called from all precisions
+# SLASRC -- LAPACK single precision real routines
+# DLASRC -- LAPACK double precision real routines
+# CLASRC -- LAPACK single precision complex routines
+# ZLASRC -- LAPACK double precision complex routines
+# SCLAUX -- LAPACK Auxiliary routines called from both real and complex
+# DZLAUX -- LAPACK Auxiliary routines called from both double precision
# and complex*16
# SLUSRC -- Single precision real SuperLU routines
# DLUSRC -- Double precision real SuperLU routines
@@ -32,17 +37,23 @@ include ../make.inc
#
#######################################################################
-ALLAUX = superlu_timer.o lsame.o util.o memory.o get_perm_c.o mmd.o \
- sp_coletree.o sp_preorder.o sp_ienv.o relax_snode.o heap_relax_snode.o \
- xerbla.o colamd.o
+### LAPACK
+LAAUX = lsame.o xerbla.o
+SLASRC = slacon.o
+DLASRC = dlacon.o
+CLASRC = clacon.o scsum1.o icmax1.o
+ZLASRC = zlacon.o dzsum1.o izmax1.o
+SCLAUX = slamch.o
+DZLAUX = dlamch.o
-SCLAUX = slamch.o
-
-DZLAUX = dlamch.o
+### SuperLU
+ALLAUX = superlu_timer.o util.o memory.o get_perm_c.o mmd.o \
+ sp_coletree.o sp_preorder.o sp_ienv.o relax_snode.o \
+ heap_relax_snode.o colamd.o
SLUSRC = \
sgssv.o sgssvx.o \
- ssp_blas2.o ssp_blas3.o sgscon.o slacon.o \
+ ssp_blas2.o ssp_blas3.o sgscon.o \
slangs.o sgsequ.o slaqgs.o spivotgrowth.o \
sgsrfs.o sgstrf.o sgstrs.o scopy_to_ucol.o \
ssnode_dfs.o ssnode_bmod.o \
@@ -52,7 +63,7 @@ SLUSRC = \
DLUSRC = \
dgssv.o dgssvx.o \
- dsp_blas2.o dsp_blas3.o dgscon.o dlacon.o \
+ dsp_blas2.o dsp_blas3.o dgscon.o \
dlangs.o dgsequ.o dlaqgs.o dpivotgrowth.o \
dgsrfs.o dgstrf.o dgstrs.o dcopy_to_ucol.o \
dsnode_dfs.o dsnode_bmod.o \
@@ -61,9 +72,7 @@ DLUSRC = \
dmemory.o dutil.o dmyblas2.o
CLUSRC = \
- scomplex.o scsum1.o icmax1.o \
- cgssv.o cgssvx.o \
- csp_blas2.o csp_blas3.o cgscon.o clacon.o \
+ scomplex.o cgssv.o cgssvx.o csp_blas2.o csp_blas3.o cgscon.o \
clangs.o cgsequ.o claqgs.o cpivotgrowth.o \
cgsrfs.o cgstrf.o cgstrs.o ccopy_to_ucol.o \
csnode_dfs.o csnode_bmod.o \
@@ -72,9 +81,7 @@ CLUSRC = \
cmemory.o cutil.o cmyblas2.o
ZLUSRC = \
- dcomplex.o dzsum1.o izmax1.o \
- zgssv.o zgssvx.o \
- zsp_blas2.o zsp_blas3.o zgscon.o zlacon.o \
+ dcomplex.o zgssv.o zgssvx.o zsp_blas2.o zsp_blas3.o zgscon.o \
zlangs.o zgsequ.o zlaqgs.o zpivotgrowth.o \
zgsrfs.o zgstrf.o zgstrs.o zcopy_to_ucol.o \
zsnode_dfs.o zsnode_bmod.o \
@@ -84,32 +91,37 @@ ZLUSRC = \
all: single double complex complex16
-single: $(SLUSRC) $(ALLAUX) $(SCLAUX)
- $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(SLUSRC) $(ALLAUX) $(SCLAUX)
+single: $(SLUSRC) $(ALLAUX) $(LAAUX) $(SLASRC) $(SCLAUX)
+ $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) \
+ $(SLUSRC) $(ALLAUX) $(LAAUX) $(SLASRC) $(SCLAUX)
$(RANLIB) ../$(SUPERLULIB)
-double: $(DLUSRC) $(ALLAUX) $(DZLAUX)
- $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(DLUSRC) $(ALLAUX) $(DZLAUX)
+double: $(DLUSRC) $(ALLAUX) $(LAAUX) $(DLASRC) $(DZLAUX)
+ $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) \
+ $(DLUSRC) $(ALLAUX) $(LAAUX) $(DLASRC) $(DZLAUX)
$(RANLIB) ../$(SUPERLULIB)
-complex: $(CLUSRC) $(ALLAUX) $(SCLAUX)
- $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(CLUSRC) $(ALLAUX) $(SCLAUX)
+complex: $(CLUSRC) $(ALLAUX) $(LAAUX) $(CLASRC) $(SCLAUX)
+ $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) \
+ $(CLUSRC) $(ALLAUX) $(LAAUX) $(CLASRC) $(SCLAUX)
$(RANLIB) ../$(SUPERLULIB)
-complex16: $(ZLUSRC) $(ALLAUX) $(DZLAUX)
- $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(ZLUSRC) $(ALLAUX) $(DZLAUX)
+complex16: $(ZLUSRC) $(ALLAUX) $(LAAUX) $(ZLASRC) $(DZLAUX)
+ $(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) \
+ $(ZLUSRC) $(ALLAUX) $(LAAUX) $(ZLASRC) $(DZLAUX)
$(RANLIB) ../$(SUPERLULIB)
##################################
# Do not optimize these routines #
##################################
-slamch.o: slamch.c ; $(CC) -c $(NOOPTS) $<
-dlamch.o: dlamch.c ; $(CC) -c $(NOOPTS) $<
+slamch.o: slamch.c ; $(CC) -c $(NOOPTS) $(CDEFS) $<
+dlamch.o: dlamch.c ; $(CC) -c $(NOOPTS) $(CDEFS) $<
superlu_timer.o: superlu_timer.c ; $(CC) -c $(NOOPTS) $<
+##################################
.c.o:
$(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) -c $< $(VERBOSE)
clean:
- rm -f *.o ../superlu$(PLAT).a
+ rm -f *.o ../libsuperlu_3.0.a
diff --git a/SRC/ccolumn_bmod.c b/SRC/ccolumn_bmod.c
index 730f04a..72ae5be 100644
--- a/SRC/ccolumn_bmod.c
+++ b/SRC/ccolumn_bmod.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
/*
* Function prototypes
diff --git a/SRC/ccolumn_dfs.c b/SRC/ccolumn_dfs.c
index d2c65fb..10f0fb6 100644
--- a/SRC/ccolumn_dfs.c
+++ b/SRC/ccolumn_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
/* What type of supernodes we want */
#define T2_SUPER
diff --git a/SRC/ccopy_to_ucol.c b/SRC/ccopy_to_ucol.c
index 0c7a969..a0972fa 100644
--- a/SRC/ccopy_to_ucol.c
+++ b/SRC/ccopy_to_ucol.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
int
ccopy_to_ucol(
diff --git a/SRC/cgscon.c b/SRC/cgscon.c
index b3f5c99..ee8bb4b 100644
--- a/SRC/cgscon.c
+++ b/SRC/cgscon.c
@@ -11,7 +11,7 @@
* History: Modified from lapack routines CGECON.
*/
#include <math.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
void
cgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/cgsequ.c b/SRC/cgsequ.c
index 10420cb..77a4961 100644
--- a/SRC/cgsequ.c
+++ b/SRC/cgsequ.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from LAPACK routine CGEEQU
*/
#include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
void
cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
diff --git a/SRC/cgsrfs.c b/SRC/cgsrfs.c
index a2d2e89..68568cf 100644
--- a/SRC/cgsrfs.c
+++ b/SRC/cgsrfs.c
@@ -11,7 +11,7 @@
* History: Modified from lapack routine CGERFS
*/
#include <math.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
void
cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/cgssv.c b/SRC/cgssv.c
index ba745ce..d0ecf19 100644
--- a/SRC/cgssv.c
+++ b/SRC/cgssv.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -7,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
void
cgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/cgssvx.c b/SRC/cgssvx.c
index c678d76..36e6fdb 100644
--- a/SRC/cgssvx.c
+++ b/SRC/cgssvx.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
void
cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
@@ -455,7 +455,7 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
Astore->nzval, Astore->colind, Astore->rowptr,
SLU_NC, A->Dtype, A->Mtype);
if ( notran ) { /* Reverse the transpose argument. */
- trant = CONJ;
+ trant = TRANS;
notran = 0;
} else {
trant = NOTRANS;
diff --git a/SRC/cgstrf.c b/SRC/cgstrf.c
index f64de34..65700ce 100644
--- a/SRC/cgstrf.c
+++ b/SRC/cgstrf.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
void
cgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
@@ -182,8 +182,8 @@ cgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
*/
/* Local working arrays */
NCPformat *Astore;
- int *iperm_r; /* inverse of perm_r;
- used when options->Fact == SamePattern_SameRowPerm */
+ int *iperm_r = NULL; /* inverse of perm_r; used when
+ options->Fact == SamePattern_SameRowPerm */
int *iperm_c; /* inverse of perm_c */
int *iwork;
complex *cwork;
diff --git a/SRC/cgstrs.c b/SRC/cgstrs.c
index dd3b1a1..270a343 100644
--- a/SRC/cgstrs.c
+++ b/SRC/cgstrs.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
/*
diff --git a/SRC/cgstrs.c.bak b/SRC/cgstrs.c.bak
deleted file mode 100644
index e609d3c..0000000
--- a/SRC/cgstrs.c.bak
+++ /dev/null
@@ -1,339 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- Copyright (c) 1994 by Xerox Corporation. All rights reserved.
-
- THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
-
- Permission is hereby granted to use or copy this program for any
- purpose, provided the above notices are retained on all copies.
- Permission to modify the code and to distribute modified code is
- granted, provided the above notices are retained, and a notice that
- the code was modified is included with the above copyright notice.
-*/
-
-#include "csp_defs.h"
-
-
-/*
- * Function prototypes
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-
-
-void
-cgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
- int *perm_c, int *perm_r, SuperMatrix *B,
- SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * CGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * CGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans (input) trans_t
- * Specifies the form of the system of equations:
- * = NOTRANS: A * X = B (No transpose)
- * = TRANS: A'* X = B (Transpose)
- * = CONJ: A**H * X = B (Conjugate transpose)
- *
- * L (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U as computed by
- * cgstrf(). Use compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U as computed by
- * cgstrf(). Use column-wise storage scheme, i.e., U has types:
- * Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * perm_c (input) int*, dimension (L->ncol)
- * Column permutation vector, which defines the
- * permutation matrix Pc; perm_c[i] = j means column i of A is
- * in position j in A*Pc.
- *
- * perm_r (input) int*, dimension (L->nrow)
- * Row permutation vector, which defines the permutation matrix Pr;
- * perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B (input/output) SuperMatrix*
- * B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- * On entry, the right hand side matrix.
- * On exit, the solution matrix if info = 0;
- *
- * stat (output) SuperLUStat_t*
- * Record the statistics on runtime and floating-point operation count.
- * See util.h for the definition of 'SuperLUStat_t'.
- *
- * info (output) int*
- * = 0: successful exit
- * < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
- _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
- int incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
- complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
- complex *work_col;
-#endif
- complex temp_comp;
- DNformat *Bstore;
- complex *Bmat;
- SCformat *Lstore;
- NCformat *Ustore;
- complex *Lval, *Uval;
- int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
- int i, j, k, iptr, jcol, n, ldb, nrhs;
- complex *work, *rhs_work, *soln;
- flops_t solve_ops;
- void cprint_soln();
-
- /* Test input parameters ... */
- *info = 0;
- Bstore = B->Store;
- ldb = Bstore->lda;
- nrhs = B->ncol;
- if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
- else if ( L->nrow != L->ncol || L->nrow < 0 ||
- L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
- *info = -2;
- else if ( U->nrow != U->ncol || U->nrow < 0 ||
- U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
- *info = -3;
- else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
- B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
- *info = -6;
- if ( *info ) {
- i = -(*info);
- xerbla_("cgstrs", &i);
- return;
- }
-
- n = L->nrow;
- work = complexCalloc(n * nrhs);
- if ( !work ) ABORT("Malloc fails for local work[].");
- soln = complexMalloc(n);
- if ( !soln ) ABORT("Malloc fails for local soln[].");
-
- Bmat = Bstore->nzval;
- Lstore = L->Store;
- Lval = Lstore->nzval;
- Ustore = U->Store;
- Uval = Ustore->nzval;
- solve_ops = 0;
-
- if ( trans == NOTRANS ) {
- /* Permute right hand sides to form Pr*B */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- /* Forward solve PLy=Pb. */
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- nrow = nsupr - nsupc;
-
- solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
- solve_ops += 8 * nrow * nsupc * nrhs;
-
- if ( nsupc == 1 ) {
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- luptr = L_NZ_START(fsupc);
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
- irow = L_SUB(iptr);
- ++luptr;
- cc_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
- c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
- }
- }
- } else {
- luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("N", strlen("N"));
- ftcs3 = _cptofcd("U", strlen("U"));
- CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#else
- ctrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- cgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#endif
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- work_col = &work[j*n];
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- c_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
- work_col[i].r = 0.0;
- work_col[i].i = 0.0;
- iptr++;
- }
- }
-#else
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- clsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
- cmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
- &rhs_work[fsupc], &work[0] );
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- c_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
- work[i].r = 0.;
- work[i].i = 0.;
- iptr++;
- }
- }
-#endif
- } /* else ... */
- } /* for L-solve */
-
-#ifdef DEBUG
- printf("After L-solve: y=\n");
- cprint_soln(n, nrhs, Bmat);
-#endif
-
- /*
- * Back solve Ux=y.
- */
- for (k = Lstore->nsuper; k >= 0; k--) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;
-
- if ( nsupc == 1 ) {
- rhs_work = &Bmat[0];
- for (j = 0; j < nrhs; j++) {
- c_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
- rhs_work += ldb;
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("U", strlen("U"));
- ftcs3 = _cptofcd("N", strlen("N"));
- CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
- ctrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else
- for (j = 0; j < nrhs; j++)
- cusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif
- }
-
- for (j = 0; j < nrhs; ++j) {
- rhs_work = &Bmat[j*ldb];
- for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
- solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
- irow = U_SUB(i);
- cc_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
- c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
- }
- }
- }
-
- } /* for U-solve */
-
-#ifdef DEBUG
- printf("After U-solve: x=\n");
- cprint_soln(n, nrhs, Bmat);
-#endif
-
- /* Compute the final solution X := Pc*X. */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- stat->ops[SOLVE] = solve_ops;
-
- } else { /* Solve A'*X=B */
- /* Permute right hand sides to form Pc'*B. */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- stat->ops[SOLVE] = 0;
-
- for (k = 0; k < nrhs; ++k) {
-
- /* Multiply by inv(U'). */
- sp_ctrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-
- /* Multiply by inv(L'). */
- sp_ctrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-
- }
-
- /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- }
-
- SUPERLU_FREE(work);
- SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector
- */
-void
-cprint_soln(int n, int nrhs, complex *soln)
-{
- int i;
-
- for (i = 0; i < n; i++)
- printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/SRC/clacon.c b/SRC/clacon.c
index ada4b61..704f1bf 100644
--- a/SRC/clacon.c
+++ b/SRC/clacon.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,8 @@
*
*/
#include <math.h>
-#include "Cnames.h"
-#include "scomplex.h"
+#include "slu_Cnames.h"
+#include "slu_scomplex.h"
int
clacon_(int *n, complex *v, complex *x, float *est, int *kase)
diff --git a/SRC/clangs.c b/SRC/clangs.c
index 612bf52..de7f91f 100644
--- a/SRC/clangs.c
+++ b/SRC/clangs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from lapack routine CLANGE
*/
#include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
float clangs(char *norm, SuperMatrix *A)
{
diff --git a/SRC/claqgs.c b/SRC/claqgs.c
index 9347a03..377b501 100644
--- a/SRC/claqgs.c
+++ b/SRC/claqgs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from LAPACK routine CLAQGE
*/
#include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
void
claqgs(SuperMatrix *A, float *r, float *c,
diff --git a/SRC/cmemory.c b/SRC/cmemory.c
index 04185e7..d50f58d 100644
--- a/SRC/cmemory.c
+++ b/SRC/cmemory.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
/* Constants */
#define NO_MEMTYPE 4 /* 0: lusup;
@@ -193,9 +193,10 @@ cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
cSetupSpace(work, lwork, &Glu->MemModel);
}
-#ifdef DEBUG
- printf("cLUMemInit() called: annz %d, MemModel %d\n",
- annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+ printf("cLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n",
+ FILL, nzlmax, nzumax);
+ fflush(stdout);
#endif
/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
printf("Not enough memory to perform factorization.\n");
return (cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
}
+#if ( PRNTlevel >= 1)
+ printf("cLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
+ nzlmax, nzumax);
+ fflush(stdout);
+#endif
lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) cexpand( &nzlmax, LSUB, 0, 0, Glu );
@@ -476,8 +482,7 @@ void
else lword = sizeof(complex);
if ( Glu->MemModel == SYSTEM ) {
- new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/* new_mem = (void *) calloc(new_len, lword); */
+ new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
if ( no_expand != 0 ) {
tries = 0;
if ( keep_prev ) {
@@ -487,8 +492,7 @@ void
if ( ++tries > 10 ) return (NULL);
alpha = Reduce(alpha);
new_len = alpha * *prev_len;
- new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/* new_mem = (void *) calloc(new_len, lword); */
+ new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
}
}
if ( type == LSUB || type == USUB ) {
@@ -641,7 +645,7 @@ callocateA(int n, int nnz, complex **a, int **asub, int **xa)
complex *complexMalloc(int n)
{
complex *buf;
- buf = (complex *) SUPERLU_MALLOC(n * sizeof(complex));
+ buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in complexMalloc()\n");
}
@@ -653,7 +657,7 @@ complex *complexCalloc(int n)
complex *buf;
register int i;
complex zero = {0.0, 0.0};
- buf = (complex *) SUPERLU_MALLOC(n * sizeof(complex));
+ buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in complexCalloc()\n");
}
diff --git a/SRC/cmyblas2.c b/SRC/cmyblas2.c
index 74fdbca..5998f87 100644
--- a/SRC/cmyblas2.c
+++ b/SRC/cmyblas2.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -14,7 +13,7 @@
* Note:
* This is only used when the system lacks an efficient BLAS library.
*/
-#include "scomplex.h"
+#include "slu_scomplex.h"
/*
* Solves a dense UNIT lower triangular system. The unit lower
diff --git a/SRC/colamd.c b/SRC/colamd.c
index b60718f..dc531f0 100644
--- a/SRC/colamd.c
+++ b/SRC/colamd.c
@@ -1,9 +1,15 @@
/* ========================================================================== */
-/* === colamd - a sparse matrix column ordering algorithm =================== */
+/* === colamd/symamd - a sparse matrix column ordering algorithm ============ */
/* ========================================================================== */
/*
- colamd: An approximate minimum degree column ordering algorithm.
+ colamd: an approximate minimum degree column ordering algorithm,
+ for LU factorization of symmetric or unsymmetric matrices,
+ QR factorization, least squares, interior point methods for
+ linear programming problems, and other related problems.
+
+ symamd: an approximate minimum degree ordering algorithm for Cholesky
+ factorization of symmetric matrices.
Purpose:
@@ -14,12 +20,16 @@
factorization, and P is computed during numerical factorization via
conventional partial pivoting with row interchanges. Colamd is the
column ordering method used in SuperLU, part of the ScaLAPACK library.
- It is also available as user-contributed software for Matlab 5.2,
+ It is also available as a built-in function in MATLAB Version 6,
available from MathWorks, Inc. (http://www.mathworks.com). This
- routine can be used in place of COLMMD in Matlab. By default, the \
- and / operators in Matlab perform a column ordering (using COLMMD)
- prior to LU factorization using sparse partial pivoting, in the
- built-in Matlab LU(A) routine.
+ routine can be used in place of colmmd in MATLAB.
+
+ Symamd computes a permutation P of a symmetric matrix A such that the
+ Cholesky factorization of PAP' has less fill-in and requires fewer
+ floating point operations than A. Symamd constructs a matrix M such
+ that M'M has the same nonzero pattern of A, and then orders the columns
+ of M using colamd. The column ordering of M is then returned as the
+ row and column ordering P of A.
Authors:
@@ -30,45 +40,39 @@
Date:
- August 3, 1998. Version 1.0.
+ September 8, 2003. Version 2.3.
Acknowledgements:
This work was supported by the National Science Foundation, under
grants DMS-9504974 and DMS-9803599.
- Notice:
+ Copyright and License:
- Copyright (c) 1998 by the University of Florida. All Rights Reserved.
+ Copyright (c) 1998-2003 by the University of Florida.
+ All Rights Reserved.
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
- Permission is hereby granted to use or copy this program for any
- purpose, provided the above notices are retained on all copies.
- User documentation of any code that uses this code must cite the
- Authors, the Copyright, and "Used by permission." If this code is
- accessible from within Matlab, then typing "help colamd" or "colamd"
- (with no arguments) must cite the Authors. Permission to modify the
- code and to distribute modified code is granted, provided the above
- notices are retained, and a notice that the code was modified is
- included with the above copyright notice. You must also retain the
- Availability information below, of the original version.
-
- This software is provided free of charge.
+ Permission is hereby granted to use, copy, modify, and/or distribute
+ this program, provided that the Copyright, this License, and the
+ Availability of the original version is retained on all copies and made
+ accessible to the end-user of any code or package that includes COLAMD
+ or any modified version of COLAMD.
Availability:
- This file is located at
+ The colamd/symamd library is available at
- http://www.cise.ufl.edu/~davis/colamd/colamd.c
+ http://www.cise.ufl.edu/research/sparse/colamd/
+
+ This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.c
+ file. It requires the colamd.h file. It is required by the colamdmex.c
+ and symamdmex.c files, for the MATLAB interface to colamd and symamd.
+
+ See the ChangeLog file for changes since Version 1.0.
- The colamd.h file is required, located in the same directory.
- The colamdmex.c file provides a Matlab interface for colamd.
- The symamdmex.c file provides a Matlab interface for symamd, which is
- a symmetric ordering based on this code, colamd.c. All codes are
- purely ANSI C compliant (they use no Unix-specific routines, include
- files, etc.).
*/
/* ========================================================================== */
@@ -76,66 +80,86 @@
/* ========================================================================== */
/*
- Each user-callable routine (declared as PUBLIC) is briefly described below.
- Refer to the comments preceding each routine for more details.
-
----------------------------------------------------------------------------
colamd_recommended:
----------------------------------------------------------------------------
- Usage:
+ C syntax:
- Alen = colamd_recommended (nnz, n_row, n_col) ;
+ #include "colamd.h"
+ int colamd_recommended (int nnz, int n_row, int n_col) ;
+
+ or as a C macro
+
+ #include "colamd.h"
+ Alen = COLAMD_RECOMMENDED (int nnz, int n_row, int n_col) ;
Purpose:
Returns recommended value of Alen for use by colamd. Returns -1
- if any input argument is negative.
+ if any input argument is negative. The use of this routine
+ or macro is optional. Note that the macro uses its arguments
+ more than once, so be careful for side effects, if you pass
+ expressions as arguments to COLAMD_RECOMMENDED. Not needed for
+ symamd, which dynamically allocates its own memory.
- Arguments:
+ Arguments (all input arguments):
int nnz ; Number of nonzeros in the matrix A. This must
be the same value as p [n_col] in the call to
colamd - otherwise you will get a wrong value
of the recommended memory to use.
+
int n_row ; Number of rows in the matrix A.
+
int n_col ; Number of columns in the matrix A.
----------------------------------------------------------------------------
colamd_set_defaults:
----------------------------------------------------------------------------
- Usage:
+ C syntax:
- colamd_set_defaults (knobs) ;
+ #include "colamd.h"
+ colamd_set_defaults (double knobs [COLAMD_KNOBS]) ;
Purpose:
- Sets the default parameters.
+ Sets the default parameters. The use of this routine is optional.
Arguments:
double knobs [COLAMD_KNOBS] ; Output only.
- Rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) entries
- are removed prior to ordering. Columns with more than
- (knobs [COLAMD_DENSE_COL] * n_row) entries are removed
- prior to ordering, and placed last in the output column
- ordering. Default values of these two knobs are both 0.5.
- Currently, only knobs [0] and knobs [1] are used, but future
- versions may use more knobs. If so, they will be properly set
- to their defaults by the future version of colamd_set_defaults,
- so that the code that calls colamd will not need to change,
- assuming that you either use colamd_set_defaults, or pass a
- (double *) NULL pointer as the knobs array to colamd.
+ Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col)
+ entries are removed prior to ordering. Columns with more than
+ (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to
+ ordering, and placed last in the output column ordering.
+
+ Symamd: uses only knobs [COLAMD_DENSE_ROW], which is knobs [0].
+ Rows and columns with more than (knobs [COLAMD_DENSE_ROW] * n)
+ entries are removed prior to ordering, and placed last in the
+ output ordering.
+
+ COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1,
+ respectively, in colamd.h. Default values of these two knobs
+ are both 0.5. Currently, only knobs [0] and knobs [1] are
+ used, but future versions may use more knobs. If so, they will
+ be properly set to their defaults by the future version of
+ colamd_set_defaults, so that the code that calls colamd will
+ not need to change, assuming that you either use
+ colamd_set_defaults, or pass a (double *) NULL pointer as the
+ knobs array to colamd or symamd.
----------------------------------------------------------------------------
colamd:
----------------------------------------------------------------------------
- Usage:
+ C syntax:
- colamd (n_row, n_col, Alen, A, p, knobs) ;
+ #include "colamd.h"
+ int colamd (int n_row, int n_col, int Alen, int *A, int *p,
+ double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS]) ;
Purpose:
@@ -143,34 +167,44 @@
(AQ)'AQ=LL' have less fill-in and require fewer floating point
operations than factorizing the unpermuted matrix A or A'A,
respectively.
+
+ Returns:
+
+ TRUE (1) if successful, FALSE (0) otherwise.
Arguments:
- int n_row ;
+ int n_row ; Input argument.
Number of rows in the matrix A.
Restriction: n_row >= 0.
Colamd returns FALSE if n_row is negative.
- int n_col ;
+ int n_col ; Input argument.
Number of columns in the matrix A.
Restriction: n_col >= 0.
Colamd returns FALSE if n_col is negative.
- int Alen ;
+ int Alen ; Input argument.
Restriction (see note):
- Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col + COLAMD_STATS
+ Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col
Colamd returns FALSE if these conditions are not met.
Note: this restriction makes an modest assumption regarding
- the size of the two typedef'd structures, below. We do,
- however, guarantee that
- Alen >= colamd_recommended (nnz, n_row, n_col)
+ the size of the two typedef'd structures in colamd.h.
+ We do, however, guarantee that
+
+ Alen >= colamd_recommended (nnz, n_row, n_col)
+
+ or equivalently as a C preprocessor macro:
+
+ Alen >= COLAMD_RECOMMENDED (nnz, n_row, n_col)
+
will be sufficient.
- int A [Alen] ; Input argument, stats on output.
+ int A [Alen] ; Input argument, undefined on output.
A is an integer array of size Alen. Alen must be at least as
large as the bare minimum value given above, but this is very
@@ -191,21 +225,8 @@
n_row-1, and columns are in the range 0 to n_col-1. Colamd
returns FALSE if any row index is out of range.
- The contents of A are modified during ordering, and are thus
- undefined on output with the exception of a few statistics
- about the ordering (A [0..COLAMD_STATS-1]):
- A [0]: number of dense or empty rows ignored.
- A [1]: number of dense or empty columns ignored (and ordered
- last in the output permutation p)
- A [2]: number of garbage collections performed.
- A [3]: 0, if all row indices in each column were in sorted
- order, and no duplicates were present.
- 1, otherwise (in which case colamd had to do more work)
- Note that a row can become "empty" if it contains only
- "dense" and/or "empty" columns, and similarly a column can
- become "empty" if it only contains "dense" and/or "empty" rows.
- Future versions may return more statistics in A, but the usage
- of these 4 entries in A will remain unchanged.
+ The contents of A are modified during ordering, and are
+ undefined on output.
int p [n_col+1] ; Both input and output argument.
@@ -227,25 +248,334 @@
If colamd returns FALSE, then no permutation is returned, and
p is undefined on output.
- double knobs [COLAMD_KNOBS] ; Input only.
+ double knobs [COLAMD_KNOBS] ; Input argument.
- See colamd_set_defaults for a description. If the knobs array
- is not present (that is, if a (double *) NULL pointer is passed
- in its place), then the default values of the parameters are
- used instead.
+ See colamd_set_defaults for a description.
-*/
+ int stats [COLAMD_STATS] ; Output argument.
+ Statistics on the ordering, and error status.
+ See colamd.h for related definitions.
+ Colamd returns FALSE if stats is not present.
-/* ========================================================================== */
-/* === Include files ======================================================== */
-/* ========================================================================== */
+ stats [0]: number of dense or empty rows ignored.
-/* limits.h: the largest positive integer (INT_MAX) */
-#include <limits.h>
+ stats [1]: number of dense or empty columns ignored (and
+ ordered last in the output permutation p)
+ Note that a row can become "empty" if it
+ contains only "dense" and/or "empty" columns,
+ and similarly a column can become "empty" if it
+ only contains "dense" and/or "empty" rows.
-/* colamd.h: knob array size, stats output size, and global prototypes */
-#include "colamd.h"
+ stats [2]: number of garbage collections performed.
+ This can be excessively high if Alen is close
+ to the minimum required value.
+
+ stats [3]: status code. < 0 is an error code.
+ > 0 is a warning or notice.
+
+ 0 OK. Each column of the input matrix contained
+ row indices in increasing order, with no
+ duplicates.
+
+ 1 OK, but columns of input matrix were jumbled
+ (unsorted columns or duplicate entries). Colamd
+ had to do some extra work to sort the matrix
+ first and remove duplicate entries, but it
+ still was able to return a valid permutation
+ (return value of colamd was TRUE).
+
+ stats [4]: highest numbered column that
+ is unsorted or has duplicate
+ entries.
+ stats [5]: last seen duplicate or
+ unsorted row index.
+ stats [6]: number of duplicate or
+ unsorted row indices.
+
+ -1 A is a null pointer
+
+ -2 p is a null pointer
+
+ -3 n_row is negative
+
+ stats [4]: n_row
+
+ -4 n_col is negative
+
+ stats [4]: n_col
+
+ -5 number of nonzeros in matrix is negative
+
+ stats [4]: number of nonzeros, p [n_col]
+
+ -6 p [0] is nonzero
+
+ stats [4]: p [0]
+
+ -7 A is too small
+
+ stats [4]: required size
+ stats [5]: actual size (Alen)
+
+ -8 a column has a negative number of entries
+
+ stats [4]: column with < 0 entries
+ stats [5]: number of entries in col
+
+ -9 a row index is out of bounds
+
+ stats [4]: column with bad row index
+ stats [5]: bad row index
+ stats [6]: n_row, # of rows of matrix
+
+ -10 (unused; see symamd.c)
+
+ -999 (unused; see symamd.c)
+
+ Future versions may return more statistics in the stats array.
+
+ Example:
+
+ See http://www.cise.ufl.edu/research/sparse/colamd/example.c
+ for a complete example.
+
+ To order the columns of a 5-by-4 matrix with 11 nonzero entries in
+ the following nonzero pattern
+
+ x 0 x 0
+ x 0 x x
+ 0 x x 0
+ 0 0 x x
+ x x 0 0
+
+ with default knobs and no output statistics, do the following:
+
+ #include "colamd.h"
+ #define ALEN COLAMD_RECOMMENDED (11, 5, 4)
+ int A [ALEN] = {0, 1, 4, 2, 4, 0, 1, 2, 3, 1, 3} ;
+ int p [ ] = {0, 3, 5, 9, 11} ;
+ int stats [COLAMD_STATS] ;
+ colamd (5, 4, ALEN, A, p, (double *) NULL, stats) ;
+
+ The permutation is returned in the array p, and A is destroyed.
+
+ ----------------------------------------------------------------------------
+ symamd:
+ ----------------------------------------------------------------------------
+
+ C syntax:
+
+ #include "colamd.h"
+ int symamd (int n, int *A, int *p, int *perm,
+ double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS],
+ void * (*allocate) (size_t, size_t), void (*release) (void *)) ;
+
+ Purpose:
+
+ The symamd routine computes an ordering P of a symmetric sparse
+ matrix A such that the Cholesky factorization PAP' = LL' remains
+ sparse. It is based on a column ordering of a matrix M constructed
+ so that the nonzero pattern of M'M is the same as A. The matrix A
+ is assumed to be symmetric; only the strictly lower triangular part
+ is accessed. You must pass your selected memory allocator (usually
+ calloc/free or mxCalloc/mxFree) to symamd, for it to allocate
+ memory for the temporary matrix M.
+
+ Returns:
+
+ TRUE (1) if successful, FALSE (0) otherwise.
+
+ Arguments:
+
+ int n ; Input argument.
+
+ Number of rows and columns in the symmetric matrix A.
+ Restriction: n >= 0.
+ Symamd returns FALSE if n is negative.
+
+ int A [nnz] ; Input argument.
+
+ A is an integer array of size nnz, where nnz = p [n].
+
+ The row indices of the entries in column c of the matrix are
+ held in A [(p [c]) ... (p [c+1]-1)]. The row indices in a
+ given column c need not be in ascending order, and duplicate
+ row indices may be present. However, symamd will run faster
+ if the columns are in sorted order with no duplicate entries.
+
+ The matrix is 0-based. That is, rows are in the range 0 to
+ n-1, and columns are in the range 0 to n-1. Symamd
+ returns FALSE if any row index is out of range.
+
+ The contents of A are not modified.
+
+ int p [n+1] ; Input argument.
+
+ p is an integer array of size n+1. On input, it holds the
+ "pointers" for the column form of the matrix A. Column c of
+ the matrix A is held in A [(p [c]) ... (p [c+1]-1)]. The first
+ entry, p [0], must be zero, and p [c] <= p [c+1] must hold
+ for all c in the range 0 to n-1. The value p [n] is
+ thus the total number of entries in the pattern of the matrix A.
+ Symamd returns FALSE if these conditions are not met.
+
+ The contents of p are not modified.
+
+ int perm [n+1] ; Output argument.
+
+ On output, if symamd returns TRUE, the array perm holds the
+ permutation P, where perm [0] is the first index in the new
+ ordering, and perm [n-1] is the last. That is, perm [k] = j
+ means that row and column j of A is the kth column in PAP',
+ where k is in the range 0 to n-1 (perm [0] = j means
+ that row and column j of A are the first row and column in
+ PAP'). The array is used as a workspace during the ordering,
+ which is why it must be of length n+1, not just n.
+
+ double knobs [COLAMD_KNOBS] ; Input argument.
+
+ See colamd_set_defaults for a description.
+
+ int stats [COLAMD_STATS] ; Output argument.
+
+ Statistics on the ordering, and error status.
+ See colamd.h for related definitions.
+ Symamd returns FALSE if stats is not present.
+
+ stats [0]: number of dense or empty rows and columns ignored
+ (and ordered last in the output permutation
+ perm). Note that a row/column can become
+ "empty" if it contains only "dense" and/or
+ "empty" columns/rows.
+
+ stats [1]: (same as stats [0])
+
+ stats [2]: number of garbage collections performed.
+
+ stats [3]: status code. < 0 is an error code.
+ > 0 is a warning or notice.
+
+ 0 OK. Each column of the input matrix contained
+ row indices in increasing order, with no
+ duplicates.
+
+ 1 OK, but columns of input matrix were jumbled
+ (unsorted columns or duplicate entries). Symamd
+ had to do some extra work to sort the matrix
+ first and remove duplicate entries, but it
+ still was able to return a valid permutation
+ (return value of symamd was TRUE).
+
+ stats [4]: highest numbered column that
+ is unsorted or has duplicate
+ entries.
+ stats [5]: last seen duplicate or
+ unsorted row index.
+ stats [6]: number of duplicate or
+ unsorted row indices.
+
+ -1 A is a null pointer
+
+ -2 p is a null pointer
+
+ -3 (unused, see colamd.c)
+
+ -4 n is negative
+
+ stats [4]: n
+
+ -5 number of nonzeros in matrix is negative
+
+ stats [4]: # of nonzeros (p [n]).
+
+ -6 p [0] is nonzero
+
+ stats [4]: p [0]
+
+ -7 (unused)
+
+ -8 a column has a negative number of entries
+
+ stats [4]: column with < 0 entries
+ stats [5]: number of entries in col
+
+ -9 a row index is out of bounds
+
+ stats [4]: column with bad row index
+ stats [5]: bad row index
+ stats [6]: n_row, # of rows of matrix
+
+ -10 out of memory (unable to allocate temporary
+ workspace for M or count arrays using the
+ "allocate" routine passed into symamd).
+
+ -999 internal error. colamd failed to order the
+ matrix M, when it should have succeeded. This
+ indicates a bug. If this (and *only* this)
+ error code occurs, please contact the authors.
+ Don't contact the authors if you get any other
+ error code.
+
+ Future versions may return more statistics in the stats array.
+
+ void * (*allocate) (size_t, size_t)
+
+ A pointer to a function providing memory allocation. The
+ allocated memory must be returned initialized to zero. For a
+ C application, this argument should normally be a pointer to
+ calloc. For a MATLAB mexFunction, the routine mxCalloc is
+ passed instead.
+
+ void (*release) (void *)
+
+ A pointer to a function that frees memory allocated by the
+ memory allocation routine above. For a C application, this
+ argument should normally be a pointer to free. For a MATLAB
+ mexFunction, the routine mxFree is passed instead.
+
+
+ ----------------------------------------------------------------------------
+ colamd_report:
+ ----------------------------------------------------------------------------
+
+ C syntax:
+
+ #include "colamd.h"
+ colamd_report (int stats [COLAMD_STATS]) ;
+
+ Purpose:
+
+ Prints the error status and statistics recorded in the stats
+ array on the standard error output (for a standard C routine)
+ or on the MATLAB output (for a mexFunction).
+
+ Arguments:
+
+ int stats [COLAMD_STATS] ; Input only. Statistics from colamd.
+
+
+ ----------------------------------------------------------------------------
+ symamd_report:
+ ----------------------------------------------------------------------------
+
+ C syntax:
+
+ #include "colamd.h"
+ symamd_report (int stats [COLAMD_STATS]) ;
+
+ Purpose:
+
+ Prints the error status and statistics recorded in the stats
+ array on the standard error output (for a standard C routine)
+ or on the MATLAB output (for a mexFunction).
+
+ Arguments:
+
+ int stats [COLAMD_STATS] ; Input only. Statistics from symamd.
+
+
+*/
/* ========================================================================== */
/* === Scaffolding code definitions ======================================== */
@@ -254,10 +584,7 @@
/* Ensure that debugging is turned off: */
#ifndef NDEBUG
#define NDEBUG
-#endif
-
-/* assert.h: the assert macro (no debugging if NDEBUG is defined) */
-#include <assert.h>
+#endif /* NDEBUG */
/*
Our "scaffolding code" philosophy: In our opinion, well-written library
@@ -276,77 +603,62 @@
(3) (gasp!) for actually finding bugs. This code has been heavily tested
and "should" be fully functional and bug-free ... but you never know...
- To enable debugging, comment out the "#define NDEBUG" above. The code will
- become outrageously slow when debugging is enabled. To control the level of
- debugging output, set an environment variable D to 0 (little), 1 (some),
- 2, 3, or 4 (lots).
+ To enable debugging, comment out the "#define NDEBUG" above. For a MATLAB
+ mexFunction, you will also need to modify mexopts.sh to remove the -DNDEBUG
+ definition. The code will become outrageously slow when debugging is
+ enabled. To control the level of debugging output, set an environment
+ variable D to 0 (little), 1 (some), 2, 3, or 4 (lots). When debugging,
+ you should see the following message on the standard output:
+
+ colamd: debug version, D = 1 (THIS WILL BE SLOW!)
+
+ or a similar message for symamd. If you don't, then debugging has not
+ been enabled.
+
*/
/* ========================================================================== */
-/* === Row and Column structures ============================================ */
+/* === Include files ======================================================== */
/* ========================================================================== */
-typedef struct ColInfo_struct
-{
- int start ; /* index for A of first row in this column, or DEAD */
- /* if column is dead */
- int length ; /* number of rows in this column */
- union
- {
- int thickness ; /* number of original columns represented by this */
- /* col, if the column is alive */
- int parent ; /* parent in parent tree super-column structure, if */
- /* the column is dead */
- } shared1 ;
- union
- {
- int score ; /* the score used to maintain heap, if col is alive */
- int order ; /* pivot ordering of this column, if col is dead */
- } shared2 ;
- union
- {
- int headhash ; /* head of a hash bucket, if col is at the head of */
- /* a degree list */
- int hash ; /* hash value, if col is not in a degree list */
- int prev ; /* previous column in degree list, if col is in a */
- /* degree list (but not at the head of a degree list) */
- } shared3 ;
- union
- {
- int degree_next ; /* next column, if col is in a degree list */
- int hash_next ; /* next column, if col is in a hash list */
- } shared4 ;
-
-} ColInfo ;
-
-typedef struct RowInfo_struct
-{
- int start ; /* index for A of first col in this row */
- int length ; /* number of principal columns in this row */
- union
- {
- int degree ; /* number of principal & non-principal columns in row */
- int p ; /* used as a row pointer in init_rows_cols () */
- } shared1 ;
- union
- {
- int mark ; /* for computing set differences and marking dead rows*/
- int first_column ;/* first column in row (used in garbage collection) */
- } shared2 ;
+#include "colamd.h"
+#include <limits.h>
-} RowInfo ;
+#ifdef MATLAB_MEX_FILE
+#include "mex.h"
+#include "matrix.h"
+#else
+#include <stdio.h>
+#include <assert.h>
+#endif /* MATLAB_MEX_FILE */
/* ========================================================================== */
/* === Definitions ========================================================== */
/* ========================================================================== */
+/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
+#define PUBLIC
+#define PRIVATE static
+
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define ONES_COMPLEMENT(r) (-(r)-1)
-#define TRUE (1)
-#define FALSE (0)
+/* -------------------------------------------------------------------------- */
+/* Change for version 2.1: define TRUE and FALSE only if not yet defined */
+/* -------------------------------------------------------------------------- */
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+/* -------------------------------------------------------------------------- */
+
#define EMPTY (-1)
/* Row and column status */
@@ -368,9 +680,29 @@ typedef struct RowInfo_struct
#define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; }
#define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; }
-/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
-#define PUBLIC
-#define PRIVATE static
+/* ========================================================================== */
+/* === Colamd reporting mechanism =========================================== */
+/* ========================================================================== */
+
+#ifdef MATLAB_MEX_FILE
+
+/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output */
+#define PRINTF mexPrintf
+
+/* In MATLAB, matrices are 1-based to the user, but 0-based internally */
+#define INDEX(i) ((i)+1)
+
+#else
+
+/* Use printf in standard C environment, for debugging and statistics output. */
+/* Output is generated only if debugging is enabled at compile time, or if */
+/* the caller explicitly calls colamd_report or symamd_report. */
+#define PRINTF printf
+
+/* In C, matrices are 0-based and indices are reported as such in *_report */
+#define INDEX(i) (i)
+
+#endif /* MATLAB_MEX_FILE */
/* ========================================================================== */
/* === Prototypes of PRIVATE routines ======================================= */
@@ -380,18 +712,19 @@ PRIVATE int init_rows_cols
(
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int A [],
- int p []
+ int p [],
+ int stats [COLAMD_STATS]
) ;
PRIVATE void init_scoring
(
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int A [],
int head [],
double knobs [COLAMD_KNOBS],
@@ -405,8 +738,8 @@ PRIVATE int find_ordering
int n_row,
int n_col,
int Alen,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int A [],
int head [],
int n_col2,
@@ -417,17 +750,19 @@ PRIVATE int find_ordering
PRIVATE void order_children
(
int n_col,
- ColInfo Col [],
+ Colamd_Col Col [],
int p []
) ;
PRIVATE void detect_super_cols
(
+
#ifndef NDEBUG
int n_col,
- RowInfo Row [],
-#endif
- ColInfo Col [],
+ Colamd_Row Row [],
+#endif /* NDEBUG */
+
+ Colamd_Col Col [],
int A [],
int head [],
int row_start,
@@ -438,8 +773,8 @@ PRIVATE int garbage_collection
(
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int A [],
int *pfree
) ;
@@ -447,29 +782,49 @@ PRIVATE int garbage_collection
PRIVATE int clear_mark
(
int n_row,
- RowInfo Row []
+ Colamd_Row Row []
+) ;
+
+PRIVATE void print_report
+(
+ char *method,
+ int stats [COLAMD_STATS]
) ;
/* ========================================================================== */
-/* === Debugging definitions ================================================ */
+/* === Debugging prototypes and definitions ================================= */
/* ========================================================================== */
#ifndef NDEBUG
-/* === With debugging ======================================================= */
+/* colamd_debug is the *ONLY* global variable, and is only */
+/* present when debugging */
-/* stdlib.h: for getenv and atoi, to get debugging level from environment */
-#include <stdlib.h>
+PRIVATE int colamd_debug ; /* debug print level */
-/* stdio.h: for printf (no printing if debugging is turned off) */
-#include <stdio.h>
+#define DEBUG0(params) { (void) PRINTF params ; }
+#define DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; }
+#define DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; }
+#define DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; }
+#define DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; }
+
+#ifdef MATLAB_MEX_FILE
+#define ASSERT(expression) (mxAssert ((expression), ""))
+#else
+#define ASSERT(expression) (assert (expression))
+#endif /* MATLAB_MEX_FILE */
+
+PRIVATE void colamd_get_debug /* gets the debug print level from getenv */
+(
+ char *method
+) ;
PRIVATE void debug_deg_lists
(
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int head [],
int min_score,
int should,
@@ -479,7 +834,7 @@ PRIVATE void debug_deg_lists
PRIVATE void debug_mark
(
int n_row,
- RowInfo Row [],
+ Colamd_Row Row [],
int tag_mark,
int max_mark
) ;
@@ -488,8 +843,8 @@ PRIVATE void debug_matrix
(
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int A []
) ;
@@ -497,24 +852,13 @@ PRIVATE void debug_structures
(
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int A [],
int n_col2
) ;
-/* the following is the *ONLY* global variable in this file, and is only */
-/* present when debugging */
-
-PRIVATE int debug_colamd ; /* debug print level */
-
-#define DEBUG0(params) { (void) printf params ; }
-#define DEBUG1(params) { if (debug_colamd >= 1) (void) printf params ; }
-#define DEBUG2(params) { if (debug_colamd >= 2) (void) printf params ; }
-#define DEBUG3(params) { if (debug_colamd >= 3) (void) printf params ; }
-#define DEBUG4(params) { if (debug_colamd >= 4) (void) printf params ; }
-
-#else
+#else /* NDEBUG */
/* === No debugging ========================================================= */
@@ -524,104 +868,426 @@ PRIVATE int debug_colamd ; /* debug print level */
#define DEBUG3(params) ;
#define DEBUG4(params) ;
-#endif
+#define ASSERT(expression) ((void) 0)
+
+#endif /* NDEBUG */
+
+/* ========================================================================== */
+
+
+
+/* ========================================================================== */
+/* === USER-CALLABLE ROUTINES: ============================================== */
+/* ========================================================================== */
+
+
+/* ========================================================================== */
+/* === colamd_recommended =================================================== */
+/* ========================================================================== */
+
+/*
+ The colamd_recommended routine returns the suggested size for Alen. This
+ value has been determined to provide good balance between the number of
+ garbage collections and the memory requirements for colamd. If any
+ argument is negative, a -1 is returned as an error condition. This
+ function is also available as a macro defined in colamd.h, so that you
+ can use it for a statically-allocated array size.
+*/
+
+PUBLIC int colamd_recommended /* returns recommended value of Alen. */
+(
+ /* === Parameters ======================================================= */
+
+ int nnz, /* number of nonzeros in A */
+ int n_row, /* number of rows in A */
+ int n_col /* number of columns in A */
+)
+{
+ return (COLAMD_RECOMMENDED (nnz, n_row, n_col)) ;
+}
+
+
+/* ========================================================================== */
+/* === colamd_set_defaults ================================================== */
+/* ========================================================================== */
+
+/*
+ The colamd_set_defaults routine sets the default values of the user-
+ controllable parameters for colamd:
+
+ knobs [0] rows with knobs[0]*n_col entries or more are removed
+ prior to ordering in colamd. Rows and columns with
+ knobs[0]*n_col entries or more are removed prior to
+ ordering in symamd and placed last in the output
+ ordering.
+
+ knobs [1] columns with knobs[1]*n_row entries or more are removed
+ prior to ordering in colamd, and placed last in the
+ column permutation. Symamd ignores this knob.
+
+ knobs [2..19] unused, but future versions might use this
+*/
+
+PUBLIC void colamd_set_defaults
+(
+ /* === Parameters ======================================================= */
+
+ double knobs [COLAMD_KNOBS] /* knob array */
+)
+{
+ /* === Local variables ================================================== */
+
+ int i ;
+
+ if (!knobs)
+ {
+ return ; /* no knobs to initialize */
+ }
+ for (i = 0 ; i < COLAMD_KNOBS ; i++)
+ {
+ knobs [i] = 0 ;
+ }
+ knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */
+ knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */
+}
+
+
+/* ========================================================================== */
+/* === symamd =============================================================== */
+/* ========================================================================== */
+
+PUBLIC int symamd /* return TRUE if OK, FALSE otherwise */
+(
+ /* === Parameters ======================================================= */
+
+ int n, /* number of rows and columns of A */
+ int A [], /* row indices of A */
+ int p [], /* column pointers of A */
+ int perm [], /* output permutation, size n+1 */
+ double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */
+ int stats [COLAMD_STATS], /* output statistics and error codes */
+ void * (*allocate) (size_t, size_t),
+ /* pointer to calloc (ANSI C) or */
+ /* mxCalloc (for MATLAB mexFunction) */
+ void (*release) (void *)
+ /* pointer to free (ANSI C) or */
+ /* mxFree (for MATLAB mexFunction) */
+)
+{
+ /* === Local variables ================================================== */
+
+ int *count ; /* length of each column of M, and col pointer*/
+ int *mark ; /* mark array for finding duplicate entries */
+ int *M ; /* row indices of matrix M */
+ int Mlen ; /* length of M */
+ int n_row ; /* number of rows in M */
+ int nnz ; /* number of entries in A */
+ int i ; /* row index of A */
+ int j ; /* column index of A */
+ int k ; /* row index of M */
+ int mnz ; /* number of nonzeros in M */
+ int pp ; /* index into a column of A */
+ int last_row ; /* last row seen in the current column */
+ int length ; /* number of nonzeros in a column */
+
+ double cknobs [COLAMD_KNOBS] ; /* knobs for colamd */
+ double default_knobs [COLAMD_KNOBS] ; /* default knobs for colamd */
+ int cstats [COLAMD_STATS] ; /* colamd stats */
+
+#ifndef NDEBUG
+ colamd_get_debug ("symamd") ;
+#endif /* NDEBUG */
+
+ /* === Check the input arguments ======================================== */
+
+ if (!stats)
+ {
+ DEBUG0 (("symamd: stats not present\n")) ;
+ return (FALSE) ;
+ }
+ for (i = 0 ; i < COLAMD_STATS ; i++)
+ {
+ stats [i] = 0 ;
+ }
+ stats [COLAMD_STATUS] = COLAMD_OK ;
+ stats [COLAMD_INFO1] = -1 ;
+ stats [COLAMD_INFO2] = -1 ;
+
+ if (!A)
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
+ DEBUG0 (("symamd: A not present\n")) ;
+ return (FALSE) ;
+ }
+
+ if (!p) /* p is not present */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
+ DEBUG0 (("symamd: p not present\n")) ;
+ return (FALSE) ;
+ }
+
+ if (n < 0) /* n must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
+ stats [COLAMD_INFO1] = n ;
+ DEBUG0 (("symamd: n negative %d\n", n)) ;
+ return (FALSE) ;
+ }
+
+ nnz = p [n] ;
+ if (nnz < 0) /* nnz must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
+ stats [COLAMD_INFO1] = nnz ;
+ DEBUG0 (("symamd: number of entries negative %d\n", nnz)) ;
+ return (FALSE) ;
+ }
+
+ if (p [0] != 0)
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
+ stats [COLAMD_INFO1] = p [0] ;
+ DEBUG0 (("symamd: p[0] not zero %d\n", p [0])) ;
+ return (FALSE) ;
+ }
+
+ /* === If no knobs, set default knobs =================================== */
+
+ if (!knobs)
+ {
+ colamd_set_defaults (default_knobs) ;
+ knobs = default_knobs ;
+ }
+
+ /* === Allocate count and mark ========================================== */
+
+ count = (int *) ((*allocate) (n+1, sizeof (int))) ;
+ if (!count)
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
+ DEBUG0 (("symamd: allocate count (size %d) failed\n", n+1)) ;
+ return (FALSE) ;
+ }
+
+ mark = (int *) ((*allocate) (n+1, sizeof (int))) ;
+ if (!mark)
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
+ (*release) ((void *) count) ;
+ DEBUG0 (("symamd: allocate mark (size %d) failed\n", n+1)) ;
+ return (FALSE) ;
+ }
+
+ /* === Compute column counts of M, check if A is valid ================== */
+
+ stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/
+
+ for (i = 0 ; i < n ; i++)
+ {
+ mark [i] = -1 ;
+ }
+
+ for (j = 0 ; j < n ; j++)
+ {
+ last_row = -1 ;
+
+ length = p [j+1] - p [j] ;
+ if (length < 0)
+ {
+ /* column pointers must be non-decreasing */
+ stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
+ stats [COLAMD_INFO1] = j ;
+ stats [COLAMD_INFO2] = length ;
+ (*release) ((void *) count) ;
+ (*release) ((void *) mark) ;
+ DEBUG0 (("symamd: col %d negative length %d\n", j, length)) ;
+ return (FALSE) ;
+ }
+
+ for (pp = p [j] ; pp < p [j+1] ; pp++)
+ {
+ i = A [pp] ;
+ if (i < 0 || i >= n)
+ {
+ /* row index i, in column j, is out of bounds */
+ stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
+ stats [COLAMD_INFO1] = j ;
+ stats [COLAMD_INFO2] = i ;
+ stats [COLAMD_INFO3] = n ;
+ (*release) ((void *) count) ;
+ (*release) ((void *) mark) ;
+ DEBUG0 (("symamd: row %d col %d out of bounds\n", i, j)) ;
+ return (FALSE) ;
+ }
+
+ if (i <= last_row || mark [i] == j)
+ {
+ /* row index is unsorted or repeated (or both), thus col */
+ /* is jumbled. This is a notice, not an error condition. */
+ stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
+ stats [COLAMD_INFO1] = j ;
+ stats [COLAMD_INFO2] = i ;
+ (stats [COLAMD_INFO3]) ++ ;
+ DEBUG1 (("symamd: row %d col %d unsorted/duplicate\n", i, j)) ;
+ }
+
+ if (i > j && mark [i] != j)
+ {
+ /* row k of M will contain column indices i and j */
+ count [i]++ ;
+ count [j]++ ;
+ }
-/* ========================================================================== */
+ /* mark the row as having been seen in this column */
+ mark [i] = j ;
+ last_row = i ;
+ }
+ }
-/* ========================================================================== */
-/* === USER-CALLABLE ROUTINES: ============================================== */
-/* ========================================================================== */
+ if (stats [COLAMD_STATUS] == COLAMD_OK)
+ {
+ /* if there are no duplicate entries, then mark is no longer needed */
+ (*release) ((void *) mark) ;
+ }
+ /* === Compute column pointers of M ===================================== */
-/* ========================================================================== */
-/* === colamd_recommended =================================================== */
-/* ========================================================================== */
+ /* use output permutation, perm, for column pointers of M */
+ perm [0] = 0 ;
+ for (j = 1 ; j <= n ; j++)
+ {
+ perm [j] = perm [j-1] + count [j-1] ;
+ }
+ for (j = 0 ; j < n ; j++)
+ {
+ count [j] = perm [j] ;
+ }
-/*
- The colamd_recommended routine returns the suggested size for Alen. This
- value has been determined to provide good balance between the number of
- garbage collections and the memory requirements for colamd.
-*/
+ /* === Construct M ====================================================== */
-PUBLIC int colamd_recommended /* returns recommended value of Alen. */
-(
- /* === Parameters ======================================================= */
+ mnz = perm [n] ;
+ n_row = mnz / 2 ;
+ Mlen = colamd_recommended (mnz, n_row, n) ;
+ M = (int *) ((*allocate) (Mlen, sizeof (int))) ;
+ DEBUG0 (("symamd: M is %d-by-%d with %d entries, Mlen = %d\n",
+ n_row, n, mnz, Mlen)) ;
- int nnz, /* number of nonzeros in A */
- int n_row, /* number of rows in A */
- int n_col /* number of columns in A */
-)
-{
- /* === Local variables ================================================== */
+ if (!M)
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
+ (*release) ((void *) count) ;
+ (*release) ((void *) mark) ;
+ DEBUG0 (("symamd: allocate M (size %d) failed\n", Mlen)) ;
+ return (FALSE) ;
+ }
- int minimum ; /* bare minimum requirements */
- int recommended ; /* recommended value of Alen */
+ k = 0 ;
- if (nnz < 0 || n_row < 0 || n_col < 0)
+ if (stats [COLAMD_STATUS] == COLAMD_OK)
{
- /* return -1 if any input argument is corrupted */
- DEBUG0 (("colamd_recommended error!")) ;
- DEBUG0 ((" nnz: %d, n_row: %d, n_col: %d\n", nnz, n_row, n_col)) ;
- return (-1) ;
+ /* Matrix is OK */
+ for (j = 0 ; j < n ; j++)
+ {
+ ASSERT (p [j+1] - p [j] >= 0) ;
+ for (pp = p [j] ; pp < p [j+1] ; pp++)
+ {
+ i = A [pp] ;
+ ASSERT (i >= 0 && i < n) ;
+ if (i > j)
+ {
+ /* row k of M contains column indices i and j */
+ M [count [i]++] = k ;
+ M [count [j]++] = k ;
+ k++ ;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* Matrix is jumbled. Do not add duplicates to M. Unsorted cols OK. */
+ DEBUG0 (("symamd: Duplicates in A.\n")) ;
+ for (i = 0 ; i < n ; i++)
+ {
+ mark [i] = -1 ;
+ }
+ for (j = 0 ; j < n ; j++)
+ {
+ ASSERT (p [j+1] - p [j] >= 0) ;
+ for (pp = p [j] ; pp < p [j+1] ; pp++)
+ {
+ i = A [pp] ;
+ ASSERT (i >= 0 && i < n) ;
+ if (i > j && mark [i] != j)
+ {
+ /* row k of M contains column indices i and j */
+ M [count [i]++] = k ;
+ M [count [j]++] = k ;
+ k++ ;
+ mark [i] = j ;
+ }
+ }
+ }
+ (*release) ((void *) mark) ;
}
- minimum =
- 2 * (nnz) /* for A */
- + (((n_col) + 1) * sizeof (ColInfo) / sizeof (int)) /* for Col */
- + (((n_row) + 1) * sizeof (RowInfo) / sizeof (int)) /* for Row */
- + n_col /* minimum elbow room to guarrantee success */
- + COLAMD_STATS ; /* for output statistics */
+ /* count and mark no longer needed */
+ (*release) ((void *) count) ;
+ ASSERT (k == n_row) ;
- /* recommended is equal to the minumum plus enough memory to keep the */
- /* number garbage collections low */
- recommended = minimum + nnz/5 ;
+ /* === Adjust the knobs for M =========================================== */
- return (recommended) ;
-}
+ for (i = 0 ; i < COLAMD_KNOBS ; i++)
+ {
+ cknobs [i] = knobs [i] ;
+ }
+ /* there are no dense rows in M */
+ cknobs [COLAMD_DENSE_ROW] = 1.0 ;
-/* ========================================================================== */
-/* === colamd_set_defaults ================================================== */
-/* ========================================================================== */
+ if (n_row != 0 && n < n_row)
+ {
+ /* On input, the knob is a fraction of 1..n, the number of rows of A. */
+ /* Convert it to a fraction of 1..n_row, the number of rows of M. */
+ cknobs [COLAMD_DENSE_COL] = (knobs [COLAMD_DENSE_ROW] * n) / n_row ;
+ }
+ else
+ {
+ /* no dense columns in M */
+ cknobs [COLAMD_DENSE_COL] = 1.0 ;
+ }
-/*
- The colamd_set_defaults routine sets the default values of the user-
- controllable parameters for colamd:
+ DEBUG0 (("symamd: dense col knob for M: %g\n", cknobs [COLAMD_DENSE_COL])) ;
- knobs [0] rows with knobs[0]*n_col entries or more are removed
- prior to ordering.
+ /* === Order the columns of M =========================================== */
- knobs [1] columns with knobs[1]*n_row entries or more are removed
- prior to ordering, and placed last in the column
- permutation.
+ if (!colamd (n_row, n, Mlen, M, perm, cknobs, cstats))
+ {
+ /* This "cannot" happen, unless there is a bug in the code. */
+ stats [COLAMD_STATUS] = COLAMD_ERROR_internal_error ;
+ (*release) ((void *) M) ;
+ DEBUG0 (("symamd: internal error!\n")) ;
+ return (FALSE) ;
+ }
- knobs [2..19] unused, but future versions might use this
-*/
+ /* Note that the output permutation is now in perm */
-PUBLIC void colamd_set_defaults
-(
- /* === Parameters ======================================================= */
+ /* === get the statistics for symamd from colamd ======================== */
- double knobs [COLAMD_KNOBS] /* knob array */
-)
-{
- /* === Local variables ================================================== */
+ /* note that a dense column in colamd means a dense row and col in symamd */
+ stats [COLAMD_DENSE_ROW] = cstats [COLAMD_DENSE_COL] ;
+ stats [COLAMD_DENSE_COL] = cstats [COLAMD_DENSE_COL] ;
+ stats [COLAMD_DEFRAG_COUNT] = cstats [COLAMD_DEFRAG_COUNT] ;
- int i ;
+ /* === Free M =========================================================== */
- if (!knobs)
- {
- return ; /* no knobs to initialize */
- }
- for (i = 0 ; i < COLAMD_KNOBS ; i++)
- {
- knobs [i] = 0 ;
- }
- knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */
- knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */
-}
+ (*release) ((void *) M) ;
+ DEBUG0 (("symamd: done.\n")) ;
+ return (TRUE) ;
+}
/* ========================================================================== */
/* === colamd =============================================================== */
@@ -633,79 +1299,9 @@ PUBLIC void colamd_set_defaults
selected via partial pivoting. The routine can also be viewed as
providing a permutation Q such that the Cholesky factorization
(AQ)'(AQ) = LL' remains sparse.
-
- On input, the nonzero patterns of the columns of A are stored in the
- array A, in order 0 to n_col-1. A is held in 0-based form (rows in the
- range 0 to n_row-1 and columns in the range 0 to n_col-1). Row indices
- for column c are located in A [(p [c]) ... (p [c+1]-1)], where p [0] = 0,
- and thus p [n_col] is the number of entries in A. The matrix is
- destroyed on output. The row indices within each column do not have to
- be sorted (from small to large row indices), and duplicate row indices
- may be present. However, colamd will work a little faster if columns are
- sorted and no duplicates are present. Matlab 5.2 always passes the matrix
- with sorted columns, and no duplicates.
-
- The integer array A is of size Alen. Alen must be at least of size
- (where nnz is the number of entries in A):
-
- nnz for the input column form of A
- + nnz for a row form of A that colamd generates
- + 6*(n_col+1) for a ColInfo Col [0..n_col] array
- (this assumes sizeof (ColInfo) is 6 int's).
- + 4*(n_row+1) for a RowInfo Row [0..n_row] array
- (this assumes sizeof (RowInfo) is 4 int's).
- + elbow_room must be at least n_col. We recommend at least
- nnz/5 in addition to that. If sufficient,
- changes in the elbow room affect the ordering
- time only, not the ordering itself.
- + COLAMD_STATS for the output statistics
-
- Colamd returns FALSE is memory is insufficient, or TRUE otherwise.
-
- On input, the caller must specify:
-
- n_row the number of rows of A
- n_col the number of columns of A
- Alen the size of the array A
- A [0 ... nnz-1] the row indices, where nnz = p [n_col]
- A [nnz ... Alen-1] (need not be initialized by the user)
- p [0 ... n_col] the column pointers, p [0] = 0, and p [n_col]
- is the number of entries in A. Column c of A
- is stored in A [p [c] ... p [c+1]-1].
- knobs [0 ... 19] a set of parameters that control the behavior
- of colamd. If knobs is a NULL pointer the
- defaults are used. The user-callable
- colamd_set_defaults routine sets the default
- parameters. See that routine for a description
- of the user-controllable parameters.
-
- If the return value of Colamd is TRUE, then on output:
-
- p [0 ... n_col-1] the column permutation. p [0] is the first
- column index, and p [n_col-1] is the last.
- That is, p [k] = j means that column j of A
- is the kth column of AQ.
-
- A is undefined on output (the matrix pattern is
- destroyed), except for the following statistics:
-
- A [0] the number of dense (or empty) rows ignored
- A [1] the number of dense (or empty) columms. These
- are ordered last, in their natural order.
- A [2] the number of garbage collections performed.
- If this is excessive, then you would have
- gotten your results faster if Alen was larger.
- A [3] 0, if all row indices in each column were in
- sorted order and no duplicates were present.
- 1, if there were unsorted or duplicate row
- indices in the input. You would have gotten
- your results faster if A [3] was returned as 0.
-
- If the return value of Colamd is FALSE, then A and p are undefined on
- output.
*/
-PUBLIC int colamd /* returns TRUE if successful */
+PUBLIC int colamd /* returns TRUE if successful, FALSE otherwise*/
(
/* === Parameters ======================================================= */
@@ -714,7 +1310,8 @@ PUBLIC int colamd /* returns TRUE if successful */
int Alen, /* length of A */
int A [], /* row indices of A */
int p [], /* pointers to columns in A */
- double knobs [COLAMD_KNOBS] /* parameters (uses defaults if NULL) */
+ double knobs [COLAMD_KNOBS],/* parameters (uses defaults if NULL) */
+ int stats [COLAMD_STATS] /* output statistics and error codes */
)
{
/* === Local variables ================================================== */
@@ -723,69 +1320,115 @@ PUBLIC int colamd /* returns TRUE if successful */
int nnz ; /* nonzeros in A */
int Row_size ; /* size of Row [], in integers */
int Col_size ; /* size of Col [], in integers */
- int elbow_room ; /* remaining free space */
- RowInfo *Row ; /* pointer into A of Row [0..n_row] array */
- ColInfo *Col ; /* pointer into A of Col [0..n_col] array */
+ int need ; /* minimum required length of A */
+ Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */
+ Colamd_Col *Col ; /* pointer into A of Col [0..n_col] array */
int n_col2 ; /* number of non-dense, non-empty columns */
int n_row2 ; /* number of non-dense, non-empty rows */
int ngarbage ; /* number of garbage collections performed */
int max_deg ; /* maximum row degree */
- double default_knobs [COLAMD_KNOBS] ; /* default knobs knobs array */
- int init_result ; /* return code from initialization */
+ double default_knobs [COLAMD_KNOBS] ; /* default knobs array */
#ifndef NDEBUG
- debug_colamd = 0 ; /* no debug printing */
- /* get "D" environment variable, which gives the debug printing level */
- if (getenv ("D")) debug_colamd = atoi (getenv ("D")) ;
- DEBUG0 (("debug version, D = %d (THIS WILL BE SLOOOOW!)\n", debug_colamd)) ;
-#endif
+ colamd_get_debug ("colamd") ;
+#endif /* NDEBUG */
/* === Check the input arguments ======================================== */
- if (n_row < 0 || n_col < 0 || !A || !p)
+ if (!stats)
+ {
+ DEBUG0 (("colamd: stats not present\n")) ;
+ return (FALSE) ;
+ }
+ for (i = 0 ; i < COLAMD_STATS ; i++)
+ {
+ stats [i] = 0 ;
+ }
+ stats [COLAMD_STATUS] = COLAMD_OK ;
+ stats [COLAMD_INFO1] = -1 ;
+ stats [COLAMD_INFO2] = -1 ;
+
+ if (!A) /* A is not present */
{
- /* n_row and n_col must be non-negative, A and p must be present */
- DEBUG0 (("colamd error! %d %d %d\n", n_row, n_col, Alen)) ;
+ stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
+ DEBUG0 (("colamd: A not present\n")) ;
return (FALSE) ;
}
+
+ if (!p) /* p is not present */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
+ DEBUG0 (("colamd: p not present\n")) ;
+ return (FALSE) ;
+ }
+
+ if (n_row < 0) /* n_row must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ;
+ stats [COLAMD_INFO1] = n_row ;
+ DEBUG0 (("colamd: nrow negative %d\n", n_row)) ;
+ return (FALSE) ;
+ }
+
+ if (n_col < 0) /* n_col must be >= 0 */
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
+ stats [COLAMD_INFO1] = n_col ;
+ DEBUG0 (("colamd: ncol negative %d\n", n_col)) ;
+ return (FALSE) ;
+ }
+
nnz = p [n_col] ;
- if (nnz < 0 || p [0] != 0)
+ if (nnz < 0) /* nnz must be >= 0 */
{
- /* nnz must be non-negative, and p [0] must be zero */
- DEBUG0 (("colamd error! %d %d\n", nnz, p [0])) ;
+ stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
+ stats [COLAMD_INFO1] = nnz ;
+ DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ;
return (FALSE) ;
}
- /* === If no knobs, set default parameters ============================== */
+ if (p [0] != 0)
+ {
+ stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
+ stats [COLAMD_INFO1] = p [0] ;
+ DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ;
+ return (FALSE) ;
+ }
+
+ /* === If no knobs, set default knobs =================================== */
if (!knobs)
{
+ colamd_set_defaults (default_knobs) ;
knobs = default_knobs ;
- colamd_set_defaults (knobs) ;
}
/* === Allocate the Row and Col arrays from array A ===================== */
- Col_size = (n_col + 1) * sizeof (ColInfo) / sizeof (int) ;
- Row_size = (n_row + 1) * sizeof (RowInfo) / sizeof (int) ;
- elbow_room = Alen - (2*nnz + Col_size + Row_size) ;
- if (elbow_room < n_col + COLAMD_STATS)
+ Col_size = COLAMD_C (n_col) ;
+ Row_size = COLAMD_R (n_row) ;
+ need = 2*nnz + n_col + Col_size + Row_size ;
+
+ if (need > Alen)
{
/* not enough space in array A to perform the ordering */
- DEBUG0 (("colamd error! elbow_room %d, %d\n", elbow_room,n_col)) ;
+ stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ;
+ stats [COLAMD_INFO1] = need ;
+ stats [COLAMD_INFO2] = Alen ;
+ DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen));
return (FALSE) ;
}
- Alen = 2*nnz + elbow_room ;
- Col = (ColInfo *) &A [Alen] ;
- Row = (RowInfo *) &A [Alen + Col_size] ;
+
+ Alen -= Col_size + Row_size ;
+ Col = (Colamd_Col *) &A [Alen] ;
+ Row = (Colamd_Row *) &A [Alen + Col_size] ;
/* === Construct the row and column data structures ===================== */
- init_result = init_rows_cols (n_row, n_col, Row, Col, A, p) ;
- if (init_result == -1)
+ if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats))
{
/* input matrix is invalid */
- DEBUG0 (("colamd error! matrix invalid\n")) ;
+ DEBUG0 (("colamd: Matrix invalid\n")) ;
return (FALSE) ;
}
@@ -803,22 +1446,44 @@ PUBLIC int colamd /* returns TRUE if successful */
order_children (n_col, Col, p) ;
- /* === Return statistics in A =========================================== */
-
- for (i = 0 ; i < COLAMD_STATS ; i++)
- {
- A [i] = 0 ;
- }
- A [COLAMD_DENSE_ROW] = n_row - n_row2 ;
- A [COLAMD_DENSE_COL] = n_col - n_col2 ;
- A [COLAMD_DEFRAG_COUNT] = ngarbage ;
- A [COLAMD_JUMBLED_COLS] = init_result ;
+ /* === Return statistics in stats ======================================= */
+ stats [COLAMD_DENSE_ROW] = n_row - n_row2 ;
+ stats [COLAMD_DENSE_COL] = n_col - n_col2 ;
+ stats [COLAMD_DEFRAG_COUNT] = ngarbage ;
+ DEBUG0 (("colamd: done.\n")) ;
return (TRUE) ;
}
/* ========================================================================== */
+/* === colamd_report ======================================================== */
+/* ========================================================================== */
+
+PUBLIC void colamd_report
+(
+ int stats [COLAMD_STATS]
+)
+{
+ print_report ("colamd", stats) ;
+}
+
+
+/* ========================================================================== */
+/* === symamd_report ======================================================== */
+/* ========================================================================== */
+
+PUBLIC void symamd_report
+(
+ int stats [COLAMD_STATS]
+)
+{
+ print_report ("symamd", stats) ;
+}
+
+
+
+/* ========================================================================== */
/* === NON-USER-CALLABLE ROUTINES: ========================================== */
/* ========================================================================== */
@@ -834,20 +1499,21 @@ PUBLIC int colamd /* returns TRUE if successful */
matrix. Also, row and column attributes are stored in the Col and Row
structs. If the columns are un-sorted or contain duplicate row indices,
this routine will also sort and remove duplicate row indices from the
- column form of the matrix. Returns -1 on error, 1 if columns jumbled,
- or 0 if columns not jumbled. Not user-callable.
+ column form of the matrix. Returns FALSE if the matrix is invalid,
+ TRUE otherwise. Not user-callable.
*/
-PRIVATE int init_rows_cols /* returns status code */
+PRIVATE int init_rows_cols /* returns TRUE if OK, or FALSE otherwise */
(
/* === Parameters ======================================================= */
int n_row, /* number of rows of A */
int n_col, /* number of columns of A */
- RowInfo Row [], /* of size n_row+1 */
- ColInfo Col [], /* of size n_col+1 */
+ Colamd_Row Row [], /* of size n_row+1 */
+ Colamd_Col Col [], /* of size n_col+1 */
int A [], /* row indices of A, of size Alen */
- int p [] /* pointers to columns in A, of size n_col+1 */
+ int p [], /* pointers to columns in A, of size n_col+1 */
+ int stats [COLAMD_STATS] /* colamd statistics */
)
{
/* === Local variables ================================================== */
@@ -858,44 +1524,36 @@ PRIVATE int init_rows_cols /* returns status code */
int *cp_end ; /* a pointer to the end of a column */
int *rp ; /* a row pointer */
int *rp_end ; /* a pointer to the end of a row */
- int last_start ; /* start index of previous column in A */
- int start ; /* start index of column in A */
int last_row ; /* previous row */
- int jumbled_columns ; /* indicates if columns are jumbled */
/* === Initialize columns, and check column pointers ==================== */
- last_start = 0 ;
for (col = 0 ; col < n_col ; col++)
{
- start = p [col] ;
- if (start < last_start)
+ Col [col].start = p [col] ;
+ Col [col].length = p [col+1] - p [col] ;
+
+ if (Col [col].length < 0)
{
/* column pointers must be non-decreasing */
- DEBUG0 (("colamd error! last p %d p [col] %d\n",last_start,start));
- return (-1) ;
+ stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
+ stats [COLAMD_INFO1] = col ;
+ stats [COLAMD_INFO2] = Col [col].length ;
+ DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ;
+ return (FALSE) ;
}
- Col [col].start = start ;
- Col [col].length = p [col+1] - start ;
+
Col [col].shared1.thickness = 1 ;
Col [col].shared2.score = 0 ;
Col [col].shared3.prev = EMPTY ;
Col [col].shared4.degree_next = EMPTY ;
- last_start = start ;
- }
- /* must check the end pointer for last column */
- if (p [n_col] < last_start)
- {
- /* column pointers must be non-decreasing */
- DEBUG0 (("colamd error! last p %d p [n_col] %d\n",p[col],last_start)) ;
- return (-1) ;
}
/* p [0..n_col] no longer needed, used as "head" in subsequent routines */
/* === Scan columns, compute row degrees, and check row indices ========= */
- jumbled_columns = FALSE ;
+ stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/
for (row = 0 ; row < n_row ; row++)
{
@@ -917,22 +1575,28 @@ PRIVATE int init_rows_cols /* returns status code */
/* make sure row indices within range */
if (row < 0 || row >= n_row)
{
- DEBUG0 (("colamd error! col %d row %d last_row %d\n",
- col, row, last_row)) ;
- return (-1) ;
+ stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
+ stats [COLAMD_INFO1] = col ;
+ stats [COLAMD_INFO2] = row ;
+ stats [COLAMD_INFO3] = n_row ;
+ DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
+ return (FALSE) ;
}
- else if (row <= last_row)
+
+ if (row <= last_row || Row [row].shared2.mark == col)
{
- /* row indices are not sorted or repeated, thus cols */
- /* are jumbled */
- jumbled_columns = TRUE ;
+ /* row indices are unsorted or repeated (or both), thus col */
+ /* is jumbled. This is a notice, not an error condition. */
+ stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
+ stats [COLAMD_INFO1] = col ;
+ stats [COLAMD_INFO2] = row ;
+ (stats [COLAMD_INFO3]) ++ ;
+ DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
}
- /* prevent repeated row from being counted */
+
if (Row [row].shared2.mark != col)
{
Row [row].length++ ;
- Row [row].shared2.mark = col ;
- last_row = row ;
}
else
{
@@ -940,6 +1604,11 @@ PRIVATE int init_rows_cols /* returns status code */
/* it will be removed */
Col [col].length-- ;
}
+
+ /* mark the row as having been seen in this column */
+ Row [row].shared2.mark = col ;
+
+ last_row = row ;
}
}
@@ -959,7 +1628,7 @@ PRIVATE int init_rows_cols /* returns status code */
/* === Create row form ================================================== */
- if (jumbled_columns)
+ if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
{
/* if cols jumbled, watch for repeated row indices */
for (col = 0 ; col < n_col ; col++)
@@ -1001,8 +1670,9 @@ PRIVATE int init_rows_cols /* returns status code */
/* === See if we need to re-create columns ============================== */
- if (jumbled_columns)
+ if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
{
+ DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;
#ifndef NDEBUG
/* make sure column lengths are correct */
@@ -1021,10 +1691,10 @@ PRIVATE int init_rows_cols /* returns status code */
}
for (col = 0 ; col < n_col ; col++)
{
- assert (p [col] == 0) ;
+ ASSERT (p [col] == 0) ;
}
/* now p is all zero (different than when debugging is turned off) */
-#endif
+#endif /* NDEBUG */
/* === Compute col pointers ========================================= */
@@ -1053,13 +1723,11 @@ PRIVATE int init_rows_cols /* returns status code */
A [(p [*rp++])++] = row ;
}
}
- return (1) ;
- }
- else
- {
- /* no columns jumbled (this is faster) */
- return (0) ;
}
+
+ /* === Done. Matrix is not (or no longer) jumbled ====================== */
+
+ return (TRUE) ;
}
@@ -1078,8 +1746,8 @@ PRIVATE void init_scoring
int n_row, /* number of rows of A */
int n_col, /* number of columns of A */
- RowInfo Row [], /* of size n_row+1 */
- ColInfo Col [], /* of size n_col+1 */
+ Colamd_Row Row [], /* of size n_row+1 */
+ Colamd_Col Col [], /* of size n_col+1 */
int A [], /* column form and row form of A */
int head [], /* of size n_col+1 */
double knobs [COLAMD_KNOBS],/* parameters */
@@ -1093,7 +1761,7 @@ PRIVATE void init_scoring
int c ; /* a column index */
int r, row ; /* a row index */
int *cp ; /* a column pointer */
- int deg ; /* degree (# entries) of a row or column */
+ int deg ; /* degree of a row or column */
int *cp_end ; /* a pointer to the end of a column */
int *new_cp ; /* new column pointer */
int col_length ; /* length of pruned column */
@@ -1105,22 +1773,23 @@ PRIVATE void init_scoring
int min_score ; /* smallest column score */
int max_deg ; /* maximum row degree */
int next_col ; /* Used to add to degree list.*/
+
#ifndef NDEBUG
int debug_count ; /* debug only. */
-#endif
+#endif /* NDEBUG */
/* === Extract knobs ==================================================== */
dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
- DEBUG0 (("densecount: %d %d\n", dense_row_count, dense_col_count)) ;
+ DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
max_deg = 0 ;
n_col2 = n_col ;
n_row2 = n_row ;
/* === Kill empty columns =============================================== */
- /* Put the empty columns at the end in their natural, so that LU */
+ /* Put the empty columns at the end in their natural order, so that LU */
/* factorization can proceed as far as possible. */
for (c = n_col-1 ; c >= 0 ; c--)
{
@@ -1132,7 +1801,7 @@ PRIVATE void init_scoring
KILL_PRINCIPAL_COL (c) ;
}
}
- DEBUG0 (("null columns killed: %d\n", n_col - n_col2)) ;
+ DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;
/* === Kill dense columns =============================================== */
@@ -1159,14 +1828,14 @@ PRIVATE void init_scoring
KILL_PRINCIPAL_COL (c) ;
}
}
- DEBUG0 (("Dense and null columns killed: %d\n", n_col - n_col2)) ;
+ DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;
/* === Kill dense and empty rows ======================================== */
for (r = 0 ; r < n_row ; r++)
{
deg = Row [r].shared1.degree ;
- assert (deg >= 0 && deg <= n_col) ;
+ ASSERT (deg >= 0 && deg <= n_col) ;
if (deg > dense_row_count || deg == 0)
{
/* kill a dense or empty row */
@@ -1179,7 +1848,7 @@ PRIVATE void init_scoring
max_deg = MAX (max_deg, deg) ;
}
}
- DEBUG0 (("Dense and null rows killed: %d\n", n_row - n_row2)) ;
+ DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
/* === Compute initial column scores ==================================== */
@@ -1222,20 +1891,21 @@ PRIVATE void init_scoring
{
/* a newly-made null column (all rows in this col are "dense" */
/* and have already been killed) */
- DEBUG0 (("Newly null killed: %d\n", c)) ;
+ DEBUG2 (("Newly null killed: %d\n", c)) ;
Col [c].shared2.order = --n_col2 ;
KILL_PRINCIPAL_COL (c) ;
}
else
{
/* set column length and set score */
- assert (score >= 0) ;
- assert (score <= n_col) ;
+ ASSERT (score >= 0) ;
+ ASSERT (score <= n_col) ;
Col [c].length = col_length ;
Col [c].shared2.score = score ;
}
}
- DEBUG0 (("Dense, null, and newly-null columns killed: %d\n",n_col-n_col2)) ;
+ DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
+ n_col-n_col2)) ;
/* At this point, all empty rows and columns are dead. All live columns */
/* are "clean" (containing no dead rows) and simplicial (no supercolumns */
@@ -1244,13 +1914,13 @@ PRIVATE void init_scoring
#ifndef NDEBUG
debug_structures (n_row, n_col, Row, Col, A, n_col2) ;
-#endif
+#endif /* NDEBUG */
/* === Initialize degree lists ========================================== */
#ifndef NDEBUG
debug_count = 0 ;
-#endif
+#endif /* NDEBUG */
/* clear the hash buckets */
for (c = 0 ; c <= n_col ; c++)
@@ -1272,11 +1942,11 @@ PRIVATE void init_scoring
score = Col [c].shared2.score ;
- assert (min_score >= 0) ;
- assert (min_score <= n_col) ;
- assert (score >= 0) ;
- assert (score <= n_col) ;
- assert (head [score] >= EMPTY) ;
+ ASSERT (min_score >= 0) ;
+ ASSERT (min_score <= n_col) ;
+ ASSERT (score >= 0) ;
+ ASSERT (score <= n_col) ;
+ ASSERT (head [score] >= EMPTY) ;
/* now add this column to dList at proper score location */
next_col = head [score] ;
@@ -1296,16 +1966,17 @@ PRIVATE void init_scoring
#ifndef NDEBUG
debug_count++ ;
-#endif
+#endif /* NDEBUG */
+
}
}
#ifndef NDEBUG
- DEBUG0 (("Live cols %d out of %d, non-princ: %d\n",
+ DEBUG1 (("colamd: Live cols %d out of %d, non-princ: %d\n",
debug_count, n_col, n_col-debug_count)) ;
- assert (debug_count == n_col2) ;
+ ASSERT (debug_count == n_col2) ;
debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ;
-#endif
+#endif /* NDEBUG */
/* === Return number of remaining columns, and max row degree =========== */
@@ -1331,9 +2002,9 @@ PRIVATE int find_ordering /* return the number of garbage collections */
int n_row, /* number of rows of A */
int n_col, /* number of columns of A */
- int Alen, /* size of A, 2*nnz + elbow_room or larger */
- RowInfo Row [], /* of size n_row+1 */
- ColInfo Col [], /* of size n_col+1 */
+ int Alen, /* size of A, 2*nnz + n_col or larger */
+ Colamd_Row Row [], /* of size n_row+1 */
+ Colamd_Col Col [], /* of size n_col+1 */
int A [], /* column form and row form of A */
int head [], /* of size n_col+1 */
int n_col2, /* Remaining columns to order */
@@ -1351,8 +2022,8 @@ PRIVATE int find_ordering /* return the number of garbage collections */
int *new_cp ; /* modified column pointer */
int *new_rp ; /* modified row pointer */
int pivot_row_start ; /* pointer to start of pivot row */
- int pivot_row_degree ; /* # of columns in pivot row */
- int pivot_row_length ; /* # of supercolumns in pivot row */
+ int pivot_row_degree ; /* number of columns in pivot row */
+ int pivot_row_length ; /* number of supercolumns in pivot row */
int pivot_col_score ; /* score of pivot column */
int needed_memory ; /* free space needed for pivot row */
int *cp_end ; /* pointer to the end of a column */
@@ -1368,16 +2039,17 @@ PRIVATE int find_ordering /* return the number of garbage collections */
int row_mark ; /* Row [row].shared2.mark */
int set_difference ; /* set difference size of row with pivot row */
int min_score ; /* smallest column score */
- int col_thickness ; /* "thickness" (# of columns in a supercol) */
+ int col_thickness ; /* "thickness" (no. of columns in a supercol) */
int max_mark ; /* maximum value of tag_mark */
int pivot_col_thickness ; /* number of columns represented by pivot col */
int prev_col ; /* Used by Dlist operations. */
int next_col ; /* Used by Dlist operations. */
int ngarbage ; /* number of garbage collections performed */
+
#ifndef NDEBUG
int debug_d ; /* debug loop counter */
int debug_step = 0 ; /* debug loop counter */
-#endif
+#endif /* NDEBUG */
/* === Initialization and clear mark ==================================== */
@@ -1385,7 +2057,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
tag_mark = clear_mark (n_row, Row) ;
min_score = 0 ;
ngarbage = 0 ;
- DEBUG0 (("Ordering.. n_col2=%d\n", n_col2)) ;
+ DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;
/* === Order the columns ================================================ */
@@ -1395,31 +2067,31 @@ PRIVATE int find_ordering /* return the number of garbage collections */
#ifndef NDEBUG
if (debug_step % 100 == 0)
{
- DEBUG0 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+ DEBUG2 (("\n... Step k: %d out of n_col2: %d\n", k, n_col2)) ;
}
else
{
- DEBUG1 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+ DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
}
debug_step++ ;
debug_deg_lists (n_row, n_col, Row, Col, head,
min_score, n_col2-k, max_deg) ;
debug_matrix (n_row, n_col, Row, Col, A) ;
-#endif
+#endif /* NDEBUG */
/* === Select pivot column, and order it ============================ */
/* make sure degree list isn't empty */
- assert (min_score >= 0) ;
- assert (min_score <= n_col) ;
- assert (head [min_score] >= EMPTY) ;
+ ASSERT (min_score >= 0) ;
+ ASSERT (min_score <= n_col) ;
+ ASSERT (head [min_score] >= EMPTY) ;
#ifndef NDEBUG
for (debug_d = 0 ; debug_d < min_score ; debug_d++)
{
- assert (head [debug_d] == EMPTY) ;
+ ASSERT (head [debug_d] == EMPTY) ;
}
-#endif
+#endif /* NDEBUG */
/* get pivot column from head of minimum degree list */
while (head [min_score] == EMPTY && min_score < n_col)
@@ -1427,7 +2099,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
min_score++ ;
}
pivot_col = head [min_score] ;
- assert (pivot_col >= 0 && pivot_col <= n_col) ;
+ ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
next_col = Col [pivot_col].shared4.degree_next ;
head [min_score] = next_col ;
if (next_col != EMPTY)
@@ -1435,7 +2107,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
Col [next_col].shared3.prev = EMPTY ;
}
- assert (COL_IS_ALIVE (pivot_col)) ;
+ ASSERT (COL_IS_ALIVE (pivot_col)) ;
DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
/* remember score for defrag check */
@@ -1447,7 +2119,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
/* increment order count by column thickness */
pivot_col_thickness = Col [pivot_col].shared1.thickness ;
k += pivot_col_thickness ;
- assert (pivot_col_thickness > 0) ;
+ ASSERT (pivot_col_thickness > 0) ;
/* === Garbage_collection, if necessary ============================= */
@@ -1457,12 +2129,13 @@ PRIVATE int find_ordering /* return the number of garbage collections */
pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
ngarbage++ ;
/* after garbage collection we will have enough */
- assert (pfree + needed_memory < Alen) ;
+ ASSERT (pfree + needed_memory < Alen) ;
/* garbage collection has wiped out the Row[].shared2.mark array */
tag_mark = clear_mark (n_row, Row) ;
+
#ifndef NDEBUG
debug_matrix (n_row, n_col, Row, Col, A) ;
-#endif
+#endif /* NDEBUG */
}
/* === Compute pivot row pattern ==================================== */
@@ -1502,7 +2175,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
{
/* tag column in pivot row */
Col [col].shared1.thickness = -col_thickness ;
- assert (pfree < Alen) ;
+ ASSERT (pfree < Alen) ;
/* place column in pivot row */
A [pfree++] = col ;
pivot_row_degree += col_thickness ;
@@ -1517,7 +2190,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
#ifndef NDEBUG
DEBUG3 (("check2\n")) ;
debug_mark (n_row, Row, tag_mark, max_mark) ;
-#endif
+#endif /* NDEBUG */
/* === Kill all rows used to construct pivot row ==================== */
@@ -1528,7 +2201,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
{
/* may be killing an already dead row */
row = *cp++ ;
- DEBUG2 (("Kill row in pivot col: %d\n", row)) ;
+ DEBUG3 (("Kill row in pivot col: %d\n", row)) ;
KILL_ROW (row) ;
}
@@ -1539,15 +2212,15 @@ PRIVATE int find_ordering /* return the number of garbage collections */
{
/* pick the "pivot" row arbitrarily (first row in col) */
pivot_row = A [Col [pivot_col].start] ;
- DEBUG2 (("Pivotal row is %d\n", pivot_row)) ;
+ DEBUG3 (("Pivotal row is %d\n", pivot_row)) ;
}
else
{
/* there is no pivot row, since it is of zero length */
pivot_row = EMPTY ;
- assert (pivot_row_length == 0) ;
+ ASSERT (pivot_row_length == 0) ;
}
- assert (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
+ ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
/* === Approximate degree computation =============================== */
@@ -1570,23 +2243,23 @@ PRIVATE int find_ordering /* return the number of garbage collections */
/* === Compute set differences ====================================== */
- DEBUG1 (("** Computing set differences phase. **\n")) ;
+ DEBUG3 (("** Computing set differences phase. **\n")) ;
/* pivot row is currently dead - it will be revived later. */
- DEBUG2 (("Pivot row: ")) ;
+ DEBUG3 (("Pivot row: ")) ;
/* for each column in pivot row */
rp = &A [pivot_row_start] ;
rp_end = rp + pivot_row_length ;
while (rp < rp_end)
{
col = *rp++ ;
- assert (COL_IS_ALIVE (col) && col != pivot_col) ;
- DEBUG2 (("Col: %d\n", col)) ;
+ ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
+ DEBUG3 (("Col: %d\n", col)) ;
/* clear tags used to construct pivot row pattern */
col_thickness = -Col [col].shared1.thickness ;
- assert (col_thickness > 0) ;
+ ASSERT (col_thickness > 0) ;
Col [col].shared1.thickness = col_thickness ;
/* === Remove column from degree list =========================== */
@@ -1594,9 +2267,9 @@ PRIVATE int find_ordering /* return the number of garbage collections */
cur_score = Col [col].shared2.score ;
prev_col = Col [col].shared3.prev ;
next_col = Col [col].shared4.degree_next ;
- assert (cur_score >= 0) ;
- assert (cur_score <= n_col) ;
- assert (cur_score >= EMPTY) ;
+ ASSERT (cur_score >= 0) ;
+ ASSERT (cur_score <= n_col) ;
+ ASSERT (cur_score >= EMPTY) ;
if (prev_col == EMPTY)
{
head [cur_score] = next_col ;
@@ -1624,21 +2297,21 @@ PRIVATE int find_ordering /* return the number of garbage collections */
{
continue ;
}
- assert (row != pivot_row) ;
+ ASSERT (row != pivot_row) ;
set_difference = row_mark - tag_mark ;
/* check if the row has been seen yet */
if (set_difference < 0)
{
- assert (Row [row].shared1.degree <= max_deg) ;
+ ASSERT (Row [row].shared1.degree <= max_deg) ;
set_difference = Row [row].shared1.degree ;
}
/* subtract column thickness from this row's set difference */
set_difference -= col_thickness ;
- assert (set_difference >= 0) ;
+ ASSERT (set_difference >= 0) ;
/* absorb this row if the set difference becomes zero */
if (set_difference == 0)
{
- DEBUG1 (("aggressive absorption. Row: %d\n", row)) ;
+ DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
KILL_ROW (row) ;
}
else
@@ -1652,11 +2325,11 @@ PRIVATE int find_ordering /* return the number of garbage collections */
#ifndef NDEBUG
debug_deg_lists (n_row, n_col, Row, Col, head,
min_score, n_col2-k-pivot_row_degree, max_deg) ;
-#endif
+#endif /* NDEBUG */
/* === Add up set differences for each column ======================= */
- DEBUG1 (("** Adding set differences phase. **\n")) ;
+ DEBUG3 (("** Adding set differences phase. **\n")) ;
/* for each column in pivot row */
rp = &A [pivot_row_start] ;
@@ -1665,7 +2338,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
{
/* get a column */
col = *rp++ ;
- assert (COL_IS_ALIVE (col) && col != pivot_col) ;
+ ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
hash = 0 ;
cur_score = 0 ;
cp = &A [Col [col].start] ;
@@ -1673,20 +2346,20 @@ PRIVATE int find_ordering /* return the number of garbage collections */
new_cp = cp ;
cp_end = cp + Col [col].length ;
- DEBUG2 (("Adding set diffs for Col: %d.\n", col)) ;
+ DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ;
while (cp < cp_end)
{
/* get a row */
row = *cp++ ;
- assert(row >= 0 && row < n_row) ;
+ ASSERT(row >= 0 && row < n_row) ;
row_mark = Row [row].shared2.mark ;
/* skip if dead */
if (ROW_IS_MARKED_DEAD (row_mark))
{
continue ;
}
- assert (row_mark > tag_mark) ;
+ ASSERT (row_mark > tag_mark) ;
/* compact the column */
*new_cp++ = row ;
/* compute hash function */
@@ -1704,11 +2377,11 @@ PRIVATE int find_ordering /* return the number of garbage collections */
if (Col [col].length == 0)
{
- DEBUG1 (("further mass elimination. Col: %d\n", col)) ;
+ DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
/* nothing left but the pivot row in this column */
KILL_PRINCIPAL_COL (col) ;
pivot_row_degree -= Col [col].shared1.thickness ;
- assert (pivot_row_degree >= 0) ;
+ ASSERT (pivot_row_degree >= 0) ;
/* order it */
Col [col].shared2.order = k ;
/* increment order count by column thickness */
@@ -1718,7 +2391,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
{
/* === Prepare for supercolumn detection ==================== */
- DEBUG2 (("Preparing supercol detection for Col: %d.\n", col)) ;
+ DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;
/* save score so far */
Col [col].shared2.score = cur_score ;
@@ -1726,8 +2399,8 @@ PRIVATE int find_ordering /* return the number of garbage collections */
/* add column to hash table, for supercolumn detection */
hash %= n_col + 1 ;
- DEBUG2 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
- assert (hash <= n_col) ;
+ DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
+ ASSERT (hash <= n_col) ;
head_column = head [hash] ;
if (head_column > EMPTY)
@@ -1747,7 +2420,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
/* save hash function in Col [col].shared3.hash */
Col [col].shared3.hash = (int) hash ;
- assert (COL_IS_ALIVE (col)) ;
+ ASSERT (COL_IS_ALIVE (col)) ;
}
}
@@ -1755,12 +2428,14 @@ PRIVATE int find_ordering /* return the number of garbage collections */
/* === Supercolumn detection ======================================== */
- DEBUG1 (("** Supercolumn detection phase. **\n")) ;
+ DEBUG3 (("** Supercolumn detection phase. **\n")) ;
detect_super_cols (
+
#ifndef NDEBUG
n_col, Row,
-#endif
+#endif /* NDEBUG */
+
Col, A, head, pivot_row_start, pivot_row_length) ;
/* === Kill the pivotal column ====================================== */
@@ -1772,17 +2447,18 @@ PRIVATE int find_ordering /* return the number of garbage collections */
tag_mark += (max_deg + 1) ;
if (tag_mark >= max_mark)
{
- DEBUG1 (("clearing tag_mark\n")) ;
+ DEBUG2 (("clearing tag_mark\n")) ;
tag_mark = clear_mark (n_row, Row) ;
}
+
#ifndef NDEBUG
DEBUG3 (("check3\n")) ;
debug_mark (n_row, Row, tag_mark, max_mark) ;
-#endif
+#endif /* NDEBUG */
/* === Finalize the new pivot row, and column scores ================ */
- DEBUG1 (("** Finalize scores phase. **\n")) ;
+ DEBUG3 (("** Finalize scores phase. **\n")) ;
/* for each column in pivot row */
rp = &A [pivot_row_start] ;
@@ -1816,18 +2492,18 @@ PRIVATE int find_ordering /* return the number of garbage collections */
/* make sure score is less or equal than the max score */
cur_score = MIN (cur_score, max_score) ;
- assert (cur_score >= 0) ;
+ ASSERT (cur_score >= 0) ;
/* store updated score */
Col [col].shared2.score = cur_score ;
/* === Place column back in degree list ========================= */
- assert (min_score >= 0) ;
- assert (min_score <= n_col) ;
- assert (cur_score >= 0) ;
- assert (cur_score <= n_col) ;
- assert (head [cur_score] >= EMPTY) ;
+ ASSERT (min_score >= 0) ;
+ ASSERT (min_score <= n_col) ;
+ ASSERT (cur_score >= 0) ;
+ ASSERT (cur_score <= n_col) ;
+ ASSERT (head [cur_score] >= EMPTY) ;
next_col = head [cur_score] ;
Col [col].shared4.degree_next = next_col ;
Col [col].shared3.prev = EMPTY ;
@@ -1845,7 +2521,7 @@ PRIVATE int find_ordering /* return the number of garbage collections */
#ifndef NDEBUG
debug_deg_lists (n_row, n_col, Row, Col, head,
min_score, n_col2-k, max_deg) ;
-#endif
+#endif /* NDEBUG */
/* === Resurrect the new pivot row ================================== */
@@ -1889,7 +2565,7 @@ PRIVATE void order_children
/* === Parameters ======================================================= */
int n_col, /* number of columns of A */
- ColInfo Col [], /* of size n_col+1 */
+ Colamd_Col Col [], /* of size n_col+1 */
int p [] /* p [0 ... n_col-1] is the column permutation*/
)
{
@@ -1905,7 +2581,7 @@ PRIVATE void order_children
for (i = 0 ; i < n_col ; i++)
{
/* find an un-ordered non-principal column */
- assert (COL_IS_DEAD (i)) ;
+ ASSERT (COL_IS_DEAD (i)) ;
if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY)
{
parent = i ;
@@ -1923,7 +2599,7 @@ PRIVATE void order_children
do
{
- assert (Col [c].shared2.order == EMPTY) ;
+ ASSERT (Col [c].shared2.order == EMPTY) ;
/* order this column */
Col [c].shared2.order = order++ ;
@@ -1992,9 +2668,10 @@ PRIVATE void detect_super_cols
#ifndef NDEBUG
/* these two parameters are only needed when debugging is enabled: */
int n_col, /* number of columns of A */
- RowInfo Row [], /* of size n_row+1 */
-#endif
- ColInfo Col [], /* of size n_col+1 */
+ Colamd_Row Row [], /* of size n_row+1 */
+#endif /* NDEBUG */
+
+ Colamd_Col Col [], /* of size n_col+1 */
int A [], /* row indices of A */
int head [], /* head of degree lists and hash buckets */
int row_start, /* pointer to set of columns to check */
@@ -2003,7 +2680,7 @@ PRIVATE void detect_super_cols
{
/* === Local variables ================================================== */
- int hash ; /* hash # for a column */
+ int hash ; /* hash value for a column */
int *rp ; /* pointer to a row */
int c ; /* a column index */
int super_c ; /* column index of the column to absorb into */
@@ -2031,7 +2708,7 @@ PRIVATE void detect_super_cols
/* get hash number for this column */
hash = Col [col].shared3.hash ;
- assert (hash <= n_col) ;
+ ASSERT (hash <= n_col) ;
/* === Get the first column in this hash bucket ===================== */
@@ -2050,8 +2727,8 @@ PRIVATE void detect_super_cols
for (super_c = first_col ; super_c != EMPTY ;
super_c = Col [super_c].shared4.hash_next)
{
- assert (COL_IS_ALIVE (super_c)) ;
- assert (Col [super_c].shared3.hash == hash) ;
+ ASSERT (COL_IS_ALIVE (super_c)) ;
+ ASSERT (Col [super_c].shared3.hash == hash) ;
length = Col [super_c].length ;
/* prev_c is the column preceding column c in the hash bucket */
@@ -2062,9 +2739,9 @@ PRIVATE void detect_super_cols
for (c = Col [super_c].shared4.hash_next ;
c != EMPTY ; c = Col [c].shared4.hash_next)
{
- assert (c != super_c) ;
- assert (COL_IS_ALIVE (c)) ;
- assert (Col [c].shared3.hash == hash) ;
+ ASSERT (c != super_c) ;
+ ASSERT (COL_IS_ALIVE (c)) ;
+ ASSERT (Col [c].shared3.hash == hash) ;
/* not identical if lengths or scores are different */
if (Col [c].length != length ||
@@ -2081,8 +2758,8 @@ PRIVATE void detect_super_cols
for (i = 0 ; i < length ; i++)
{
/* the columns are "clean" (no dead rows) */
- assert (ROW_IS_ALIVE (*cp1)) ;
- assert (ROW_IS_ALIVE (*cp2)) ;
+ ASSERT (ROW_IS_ALIVE (*cp1)) ;
+ ASSERT (ROW_IS_ALIVE (*cp2)) ;
/* row indices will same order for both supercols, */
/* no gather scatter nessasary */
if (*cp1++ != *cp2++)
@@ -2100,7 +2777,7 @@ PRIVATE void detect_super_cols
/* === Got it! two columns are identical =================== */
- assert (Col [c].shared2.score == Col [super_c].shared2.score) ;
+ ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;
Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
Col [c].shared1.parent = super_c ;
@@ -2147,8 +2824,8 @@ PRIVATE int garbage_collection /* returns the new value of pfree */
int n_row, /* number of rows */
int n_col, /* number of columns */
- RowInfo Row [], /* row info */
- ColInfo Col [], /* column info */
+ Colamd_Row Row [], /* row info */
+ Colamd_Col Col [], /* column info */
int A [], /* A [0 ... Alen-1] holds the matrix */
int *pfree /* &A [0] ... pfree is in use */
)
@@ -2164,10 +2841,10 @@ PRIVATE int garbage_collection /* returns the new value of pfree */
#ifndef NDEBUG
int debug_rows ;
- DEBUG0 (("Defrag..\n")) ;
- for (psrc = &A[0] ; psrc < pfree ; psrc++) assert (*psrc >= 0) ;
+ DEBUG2 (("Defrag..\n")) ;
+ for (psrc = &A[0] ; psrc < pfree ; psrc++) ASSERT (*psrc >= 0) ;
debug_rows = 0 ;
-#endif
+#endif /* NDEBUG */
/* === Defragment the columns =========================================== */
@@ -2179,7 +2856,7 @@ PRIVATE int garbage_collection /* returns the new value of pfree */
psrc = &A [Col [c].start] ;
/* move and compact the column */
- assert (pdest <= psrc) ;
+ ASSERT (pdest <= psrc) ;
Col [c].start = (int) (pdest - &A [0]) ;
length = Col [c].length ;
for (j = 0 ; j < length ; j++)
@@ -2203,7 +2880,7 @@ PRIVATE int garbage_collection /* returns the new value of pfree */
if (Row [r].length == 0)
{
/* this row is of zero length. cannot compact it, so kill it */
- DEBUG0 (("Defrag row kill\n")) ;
+ DEBUG3 (("Defrag row kill\n")) ;
KILL_ROW (r) ;
}
else
@@ -2211,12 +2888,14 @@ PRIVATE int garbage_collection /* returns the new value of pfree */
/* save first column index in Row [r].shared2.first_column */
psrc = &A [Row [r].start] ;
Row [r].shared2.first_column = *psrc ;
- assert (ROW_IS_ALIVE (r)) ;
+ ASSERT (ROW_IS_ALIVE (r)) ;
/* flag the start of the row with the one's complement of row */
*psrc = ONES_COMPLEMENT (r) ;
+
#ifndef NDEBUG
debug_rows++ ;
-#endif
+#endif /* NDEBUG */
+
}
}
}
@@ -2232,13 +2911,13 @@ PRIVATE int garbage_collection /* returns the new value of pfree */
psrc-- ;
/* get the row index */
r = ONES_COMPLEMENT (*psrc) ;
- assert (r >= 0 && r < n_row) ;
+ ASSERT (r >= 0 && r < n_row) ;
/* restore first column index */
*psrc = Row [r].shared2.first_column ;
- assert (ROW_IS_ALIVE (r)) ;
+ ASSERT (ROW_IS_ALIVE (r)) ;
/* move and compact the row */
- assert (pdest <= psrc) ;
+ ASSERT (pdest <= psrc) ;
Row [r].start = (int) (pdest - &A [0]) ;
length = Row [r].length ;
for (j = 0 ; j < length ; j++)
@@ -2250,13 +2929,15 @@ PRIVATE int garbage_collection /* returns the new value of pfree */
}
}
Row [r].length = (int) (pdest - &A [Row [r].start]) ;
+
#ifndef NDEBUG
debug_rows-- ;
-#endif
+#endif /* NDEBUG */
+
}
}
/* ensure we found all the rows */
- assert (debug_rows == 0) ;
+ ASSERT (debug_rows == 0) ;
/* === Return the new value of pfree ==================================== */
@@ -2278,14 +2959,13 @@ PRIVATE int clear_mark /* return the new value for tag_mark */
/* === Parameters ======================================================= */
int n_row, /* number of rows in A */
- RowInfo Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */
+ Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */
)
{
/* === Local variables ================================================== */
int r ;
- DEBUG0 (("Clear mark\n")) ;
for (r = 0 ; r < n_row ; r++)
{
if (ROW_IS_ALIVE (r))
@@ -2298,7 +2978,139 @@ PRIVATE int clear_mark /* return the new value for tag_mark */
/* ========================================================================== */
-/* === debugging routines =================================================== */
+/* === print_report ========================================================= */
+/* ========================================================================== */
+
+PRIVATE void print_report
+(
+ char *method,
+ int stats [COLAMD_STATS]
+)
+{
+
+ int i1, i2, i3 ;
+
+ if (!stats)
+ {
+ PRINTF ("%s: No statistics available.\n", method) ;
+ return ;
+ }
+
+ i1 = stats [COLAMD_INFO1] ;
+ i2 = stats [COLAMD_INFO2] ;
+ i3 = stats [COLAMD_INFO3] ;
+
+ if (stats [COLAMD_STATUS] >= 0)
+ {
+ PRINTF ("%s: OK. ", method) ;
+ }
+ else
+ {
+ PRINTF ("%s: ERROR. ", method) ;
+ }
+
+ switch (stats [COLAMD_STATUS])
+ {
+
+ case COLAMD_OK_BUT_JUMBLED:
+
+ PRINTF ("Matrix has unsorted or duplicate row indices.\n") ;
+
+ PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n",
+ method, i3) ;
+
+ PRINTF ("%s: last seen duplicate or out-of-order row index: %d\n",
+ method, INDEX (i2)) ;
+
+ PRINTF ("%s: last seen in column: %d",
+ method, INDEX (i1)) ;
+
+ /* no break - fall through to next case instead */
+
+ case COLAMD_OK:
+
+ PRINTF ("\n") ;
+
+ PRINTF ("%s: number of dense or empty rows ignored: %d\n",
+ method, stats [COLAMD_DENSE_ROW]) ;
+
+ PRINTF ("%s: number of dense or empty columns ignored: %d\n",
+ method, stats [COLAMD_DENSE_COL]) ;
+
+ PRINTF ("%s: number of garbage collections performed: %d\n",
+ method, stats [COLAMD_DEFRAG_COUNT]) ;
+ break ;
+
+ case COLAMD_ERROR_A_not_present:
+
+ PRINTF ("Array A (row indices of matrix) not present.\n") ;
+ break ;
+
+ case COLAMD_ERROR_p_not_present:
+
+ PRINTF ("Array p (column pointers for matrix) not present.\n") ;
+ break ;
+
+ case COLAMD_ERROR_nrow_negative:
+
+ PRINTF ("Invalid number of rows (%d).\n", i1) ;
+ break ;
+
+ case COLAMD_ERROR_ncol_negative:
+
+ PRINTF ("Invalid number of columns (%d).\n", i1) ;
+ break ;
+
+ case COLAMD_ERROR_nnz_negative:
+
+ PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ;
+ break ;
+
+ case COLAMD_ERROR_p0_nonzero:
+
+ PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ;
+ break ;
+
+ case COLAMD_ERROR_A_too_small:
+
+ PRINTF ("Array A too small.\n") ;
+ PRINTF (" Need Alen >= %d, but given only Alen = %d.\n",
+ i1, i2) ;
+ break ;
+
+ case COLAMD_ERROR_col_length_negative:
+
+ PRINTF
+ ("Column %d has a negative number of nonzero entries (%d).\n",
+ INDEX (i1), i2) ;
+ break ;
+
+ case COLAMD_ERROR_row_index_out_of_bounds:
+
+ PRINTF
+ ("Row index (row %d) out of bounds (%d to %d) in column %d.\n",
+ INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ;
+ break ;
+
+ case COLAMD_ERROR_out_of_memory:
+
+ PRINTF ("Out of memory.\n") ;
+ break ;
+
+ case COLAMD_ERROR_internal_error:
+
+ /* if this happens, there is a bug in the code */
+ PRINTF
+ ("Internal error! Please contact authors (davis at cise.ufl.edu).\n") ;
+ break ;
+ }
+}
+
+
+
+
+/* ========================================================================== */
+/* === colamd debugging routines ============================================ */
/* ========================================================================== */
/* When debugging is disabled, the remainder of this file is ignored. */
@@ -2323,8 +3135,8 @@ PRIVATE void debug_structures
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int A [],
int n_col2
)
@@ -2351,21 +3163,21 @@ PRIVATE void debug_structures
len = Col [c].length ;
score = Col [c].shared2.score ;
DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ;
- assert (len > 0) ;
- assert (score >= 0) ;
- assert (Col [c].shared1.thickness == 1) ;
+ ASSERT (len > 0) ;
+ ASSERT (score >= 0) ;
+ ASSERT (Col [c].shared1.thickness == 1) ;
cp = &A [Col [c].start] ;
cp_end = cp + len ;
while (cp < cp_end)
{
r = *cp++ ;
- assert (ROW_IS_ALIVE (r)) ;
+ ASSERT (ROW_IS_ALIVE (r)) ;
}
}
else
{
i = Col [c].shared2.order ;
- assert (i >= n_col2 && i < n_col) ;
+ ASSERT (i >= n_col2 && i < n_col) ;
}
}
@@ -2376,8 +3188,8 @@ PRIVATE void debug_structures
i = 0 ;
len = Row [r].length ;
deg = Row [r].shared1.degree ;
- assert (len > 0) ;
- assert (deg > 0) ;
+ ASSERT (len > 0) ;
+ ASSERT (deg > 0) ;
rp = &A [Row [r].start] ;
rp_end = rp + len ;
while (rp < rp_end)
@@ -2388,7 +3200,7 @@ PRIVATE void debug_structures
i++ ;
}
}
- assert (i > 0) ;
+ ASSERT (i > 0) ;
}
}
}
@@ -2410,8 +3222,8 @@ PRIVATE void debug_deg_lists
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int head [],
int min_score,
int should,
@@ -2427,7 +3239,7 @@ PRIVATE void debug_deg_lists
/* === Check the degree lists =========================================== */
- if (n_col > 10000 && debug_colamd <= 0)
+ if (n_col > 10000 && colamd_debug <= 0)
{
return ;
}
@@ -2445,17 +3257,17 @@ PRIVATE void debug_deg_lists
{
DEBUG4 ((" %d", col)) ;
have += Col [col].shared1.thickness ;
- assert (COL_IS_ALIVE (col)) ;
+ ASSERT (COL_IS_ALIVE (col)) ;
col = Col [col].shared4.degree_next ;
}
DEBUG4 (("\n")) ;
}
DEBUG4 (("should %d have %d\n", should, have)) ;
- assert (should == have) ;
+ ASSERT (should == have) ;
/* === Check the row degrees ============================================ */
- if (n_row > 10000 && debug_colamd <= 0)
+ if (n_row > 10000 && colamd_debug <= 0)
{
return ;
}
@@ -2463,7 +3275,7 @@ PRIVATE void debug_deg_lists
{
if (ROW_IS_ALIVE (row))
{
- assert (Row [row].shared1.degree <= max_deg) ;
+ ASSERT (Row [row].shared1.degree <= max_deg) ;
}
}
}
@@ -2483,7 +3295,7 @@ PRIVATE void debug_mark
/* === Parameters ======================================================= */
int n_row,
- RowInfo Row [],
+ Colamd_Row Row [],
int tag_mark,
int max_mark
)
@@ -2494,14 +3306,14 @@ PRIVATE void debug_mark
/* === Check the Row marks ============================================== */
- assert (tag_mark > 0 && tag_mark <= max_mark) ;
- if (n_row > 10000 && debug_colamd <= 0)
+ ASSERT (tag_mark > 0 && tag_mark <= max_mark) ;
+ if (n_row > 10000 && colamd_debug <= 0)
{
return ;
}
for (r = 0 ; r < n_row ; r++)
{
- assert (Row [r].shared2.mark < tag_mark) ;
+ ASSERT (Row [r].shared2.mark < tag_mark) ;
}
}
@@ -2520,8 +3332,8 @@ PRIVATE void debug_matrix
int n_row,
int n_col,
- RowInfo Row [],
- ColInfo Col [],
+ Colamd_Row Row [],
+ Colamd_Col Col [],
int A []
)
{
@@ -2536,7 +3348,7 @@ PRIVATE void debug_matrix
/* === Dump the rows and columns of the matrix ========================== */
- if (debug_colamd < 3)
+ if (colamd_debug < 3)
{
return ;
}
@@ -2555,7 +3367,7 @@ PRIVATE void debug_matrix
while (rp < rp_end)
{
c = *rp++ ;
- DEBUG3 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ;
+ DEBUG4 ((" %d col %d\n", COL_IS_ALIVE (c), c)) ;
}
}
@@ -2574,10 +3386,27 @@ PRIVATE void debug_matrix
while (cp < cp_end)
{
r = *cp++ ;
- DEBUG3 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ;
+ DEBUG4 ((" %d row %d\n", ROW_IS_ALIVE (r), r)) ;
}
}
}
-#endif
+PRIVATE void colamd_get_debug
+(
+ char *method
+)
+{
+ colamd_debug = 0 ; /* no debug printing */
+
+ /* get "D" environment variable, which gives the debug printing level */
+ if (getenv ("D"))
+ {
+ colamd_debug = atoi (getenv ("D")) ;
+ }
+
+ DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n",
+ method, colamd_debug)) ;
+}
+
+#endif /* NDEBUG */
diff --git a/SRC/colamd.h b/SRC/colamd.h
index 0078398..6e30662 100644
--- a/SRC/colamd.h
+++ b/SRC/colamd.h
@@ -1,49 +1,200 @@
/* ========================================================================== */
-/* === colamd prototypes and definitions ==================================== */
+/* === colamd/symamd prototypes and definitions ============================= */
/* ========================================================================== */
/*
- This is the colamd include file,
+ You must include this file (colamd.h) in any routine that uses colamd,
+ symamd, or the related macros and definitions.
- http://www.cise.ufl.edu/~davis/colamd/colamd.h
+ Authors:
- for use in the colamd.c, colamdmex.c, and symamdmex.c files located at
+ The authors of the code itself are Stefan I. Larimore and Timothy A.
+ Davis (davis at cise.ufl.edu), University of Florida. The algorithm was
+ developed in collaboration with John Gilbert, Xerox PARC, and Esmond
+ Ng, Oak Ridge National Laboratory.
- http://www.cise.ufl.edu/~davis/colamd/
+ Date:
- See those files for a description of colamd and symamd, and for the
- copyright notice, which also applies to this file.
+ September 8, 2003. Version 2.3.
+
+ Acknowledgements:
+
+ This work was supported by the National Science Foundation, under
+ grants DMS-9504974 and DMS-9803599.
+
+ Notice:
+
+ Copyright (c) 1998-2003 by the University of Florida.
+ All Rights Reserved.
+
+ THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+ EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+
+ Permission is hereby granted to use, copy, modify, and/or distribute
+ this program, provided that the Copyright, this License, and the
+ Availability of the original version is retained on all copies and made
+ accessible to the end-user of any code or package that includes COLAMD
+ or any modified version of COLAMD.
+
+ Availability:
+
+ The colamd/symamd library is available at
+
+ http://www.cise.ufl.edu/research/sparse/colamd/
+
+ This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
+ file. It is required by the colamd.c, colamdmex.c, and symamdmex.c
+ files, and by any C code that calls the routines whose prototypes are
+ listed below, or that uses the colamd/symamd definitions listed below.
- August 3, 1998. Version 1.0.
*/
+#ifndef COLAMD_H
+#define COLAMD_H
+
+/* ========================================================================== */
+/* === Include files ======================================================== */
+/* ========================================================================== */
+
+#include <stdlib.h>
+
/* ========================================================================== */
-/* === Definitions ========================================================== */
+/* === Knob and statistics definitions ====================================== */
/* ========================================================================== */
/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */
#define COLAMD_KNOBS 20
-/* number of output statistics. Only A [0..2] are currently used. */
+/* number of output statistics. Only stats [0..6] are currently used. */
#define COLAMD_STATS 20
-/* knobs [0] and A [0]: dense row knob and output statistic. */
+/* knobs [0] and stats [0]: dense row knob and output statistic. */
#define COLAMD_DENSE_ROW 0
-/* knobs [1] and A [1]: dense column knob and output statistic. */
+/* knobs [1] and stats [1]: dense column knob and output statistic. */
#define COLAMD_DENSE_COL 1
-/* A [2]: memory defragmentation count output statistic */
+/* stats [2]: memory defragmentation count output statistic */
#define COLAMD_DEFRAG_COUNT 2
-/* A [3]: whether or not the input columns were jumbled or had duplicates */
-#define COLAMD_JUMBLED_COLS 3
+/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */
+#define COLAMD_STATUS 3
+
+/* stats [4..6]: error info, or info on jumbled columns */
+#define COLAMD_INFO1 4
+#define COLAMD_INFO2 5
+#define COLAMD_INFO3 6
+
+/* error codes returned in stats [3]: */
+#define COLAMD_OK (0)
+#define COLAMD_OK_BUT_JUMBLED (1)
+#define COLAMD_ERROR_A_not_present (-1)
+#define COLAMD_ERROR_p_not_present (-2)
+#define COLAMD_ERROR_nrow_negative (-3)
+#define COLAMD_ERROR_ncol_negative (-4)
+#define COLAMD_ERROR_nnz_negative (-5)
+#define COLAMD_ERROR_p0_nonzero (-6)
+#define COLAMD_ERROR_A_too_small (-7)
+#define COLAMD_ERROR_col_length_negative (-8)
+#define COLAMD_ERROR_row_index_out_of_bounds (-9)
+#define COLAMD_ERROR_out_of_memory (-10)
+#define COLAMD_ERROR_internal_error (-999)
+
+/* ========================================================================== */
+/* === Row and Column structures ============================================ */
+/* ========================================================================== */
+
+/* User code that makes use of the colamd/symamd routines need not directly */
+/* reference these structures. They are used only for the COLAMD_RECOMMENDED */
+/* macro. */
+
+typedef struct Colamd_Col_struct
+{
+ int start ; /* index for A of first row in this column, or DEAD */
+ /* if column is dead */
+ int length ; /* number of rows in this column */
+ union
+ {
+ int thickness ; /* number of original columns represented by this */
+ /* col, if the column is alive */
+ int parent ; /* parent in parent tree super-column structure, if */
+ /* the column is dead */
+ } shared1 ;
+ union
+ {
+ int score ; /* the score used to maintain heap, if col is alive */
+ int order ; /* pivot ordering of this column, if col is dead */
+ } shared2 ;
+ union
+ {
+ int headhash ; /* head of a hash bucket, if col is at the head of */
+ /* a degree list */
+ int hash ; /* hash value, if col is not in a degree list */
+ int prev ; /* previous column in degree list, if col is in a */
+ /* degree list (but not at the head of a degree list) */
+ } shared3 ;
+ union
+ {
+ int degree_next ; /* next column, if col is in a degree list */
+ int hash_next ; /* next column, if col is in a hash list */
+ } shared4 ;
+
+} Colamd_Col ;
+
+typedef struct Colamd_Row_struct
+{
+ int start ; /* index for A of first col in this row */
+ int length ; /* number of principal columns in this row */
+ union
+ {
+ int degree ; /* number of principal & non-principal columns in row */
+ int p ; /* used as a row pointer in init_rows_cols () */
+ } shared1 ;
+ union
+ {
+ int mark ; /* for computing set differences and marking dead rows*/
+ int first_column ;/* first column in row (used in garbage collection) */
+ } shared2 ;
+
+} Colamd_Row ;
+
+/* ========================================================================== */
+/* === Colamd recommended memory size ======================================= */
+/* ========================================================================== */
+
+/*
+ The recommended length Alen of the array A passed to colamd is given by
+ the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any
+ argument is negative. 2*nnz space is required for the row and column
+ indices of the matrix. COLAMD_C (n_col) + COLAMD_R (n_row) space is
+ required for the Col and Row arrays, respectively, which are internal to
+ colamd. An additional n_col space is the minimal amount of "elbow room",
+ and nnz/5 more space is recommended for run time efficiency.
+
+ This macro is not needed when using symamd.
+
+ Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid
+ gcc -pedantic warning messages.
+*/
+
+#define COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (Colamd_Col) / sizeof (int)))
+#define COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int)))
+
+#define COLAMD_RECOMMENDED(nnz, n_row, n_col) \
+( \
+((nnz) < 0 || (n_row) < 0 || (n_col) < 0) \
+? \
+ (-1) \
+: \
+ (2 * (nnz) + COLAMD_C (n_col) + COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \
+)
/* ========================================================================== */
/* === Prototypes of user-callable routines ================================= */
/* ========================================================================== */
-int colamd_recommended /* returns recommended value of Alen */
+int colamd_recommended /* returns recommended value of Alen, */
+ /* or (-1) if input arguments are erroneous */
(
int nnz, /* nonzeros in A */
int n_row, /* number of rows in A */
@@ -55,13 +206,41 @@ void colamd_set_defaults /* sets default parameters */
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
) ;
-int colamd /* returns TRUE if successful, FALSE otherwise*/
+int colamd /* returns (1) if successful, (0) otherwise*/
( /* A and p arguments are modified on output */
int n_row, /* number of rows in A */
int n_col, /* number of columns in A */
int Alen, /* size of the array A */
int A [], /* row indices of A, of size Alen */
int p [], /* column pointers of A, of size n_col+1 */
- double knobs [COLAMD_KNOBS] /* parameter settings for colamd */
+ double knobs [COLAMD_KNOBS],/* parameter settings for colamd */
+ int stats [COLAMD_STATS] /* colamd output statistics and error codes */
+) ;
+
+int symamd /* return (1) if OK, (0) otherwise */
+(
+ int n, /* number of rows and columns of A */
+ int A [], /* row indices of A */
+ int p [], /* column pointers of A */
+ int perm [], /* output permutation, size n+1 */
+ double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */
+ int stats [COLAMD_STATS], /* output statistics and error codes */
+ void * (*allocate) (size_t, size_t),
+ /* pointer to calloc (ANSI C) or */
+ /* mxCalloc (for MATLAB mexFunction) */
+ void (*release) (void *)
+ /* pointer to free (ANSI C) or */
+ /* mxFree (for MATLAB mexFunction) */
+) ;
+
+void colamd_report
+(
+ int stats [COLAMD_STATS]
+) ;
+
+void symamd_report
+(
+ int stats [COLAMD_STATS]
) ;
+#endif /* COLAMD_H */
diff --git a/SRC/cpanel_bmod.c b/SRC/cpanel_bmod.c
index d73f2c5..c1246a1 100644
--- a/SRC/cpanel_bmod.c
+++ b/SRC/cpanel_bmod.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
/*
* Function prototypes
diff --git a/SRC/cpanel_dfs.c b/SRC/cpanel_dfs.c
index 6343c0b..f20a8d2 100644
--- a/SRC/cpanel_dfs.c
+++ b/SRC/cpanel_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
void
cpanel_dfs (
diff --git a/SRC/cpivotL.c b/SRC/cpivotL.c
index f4640b4..db24a0d 100644
--- a/SRC/cpivotL.c
+++ b/SRC/cpivotL.c
@@ -21,7 +21,7 @@
#include <math.h>
#include <stdlib.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
#undef DEBUG
diff --git a/SRC/cpivotgrowth.c b/SRC/cpivotgrowth.c
index a077daa..63bd7bf 100644
--- a/SRC/cpivotgrowth.c
+++ b/SRC/cpivotgrowth.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
float
cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c,
diff --git a/SRC/cpruneL.c b/SRC/cpruneL.c
index 39d3005..29f20d4 100644
--- a/SRC/cpruneL.c
+++ b/SRC/cpruneL.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
void
cpruneL(
diff --git a/SRC/creadhb.c b/SRC/creadhb.c
index cc41b06..e437d27 100644
--- a/SRC/creadhb.c
+++ b/SRC/creadhb.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -9,7 +8,7 @@
*/
#include <stdio.h>
#include <stdlib.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
/* Eat up the rest of the current line */
diff --git a/SRC/csnode_bmod.c b/SRC/csnode_bmod.c
index e552240..041737f 100644
--- a/SRC/csnode_bmod.c
+++ b/SRC/csnode_bmod.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
/*
diff --git a/SRC/csnode_dfs.c b/SRC/csnode_dfs.c
index 4892c44..19fb10c 100644
--- a/SRC/csnode_dfs.c
+++ b/SRC/csnode_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
int
csnode_dfs (
diff --git a/SRC/csp_blas2.c b/SRC/csp_blas2.c
index e18e2c1..d1a0a53 100644
--- a/SRC/csp_blas2.c
+++ b/SRC/csp_blas2.c
@@ -11,7 +11,7 @@
* Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
/*
* Function prototypes
@@ -132,7 +132,8 @@ sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
luptr = L_NZ_START(fsupc);
nrow = nsupr - nsupc;
- solve_ops += 4 * nsupc * (nsupc - 1);
+ /* 1 c_div costs 10 flops */
+ solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
solve_ops += 8 * nrow * nsupc;
if ( nsupc == 1 ) {
@@ -185,7 +186,8 @@ sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
nsupc = L_FST_SUPC(k+1) - fsupc;
luptr = L_NZ_START(fsupc);
- solve_ops += 4 * nsupc * (nsupc + 1);
+ /* 1 c_div costs 10 flops */
+ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
if ( nsupc == 1 ) {
c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
@@ -279,7 +281,8 @@ sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
}
}
- solve_ops += 4 * nsupc * (nsupc + 1);
+ /* 1 c_div costs 10 flops */
+ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
if ( nsupc == 1 ) {
c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
@@ -358,7 +361,8 @@ sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
}
}
- solve_ops += 4 * nsupc * (nsupc + 1);
+ /* 1 c_div costs 10 flops */
+ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
if ( nsupc == 1 ) {
cc_conj(&temp, &Lval[luptr]);
diff --git a/SRC/csp_blas2.c.bak b/SRC/csp_blas2.c.bak
deleted file mode 100644
index bc6cb28..0000000
--- a/SRC/csp_blas2.c.bak
+++ /dev/null
@@ -1,479 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- * File name: csp_blas2.c
- * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "csp_defs.h"
-
-/*
- * Function prototypes
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-
-
-int
-sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
- SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * sp_ctrsv() solves one of the systems of equations
- * A*x = b, or A'*x = b,
- * where b and x are n element vectors and A is a sparse unit , or
- * non-unit, upper or lower triangular matrix.
- * No test for singularity or near-singularity is included in this
- * routine. Such tests must be performed before calling this routine.
- *
- * Parameters
- * ==========
- *
- * uplo - (input) char*
- * On entry, uplo specifies whether the matrix is an upper or
- * lower triangular matrix as follows:
- * uplo = 'U' or 'u' A is an upper triangular matrix.
- * uplo = 'L' or 'l' A is a lower triangular matrix.
- *
- * trans - (input) char*
- * On entry, trans specifies the equations to be solved as
- * follows:
- * trans = 'N' or 'n' A*x = b.
- * trans = 'T' or 't' A'*x = b.
- * trans = 'C' or 'c' A'*x = b.
- *
- * diag - (input) char*
- * On entry, diag specifies whether or not A is unit
- * triangular as follows:
- * diag = 'U' or 'u' A is assumed to be unit triangular.
- * diag = 'N' or 'n' A is not assumed to be unit
- * triangular.
- *
- * L - (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U. Use
- * compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
- *
- * U - (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U.
- * U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
- *
- * x - (input/output) complex*
- * Before entry, the incremented array X must contain the n
- * element right-hand side vector b. On exit, X is overwritten
- * with the solution vector x.
- *
- * info - (output) int*
- * If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
- _fcd ftcs1 = _cptofcd("L", strlen("L")),
- ftcs2 = _cptofcd("N", strlen("N")),
- ftcs3 = _cptofcd("U", strlen("U"));
-#endif
- SCformat *Lstore;
- NCformat *Ustore;
- complex *Lval, *Uval;
- int incx = 1, incy = 1;
- complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
- complex comp_zero = {0.0, 0.0};
- int nrow;
- int fsupc, nsupr, nsupc, luptr, istart, irow;
- int i, k, iptr, jcol;
- complex *work;
- flops_t solve_ops;
-
- /* Test the input parameters */
- *info = 0;
- if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
- else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
- else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
- else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
- else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
- if ( *info ) {
- i = -(*info);
- xerbla_("sp_ctrsv", &i);
- return 0;
- }
-
- Lstore = L->Store;
- Lval = Lstore->nzval;
- Ustore = U->Store;
- Uval = Ustore->nzval;
- solve_ops = 0;
-
- if ( !(work = complexCalloc(L->nrow)) )
- ABORT("Malloc fails for work in sp_ctrsv().");
-
- if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
-
- if ( lsame_(uplo, "L") ) {
- /* Form x := inv(L)*x */
- if ( L->nrow == 0 ) return 0; /* Quick return */
-
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
- nrow = nsupr - nsupc;
-
- solve_ops += 4 * nsupc * (nsupc - 1);
- solve_ops += 8 * nrow * nsupc;
-
- if ( nsupc == 1 ) {
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
- irow = L_SUB(iptr);
- ++luptr;
- cc_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
- c_sub(&x[irow], &x[irow], &comp_zero);
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-
- CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
- &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
- ctrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-
- cgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
- &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
- clsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-
- cmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
- &x[fsupc], &work[0] );
-#endif
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; ++i, ++iptr) {
- irow = L_SUB(iptr);
- c_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
- work[i] = comp_zero;
-
- }
- }
- } /* for k ... */
-
- } else {
- /* Form x := inv(U)*x */
-
- if ( U->nrow == 0 ) return 0; /* Quick return */
-
- for (k = Lstore->nsuper; k >= 0; k--) {
- fsupc = L_FST_SUPC(k);
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += 4 * nsupc * (nsupc + 1);
-
- if ( nsupc == 1 ) {
- c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
- for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
- irow = U_SUB(i);
- cc_mult(&comp_zero, &x[fsupc], &Uval[i]);
- c_sub(&x[irow], &x[irow], &comp_zero);
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- ctrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
-#else
- cusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
- i++) {
- irow = U_SUB(i);
- cc_mult(&comp_zero, &x[jcol], &Uval[i]);
- c_sub(&x[irow], &x[irow], &comp_zero);
- }
- }
- }
- } /* for k ... */
-
- }
- } else { /* Form x := inv(A')*x */
-
- if ( lsame_(uplo, "L") ) {
- /* Form x := inv(L')*x */
- if ( L->nrow == 0 ) return 0; /* Quick return */
-
- for (k = Lstore->nsuper; k >= 0; --k) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += 8 * (nsupr - nsupc) * nsupc;
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- iptr = istart + nsupc;
- for (i = L_NZ_START(jcol) + nsupc;
- i < L_NZ_START(jcol+1); i++) {
- irow = L_SUB(iptr);
- cc_mult(&comp_zero, &x[irow], &Lval[i]);
- c_sub(&x[jcol], &x[jcol], &comp_zero);
- iptr++;
- }
- }
-
- if ( nsupc > 1 ) {
- solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("T", strlen("T"));
- ftcs3 = _cptofcd("U", strlen("U"));
- CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
- }
- }
- } else {
- /* Form x := inv(U')*x */
- if ( U->nrow == 0 ) return 0; /* Quick return */
-
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
- irow = U_SUB(i);
- cc_mult(&comp_zero, &x[irow], &Uval[i]);
- c_sub(&x[jcol], &x[jcol], &comp_zero);
- }
- }
-
- solve_ops += 4 * nsupc * (nsupc + 1);
-
- if ( nsupc == 1 ) {
- c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
- } else {
-#ifdef _CRAY
- ftcs1 = _cptofcd("U", strlen("U"));
- ftcs2 = _cptofcd("T", strlen("T"));
- ftcs3 = _cptofcd("N", strlen("N"));
- CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
- }
- } /* for k ... */
- }
- }
-
- stat->ops[SOLVE] += solve_ops;
- SUPERLU_FREE(work);
- return 0;
-}
-
-
-
-int
-sp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x,
- int incx, complex beta, complex *y, int incy)
-{
-/* Purpose
- =======
-
- sp_cgemv() performs one of the matrix-vector operations
- y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
- where alpha and beta are scalars, x and y are vectors and A is a
- sparse A->nrow by A->ncol matrix.
-
- Parameters
- ==========
-
- TRANS - (input) char*
- On entry, TRANS specifies the operation to be performed as
- follows:
- TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
- TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
- TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
-
- ALPHA - (input) complex
- On entry, ALPHA specifies the scalar alpha.
-
- A - (input) SuperMatrix*
- Before entry, the leading m by n part of the array A must
- contain the matrix of coefficients.
-
- X - (input) complex*, array of DIMENSION at least
- ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
- and at least
- ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
- Before entry, the incremented array X must contain the
- vector x.
-
- INCX - (input) int
- On entry, INCX specifies the increment for the elements of
- X. INCX must not be zero.
-
- BETA - (input) complex
- On entry, BETA specifies the scalar beta. When BETA is
- supplied as zero then Y need not be set on input.
-
- Y - (output) complex*, array of DIMENSION at least
- ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
- and at least
- ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
- Before entry with BETA non-zero, the incremented array Y
- must contain the vector y. On exit, Y is overwritten by the
- updated vector y.
-
- INCY - (input) int
- On entry, INCY specifies the increment for the elements of
- Y. INCY must not be zero.
-
- ==== Sparse Level 2 Blas routine.
-*/
-
- /* Local variables */
- NCformat *Astore;
- complex *Aval;
- int info;
- complex temp, temp1;
- int lenx, leny, i, j, irow;
- int iy, jx, jy, kx, ky;
- int notran;
- complex comp_zero = {0.0, 0.0};
- complex comp_one = {1.0, 0.0};
-
- notran = lsame_(trans, "N");
- Astore = A->Store;
- Aval = Astore->nzval;
-
- /* Test the input parameters */
- info = 0;
- if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
- else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
- else if (incx == 0) info = 5;
- else if (incy == 0) info = 8;
- if (info != 0) {
- xerbla_("sp_cgemv ", &info);
- return 0;
- }
-
- /* Quick return if possible. */
- if (A->nrow == 0 || A->ncol == 0 ||
- c_eq(&alpha, &comp_zero) &&
- c_eq(&beta, &comp_one))
- return 0;
-
-
- /* Set LENX and LENY, the lengths of the vectors x and y, and set
- up the start points in X and Y. */
- if (lsame_(trans, "N")) {
- lenx = A->ncol;
- leny = A->nrow;
- } else {
- lenx = A->nrow;
- leny = A->ncol;
- }
- if (incx > 0) kx = 0;
- else kx = - (lenx - 1) * incx;
- if (incy > 0) ky = 0;
- else ky = - (leny - 1) * incy;
-
- /* Start the operations. In this version the elements of A are
- accessed sequentially with one pass through A. */
- /* First form y := beta*y. */
- if ( !c_eq(&beta, &comp_one) ) {
- if (incy == 1) {
- if ( c_eq(&beta, &comp_zero) )
- for (i = 0; i < leny; ++i) y[i] = comp_zero;
- else
- for (i = 0; i < leny; ++i)
- cc_mult(&y[i], &beta, &y[i]);
- } else {
- iy = ky;
- if ( c_eq(&beta, &comp_zero) )
- for (i = 0; i < leny; ++i) {
- y[iy] = comp_zero;
- iy += incy;
- }
- else
- for (i = 0; i < leny; ++i) {
- cc_mult(&y[iy], &beta, &y[iy]);
- iy += incy;
- }
- }
- }
-
- if ( c_eq(&alpha, &comp_zero) ) return 0;
-
- if ( notran ) {
- /* Form y := alpha*A*x + y. */
- jx = kx;
- if (incy == 1) {
- for (j = 0; j < A->ncol; ++j) {
- if ( !c_eq(&x[jx], &comp_zero) ) {
- cc_mult(&temp, &alpha, &x[jx]);
- for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
- irow = Astore->rowind[i];
- cc_mult(&temp1, &temp, &Aval[i]);
- c_add(&y[irow], &y[irow], &temp1);
- }
- }
- jx += incx;
- }
- } else {
- ABORT("Not implemented.");
- }
- } else {
- /* Form y := alpha*A'*x + y. */
- jy = ky;
- if (incx == 1) {
- for (j = 0; j < A->ncol; ++j) {
- temp = comp_zero;
- for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
- irow = Astore->rowind[i];
- cc_mult(&temp1, &Aval[i], &x[irow]);
- c_add(&temp, &temp, &temp1);
- }
- cc_mult(&temp1, &alpha, &temp);
- c_add(&y[jy], &y[jy], &temp1);
- jy += incy;
- }
- } else {
- ABORT("Not implemented.");
- }
- }
- return 0;
-} /* sp_cgemv */
-
diff --git a/SRC/csp_blas3.c b/SRC/csp_blas3.c
index 8dc9f5a..e11c11d 100644
--- a/SRC/csp_blas3.c
+++ b/SRC/csp_blas3.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* Purpose: Sparse BLAS3, using some dense BLAS3 operations.
*/
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
int
sp_cgemm(char *transa, char *transb, int m, int n, int k,
diff --git a/SRC/cutil.c b/SRC/cutil.c
index 4c7f985..bb50965 100644
--- a/SRC/cutil.c
+++ b/SRC/cutil.c
@@ -20,7 +20,7 @@
*/
#include <math.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
void
cCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
@@ -240,7 +240,8 @@ cPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
for (j = c; j < c + nsup; ++j) {
d = Astore->nzval_colptr[j];
for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
- printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d++], dp[d++]);
+ printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]);
+ d += 2;
}
}
}
@@ -267,16 +268,19 @@ void
cPrint_Dense_Matrix(char *what, SuperMatrix *A)
{
DNformat *Astore;
- register int i;
+ register int i, j, lda = Astore->lda;
float *dp;
printf("\nDense matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
Astore = (DNformat *) A->Store;
dp = (float *) Astore->nzval;
- printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+ printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
printf("\nnzval: ");
- for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i]);
+ for (j = 0; j < A->ncol; ++j) {
+ for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i + j*2*lda]);
+ printf("\n");
+ }
printf("\n");
fflush(stdout);
}
diff --git a/SRC/dcolumn_bmod.c b/SRC/dcolumn_bmod.c
index 43fc18f..0ba9270 100644
--- a/SRC/dcolumn_bmod.c
+++ b/SRC/dcolumn_bmod.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/*
* Function prototypes
diff --git a/SRC/dcolumn_dfs.c b/SRC/dcolumn_dfs.c
index 96e6222..c644ef7 100644
--- a/SRC/dcolumn_dfs.c
+++ b/SRC/dcolumn_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/* What type of supernodes we want */
#define T2_SUPER
diff --git a/SRC/dcomplex.c b/SRC/dcomplex.c
index 85d2be3..396c15c 100644
--- a/SRC/dcomplex.c
+++ b/SRC/dcomplex.c
@@ -10,8 +10,9 @@
* This file defines common arithmetic operations for complex type.
*/
#include <math.h>
+#include <stdlib.h>
#include <stdio.h>
-#include "dcomplex.h"
+#include "slu_dcomplex.h"
/* Complex Division c = a/b */
@@ -26,8 +27,8 @@ void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b)
abi = - abi;
if( abr <= abi ) {
if (abi == 0) {
- fprintf(stderr, "z_div.c: division by zero");
- exit (-1);
+ fprintf(stderr, "z_div.c: division by zero\n");
+ exit(-1);
}
ratio = b->r / b->i ;
den = b->i * (1 + ratio*ratio);
diff --git a/SRC/dcopy_to_ucol.c b/SRC/dcopy_to_ucol.c
index 09670df..453e33a 100644
--- a/SRC/dcopy_to_ucol.c
+++ b/SRC/dcopy_to_ucol.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
int
dcopy_to_ucol(
diff --git a/SRC/dgscon.c b/SRC/dgscon.c
index 9c313b5..059da61 100644
--- a/SRC/dgscon.c
+++ b/SRC/dgscon.c
@@ -11,7 +11,7 @@
* History: Modified from lapack routines DGECON.
*/
#include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
dgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/dgsequ.c b/SRC/dgsequ.c
index ac569b9..0daee10 100644
--- a/SRC/dgsequ.c
+++ b/SRC/dgsequ.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from LAPACK routine DGEEQU
*/
#include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
void
dgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
diff --git a/SRC/dgsrfs.c b/SRC/dgsrfs.c
index 922093b..a71cc38 100644
--- a/SRC/dgsrfs.c
+++ b/SRC/dgsrfs.c
@@ -11,7 +11,7 @@
* History: Modified from lapack routine DGERFS
*/
#include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/dgssv.c b/SRC/dgssv.c
index e7725a4..99e84dd 100644
--- a/SRC/dgssv.c
+++ b/SRC/dgssv.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -7,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
dgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/dgssvx.c b/SRC/dgssvx.c
index a4baab3..b7e1a1c 100644
--- a/SRC/dgssvx.c
+++ b/SRC/dgssvx.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
dgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/dgstrf.c b/SRC/dgstrf.c
index 5696cff..aba4c0b 100644
--- a/SRC/dgstrf.c
+++ b/SRC/dgstrf.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
dgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
@@ -182,8 +182,8 @@ dgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
*/
/* Local working arrays */
NCPformat *Astore;
- int *iperm_r; /* inverse of perm_r;
- used when options->Fact == SamePattern_SameRowPerm */
+ int *iperm_r = NULL; /* inverse of perm_r; used when
+ options->Fact == SamePattern_SameRowPerm */
int *iperm_c; /* inverse of perm_c */
int *iwork;
double *dwork;
diff --git a/SRC/dgstrs.c b/SRC/dgstrs.c
index 4807a51..04cb38d 100644
--- a/SRC/dgstrs.c
+++ b/SRC/dgstrs.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/*
diff --git a/SRC/dgstrs.c.bak b/SRC/dgstrs.c.bak
deleted file mode 100644
index 04efbbd..0000000
--- a/SRC/dgstrs.c.bak
+++ /dev/null
@@ -1,334 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- Copyright (c) 1994 by Xerox Corporation. All rights reserved.
-
- THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
-
- Permission is hereby granted to use or copy this program for any
- purpose, provided the above notices are retained on all copies.
- Permission to modify the code and to distribute modified code is
- granted, provided the above notices are retained, and a notice that
- the code was modified is included with the above copyright notice.
-*/
-
-#include "dsp_defs.h"
-
-
-/*
- * Function prototypes
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-
-void
-dgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
- int *perm_c, int *perm_r, SuperMatrix *B,
- SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * DGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * DGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans (input) trans_t
- * Specifies the form of the system of equations:
- * = NOTRANS: A * X = B (No transpose)
- * = TRANS: A'* X = B (Transpose)
- * = CONJ: A**H * X = B (Conjugate transpose)
- *
- * L (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U as computed by
- * dgstrf(). Use compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U as computed by
- * dgstrf(). Use column-wise storage scheme, i.e., U has types:
- * Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * perm_c (input) int*, dimension (L->ncol)
- * Column permutation vector, which defines the
- * permutation matrix Pc; perm_c[i] = j means column i of A is
- * in position j in A*Pc.
- *
- * perm_r (input) int*, dimension (L->nrow)
- * Row permutation vector, which defines the permutation matrix Pr;
- * perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B (input/output) SuperMatrix*
- * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- * On entry, the right hand side matrix.
- * On exit, the solution matrix if info = 0;
- *
- * stat (output) SuperLUStat_t*
- * Record the statistics on runtime and floating-point operation count.
- * See util.h for the definition of 'SuperLUStat_t'.
- *
- * info (output) int*
- * = 0: successful exit
- * < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
- _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
- int incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
- double alpha = 1.0, beta = 1.0;
- double *work_col;
-#endif
- DNformat *Bstore;
- double *Bmat;
- SCformat *Lstore;
- NCformat *Ustore;
- double *Lval, *Uval;
- int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
- int i, j, k, iptr, jcol, n, ldb, nrhs;
- double *work, *rhs_work, *soln;
- flops_t solve_ops;
- void dprint_soln();
-
- /* Test input parameters ... */
- *info = 0;
- Bstore = B->Store;
- ldb = Bstore->lda;
- nrhs = B->ncol;
- if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
- else if ( L->nrow != L->ncol || L->nrow < 0 ||
- L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
- *info = -2;
- else if ( U->nrow != U->ncol || U->nrow < 0 ||
- U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU )
- *info = -3;
- else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
- B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
- *info = -6;
- if ( *info ) {
- i = -(*info);
- xerbla_("dgstrs", &i);
- return;
- }
-
- n = L->nrow;
- work = doubleCalloc(n * nrhs);
- if ( !work ) ABORT("Malloc fails for local work[].");
- soln = doubleMalloc(n);
- if ( !soln ) ABORT("Malloc fails for local soln[].");
-
- Bmat = Bstore->nzval;
- Lstore = L->Store;
- Lval = Lstore->nzval;
- Ustore = U->Store;
- Uval = Ustore->nzval;
- solve_ops = 0;
-
- if ( trans == NOTRANS ) {
- /* Permute right hand sides to form Pr*B */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- /* Forward solve PLy=Pb. */
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- nrow = nsupr - nsupc;
-
- solve_ops += nsupc * (nsupc - 1) * nrhs;
- solve_ops += 2 * nrow * nsupc * nrhs;
-
- if ( nsupc == 1 ) {
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- luptr = L_NZ_START(fsupc);
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
- irow = L_SUB(iptr);
- ++luptr;
- rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
- }
- }
- } else {
- luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("N", strlen("N"));
- ftcs3 = _cptofcd("U", strlen("U"));
- STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#else
- dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#endif
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- work_col = &work[j*n];
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- rhs_work[irow] -= work_col[i]; /* Scatter */
- work_col[i] = 0.0;
- iptr++;
- }
- }
-#else
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
- dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
- &rhs_work[fsupc], &work[0] );
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- rhs_work[irow] -= work[i];
- work[i] = 0.0;
- iptr++;
- }
- }
-#endif
- } /* else ... */
- } /* for L-solve */
-
-#ifdef DEBUG
- printf("After L-solve: y=\n");
- dprint_soln(n, nrhs, Bmat);
-#endif
-
- /*
- * Back solve Ux=y.
- */
- for (k = Lstore->nsuper; k >= 0; k--) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += nsupc * (nsupc + 1) * nrhs;
-
- if ( nsupc == 1 ) {
- rhs_work = &Bmat[0];
- for (j = 0; j < nrhs; j++) {
- rhs_work[fsupc] /= Lval[luptr];
- rhs_work += ldb;
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("U", strlen("U"));
- ftcs3 = _cptofcd("N", strlen("N"));
- STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
- dtrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else
- for (j = 0; j < nrhs; j++)
- dusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif
- }
-
- for (j = 0; j < nrhs; ++j) {
- rhs_work = &Bmat[j*ldb];
- for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
- solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
- irow = U_SUB(i);
- rhs_work[irow] -= rhs_work[jcol] * Uval[i];
- }
- }
- }
-
- } /* for U-solve */
-
-#ifdef DEBUG
- printf("After U-solve: x=\n");
- dprint_soln(n, nrhs, Bmat);
-#endif
-
- /* Compute the final solution X := Pc*X. */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- stat->ops[SOLVE] = solve_ops;
-
- } else { /* Solve A'*X=B */
- /* Permute right hand sides to form Pc'*B. */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- stat->ops[SOLVE] = 0;
-
- for (k = 0; k < nrhs; ++k) {
-
- /* Multiply by inv(U'). */
- sp_dtrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-
- /* Multiply by inv(L'). */
- sp_dtrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-
- }
-
- /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- }
-
- SUPERLU_FREE(work);
- SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector
- */
-void
-dprint_soln(int n, int nrhs, double *soln)
-{
- int i;
-
- for (i = 0; i < n; i++)
- printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/SRC/dgstrsL.c b/SRC/dgstrsL.c
index e13b111..c7f20e5 100644
--- a/SRC/dgstrsL.c
+++ b/SRC/dgstrsL.c
@@ -20,8 +20,8 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
+#include "slu_util.h"
/*
diff --git a/SRC/dlacon.c b/SRC/dlacon.c
index d5dd354..932b891 100644
--- a/SRC/dlacon.c
+++ b/SRC/dlacon.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,7 +7,7 @@
*
*/
#include <math.h>
-#include "Cnames.h"
+#include "slu_Cnames.h"
int
dlacon_(int *n, double *v, double *x, int *isgn, double *est, int *kase)
diff --git a/SRC/dlamch.c b/SRC/dlamch.c
index 12b41b4..53c1f90 100644
--- a/SRC/dlamch.c
+++ b/SRC/dlamch.c
@@ -1,4 +1,6 @@
#include <stdio.h>
+#include "slu_Cnames.h"
+
#define TRUE_ (1)
#define FALSE_ (0)
#define abs(x) ((x) >= 0 ? (x) : -(x))
diff --git a/SRC/dlangs.c b/SRC/dlangs.c
index 5a642ca..1dd5dfc 100644
--- a/SRC/dlangs.c
+++ b/SRC/dlangs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from lapack routine DLANGE
*/
#include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
double dlangs(char *norm, SuperMatrix *A)
{
diff --git a/SRC/dlaqgs.c b/SRC/dlaqgs.c
index 4873a91..6a7a7b8 100644
--- a/SRC/dlaqgs.c
+++ b/SRC/dlaqgs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,7 +11,7 @@
* History: Modified from LAPACK routine DLAQGE
*/
#include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
dlaqgs(SuperMatrix *A, double *r, double *c,
diff --git a/SRC/dmemory.c b/SRC/dmemory.c
index c2e24a6..c56cb10 100644
--- a/SRC/dmemory.c
+++ b/SRC/dmemory.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/* Constants */
#define NO_MEMTYPE 4 /* 0: lusup;
@@ -193,9 +193,10 @@ dLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
dSetupSpace(work, lwork, &Glu->MemModel);
}
-#ifdef DEBUG
- printf("dLUMemInit() called: annz %d, MemModel %d\n",
- annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+ printf("dLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n",
+ FILL, nzlmax, nzumax);
+ fflush(stdout);
#endif
/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ dLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
printf("Not enough memory to perform factorization.\n");
return (dmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
}
+#if ( PRNTlevel >= 1)
+ printf("dLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
+ nzlmax, nzumax);
+ fflush(stdout);
+#endif
lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) dexpand( &nzlmax, LSUB, 0, 0, Glu );
@@ -476,8 +482,7 @@ void
else lword = sizeof(double);
if ( Glu->MemModel == SYSTEM ) {
- new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/* new_mem = (void *) calloc(new_len, lword); */
+ new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
if ( no_expand != 0 ) {
tries = 0;
if ( keep_prev ) {
@@ -487,8 +492,7 @@ void
if ( ++tries > 10 ) return (NULL);
alpha = Reduce(alpha);
new_len = alpha * *prev_len;
- new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/* new_mem = (void *) calloc(new_len, lword); */
+ new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
}
}
if ( type == LSUB || type == USUB ) {
@@ -641,7 +645,7 @@ dallocateA(int n, int nnz, double **a, int **asub, int **xa)
double *doubleMalloc(int n)
{
double *buf;
- buf = (double *) SUPERLU_MALLOC(n * sizeof(double));
+ buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n");
}
@@ -653,7 +657,7 @@ double *doubleCalloc(int n)
double *buf;
register int i;
double zero = 0.0;
- buf = (double *) SUPERLU_MALLOC(n * sizeof(double));
+ buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n");
}
diff --git a/SRC/dmyblas2.c b/SRC/dmyblas2.c
index e6bbdd1..e02660a 100644
--- a/SRC/dmyblas2.c
+++ b/SRC/dmyblas2.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
diff --git a/SRC/dpanel_bmod.c b/SRC/dpanel_bmod.c
index bdc8c77..0019fe8 100644
--- a/SRC/dpanel_bmod.c
+++ b/SRC/dpanel_bmod.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/*
* Function prototypes
diff --git a/SRC/dpanel_dfs.c b/SRC/dpanel_dfs.c
index da2f18c..6a4c742 100644
--- a/SRC/dpanel_dfs.c
+++ b/SRC/dpanel_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
void
dpanel_dfs (
diff --git a/SRC/dpivotL.c b/SRC/dpivotL.c
index 9263427..bf43e6a 100644
--- a/SRC/dpivotL.c
+++ b/SRC/dpivotL.c
@@ -21,7 +21,7 @@
#include <math.h>
#include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
#undef DEBUG
diff --git a/SRC/dpivotgrowth.c b/SRC/dpivotgrowth.c
index ac943c1..41924f7 100644
--- a/SRC/dpivotgrowth.c
+++ b/SRC/dpivotgrowth.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
double
dPivotGrowth(int ncols, SuperMatrix *A, int *perm_c,
diff --git a/SRC/dpruneL.c b/SRC/dpruneL.c
index 1e7d53d..c782ca1 100644
--- a/SRC/dpruneL.c
+++ b/SRC/dpruneL.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
void
dpruneL(
diff --git a/SRC/dreadhb.c b/SRC/dreadhb.c
index 44d6ced..b32a225 100644
--- a/SRC/dreadhb.c
+++ b/SRC/dreadhb.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -9,7 +8,7 @@
*/
#include <stdio.h>
#include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/* Eat up the rest of the current line */
diff --git a/SRC/dsnode_bmod.c b/SRC/dsnode_bmod.c
index 3e259ac..ec06144 100644
--- a/SRC/dsnode_bmod.c
+++ b/SRC/dsnode_bmod.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/*
diff --git a/SRC/dsnode_dfs.c b/SRC/dsnode_dfs.c
index aab16d7..3823e85 100644
--- a/SRC/dsnode_dfs.c
+++ b/SRC/dsnode_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
int
dsnode_dfs (
diff --git a/SRC/dsp_blas2.c b/SRC/dsp_blas2.c
index 52162db..420c349 100644
--- a/SRC/dsp_blas2.c
+++ b/SRC/dsp_blas2.c
@@ -11,7 +11,7 @@
* Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/*
* Function prototypes
diff --git a/SRC/dsp_blas2.c.bak b/SRC/dsp_blas2.c.bak
deleted file mode 100644
index 5133ec6..0000000
--- a/SRC/dsp_blas2.c.bak
+++ /dev/null
@@ -1,469 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- * File name: dsp_blas2.c
- * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "dsp_defs.h"
-
-/*
- * Function prototypes
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-
-int
-sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
- SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * sp_dtrsv() solves one of the systems of equations
- * A*x = b, or A'*x = b,
- * where b and x are n element vectors and A is a sparse unit , or
- * non-unit, upper or lower triangular matrix.
- * No test for singularity or near-singularity is included in this
- * routine. Such tests must be performed before calling this routine.
- *
- * Parameters
- * ==========
- *
- * uplo - (input) char*
- * On entry, uplo specifies whether the matrix is an upper or
- * lower triangular matrix as follows:
- * uplo = 'U' or 'u' A is an upper triangular matrix.
- * uplo = 'L' or 'l' A is a lower triangular matrix.
- *
- * trans - (input) char*
- * On entry, trans specifies the equations to be solved as
- * follows:
- * trans = 'N' or 'n' A*x = b.
- * trans = 'T' or 't' A'*x = b.
- * trans = 'C' or 'c' A'*x = b.
- *
- * diag - (input) char*
- * On entry, diag specifies whether or not A is unit
- * triangular as follows:
- * diag = 'U' or 'u' A is assumed to be unit triangular.
- * diag = 'N' or 'n' A is not assumed to be unit
- * triangular.
- *
- * L - (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U. Use
- * compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU.
- *
- * U - (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U.
- * U has types: Stype = NC, Dtype = SLU_D, Mtype = TRU.
- *
- * x - (input/output) double*
- * Before entry, the incremented array X must contain the n
- * element right-hand side vector b. On exit, X is overwritten
- * with the solution vector x.
- *
- * info - (output) int*
- * If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
- _fcd ftcs1 = _cptofcd("L", strlen("L")),
- ftcs2 = _cptofcd("N", strlen("N")),
- ftcs3 = _cptofcd("U", strlen("U"));
-#endif
- SCformat *Lstore;
- NCformat *Ustore;
- double *Lval, *Uval;
- int incx = 1, incy = 1;
- double alpha = 1.0, beta = 1.0;
- int nrow;
- int fsupc, nsupr, nsupc, luptr, istart, irow;
- int i, k, iptr, jcol;
- double *work;
- flops_t solve_ops;
-
- /* Test the input parameters */
- *info = 0;
- if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
- else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
- else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
- else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
- else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
- if ( *info ) {
- i = -(*info);
- xerbla_("sp_dtrsv", &i);
- return 0;
- }
-
- Lstore = L->Store;
- Lval = Lstore->nzval;
- Ustore = U->Store;
- Uval = Ustore->nzval;
- solve_ops = 0;
-
- if ( !(work = doubleCalloc(L->nrow)) )
- ABORT("Malloc fails for work in sp_dtrsv().");
-
- if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
-
- if ( lsame_(uplo, "L") ) {
- /* Form x := inv(L)*x */
- if ( L->nrow == 0 ) return 0; /* Quick return */
-
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
- nrow = nsupr - nsupc;
-
- solve_ops += nsupc * (nsupc - 1);
- solve_ops += 2 * nrow * nsupc;
-
- if ( nsupc == 1 ) {
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
- irow = L_SUB(iptr);
- ++luptr;
- x[irow] -= x[fsupc] * Lval[luptr];
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-
- SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
- &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
- dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-
- dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
- &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
- dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-
- dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
- &x[fsupc], &work[0] );
-#endif
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; ++i, ++iptr) {
- irow = L_SUB(iptr);
- x[irow] -= work[i]; /* Scatter */
- work[i] = 0.0;
-
- }
- }
- } /* for k ... */
-
- } else {
- /* Form x := inv(U)*x */
-
- if ( U->nrow == 0 ) return 0; /* Quick return */
-
- for (k = Lstore->nsuper; k >= 0; k--) {
- fsupc = L_FST_SUPC(k);
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += nsupc * (nsupc + 1);
-
- if ( nsupc == 1 ) {
- x[fsupc] /= Lval[luptr];
- for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
- irow = U_SUB(i);
- x[irow] -= x[fsupc] * Uval[i];
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
-#else
- dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
- i++) {
- irow = U_SUB(i);
- x[irow] -= x[jcol] * Uval[i];
- }
- }
- }
- } /* for k ... */
-
- }
- } else { /* Form x := inv(A')*x */
-
- if ( lsame_(uplo, "L") ) {
- /* Form x := inv(L')*x */
- if ( L->nrow == 0 ) return 0; /* Quick return */
-
- for (k = Lstore->nsuper; k >= 0; --k) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += 2 * (nsupr - nsupc) * nsupc;
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- iptr = istart + nsupc;
- for (i = L_NZ_START(jcol) + nsupc;
- i < L_NZ_START(jcol+1); i++) {
- irow = L_SUB(iptr);
- x[jcol] -= x[irow] * Lval[i];
- iptr++;
- }
- }
-
- if ( nsupc > 1 ) {
- solve_ops += nsupc * (nsupc - 1);
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("T", strlen("T"));
- ftcs3 = _cptofcd("U", strlen("U"));
- STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
- }
- }
- } else {
- /* Form x := inv(U')*x */
- if ( U->nrow == 0 ) return 0; /* Quick return */
-
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
- irow = U_SUB(i);
- x[jcol] -= x[irow] * Uval[i];
- }
- }
-
- solve_ops += nsupc * (nsupc + 1);
-
- if ( nsupc == 1 ) {
- x[fsupc] /= Lval[luptr];
- } else {
-#ifdef _CRAY
- ftcs1 = _cptofcd("U", strlen("U"));
- ftcs2 = _cptofcd("T", strlen("T"));
- ftcs3 = _cptofcd("N", strlen("N"));
- STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
- }
- } /* for k ... */
- }
- }
-
- stat->ops[SOLVE] += solve_ops;
- SUPERLU_FREE(work);
- return 0;
-}
-
-
-
-
-int
-sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x,
- int incx, double beta, double *y, int incy)
-{
-/* Purpose
- =======
-
- sp_dgemv() performs one of the matrix-vector operations
- y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
- where alpha and beta are scalars, x and y are vectors and A is a
- sparse A->nrow by A->ncol matrix.
-
- Parameters
- ==========
-
- TRANS - (input) char*
- On entry, TRANS specifies the operation to be performed as
- follows:
- TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
- TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
- TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
-
- ALPHA - (input) double
- On entry, ALPHA specifies the scalar alpha.
-
- A - (input) SuperMatrix*
- Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
- Currently, the type of A can be:
- Stype = NC or NCP; Dtype = SLU_D; Mtype = GE.
- In the future, more general A can be handled.
-
- X - (input) double*, array of DIMENSION at least
- ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
- and at least
- ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
- Before entry, the incremented array X must contain the
- vector x.
-
- INCX - (input) int
- On entry, INCX specifies the increment for the elements of
- X. INCX must not be zero.
-
- BETA - (input) double
- On entry, BETA specifies the scalar beta. When BETA is
- supplied as zero then Y need not be set on input.
-
- Y - (output) double*, array of DIMENSION at least
- ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
- and at least
- ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
- Before entry with BETA non-zero, the incremented array Y
- must contain the vector y. On exit, Y is overwritten by the
- updated vector y.
-
- INCY - (input) int
- On entry, INCY specifies the increment for the elements of
- Y. INCY must not be zero.
-
- ==== Sparse Level 2 Blas routine.
-*/
-
- /* Local variables */
- NCformat *Astore;
- double *Aval;
- int info;
- double temp;
- int lenx, leny, i, j, irow;
- int iy, jx, jy, kx, ky;
- int notran;
-
- notran = lsame_(trans, "N");
- Astore = A->Store;
- Aval = Astore->nzval;
-
- /* Test the input parameters */
- info = 0;
- if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
- else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
- else if (incx == 0) info = 5;
- else if (incy == 0) info = 8;
- if (info != 0) {
- xerbla_("sp_dgemv ", &info);
- return 0;
- }
-
- /* Quick return if possible. */
- if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
- return 0;
-
- /* Set LENX and LENY, the lengths of the vectors x and y, and set
- up the start points in X and Y. */
- if (lsame_(trans, "N")) {
- lenx = A->ncol;
- leny = A->nrow;
- } else {
- lenx = A->nrow;
- leny = A->ncol;
- }
- if (incx > 0) kx = 0;
- else kx = - (lenx - 1) * incx;
- if (incy > 0) ky = 0;
- else ky = - (leny - 1) * incy;
-
- /* Start the operations. In this version the elements of A are
- accessed sequentially with one pass through A. */
- /* First form y := beta*y. */
- if (beta != 1.) {
- if (incy == 1) {
- if (beta == 0.)
- for (i = 0; i < leny; ++i) y[i] = 0.;
- else
- for (i = 0; i < leny; ++i) y[i] = beta * y[i];
- } else {
- iy = ky;
- if (beta == 0.)
- for (i = 0; i < leny; ++i) {
- y[iy] = 0.;
- iy += incy;
- }
- else
- for (i = 0; i < leny; ++i) {
- y[iy] = beta * y[iy];
- iy += incy;
- }
- }
- }
-
- if (alpha == 0.) return 0;
-
- if ( notran ) {
- /* Form y := alpha*A*x + y. */
- jx = kx;
- if (incy == 1) {
- for (j = 0; j < A->ncol; ++j) {
- if (x[jx] != 0.) {
- temp = alpha * x[jx];
- for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
- irow = Astore->rowind[i];
- y[irow] += temp * Aval[i];
- }
- }
- jx += incx;
- }
- } else {
- ABORT("Not implemented.");
- }
- } else {
- /* Form y := alpha*A'*x + y. */
- jy = ky;
- if (incx == 1) {
- for (j = 0; j < A->ncol; ++j) {
- temp = 0.;
- for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
- irow = Astore->rowind[i];
- temp += Aval[i] * x[irow];
- }
- y[jy] += alpha * temp;
- jy += incy;
- }
- } else {
- ABORT("Not implemented.");
- }
- }
- return 0;
-} /* sp_dgemv */
-
-
-
diff --git a/SRC/dsp_blas3.c b/SRC/dsp_blas3.c
index 7057b79..3aaf3c7 100644
--- a/SRC/dsp_blas3.c
+++ b/SRC/dsp_blas3.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* Purpose: Sparse BLAS3, using some dense BLAS3 operations.
*/
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
int
sp_dgemm(char *transa, char *transb, int m, int n, int k,
diff --git a/SRC/dutil.c b/SRC/dutil.c
index f4221a8..6956c29 100644
--- a/SRC/dutil.c
+++ b/SRC/dutil.c
@@ -20,7 +20,7 @@
*/
#include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
@@ -267,16 +267,19 @@ void
dPrint_Dense_Matrix(char *what, SuperMatrix *A)
{
DNformat *Astore;
- register int i;
+ register int i, j, lda = Astore->lda;
double *dp;
printf("\nDense matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
Astore = (DNformat *) A->Store;
dp = (double *) Astore->nzval;
- printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+ printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
printf("\nnzval: ");
- for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
+ for (j = 0; j < A->ncol; ++j) {
+ for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i + j*lda]);
+ printf("\n");
+ }
printf("\n");
fflush(stdout);
}
diff --git a/SRC/dzsum1.c b/SRC/dzsum1.c
index 7c00e5a..1f0c8a8 100644
--- a/SRC/dzsum1.c
+++ b/SRC/dzsum1.c
@@ -1,4 +1,5 @@
-#include "dcomplex.h"
+#include "slu_Cnames.h"
+#include "slu_dcomplex.h"
double dzsum1_(int *n, doublecomplex *cx, int *incx)
{
diff --git a/SRC/get_perm_c.c b/SRC/get_perm_c.c
index 19dbc7b..fa8fe6b 100644
--- a/SRC/get_perm_c.c
+++ b/SRC/get_perm_c.c
@@ -5,7 +5,7 @@
* November 15, 1997
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
#include "colamd.h"
extern int genmmd_(int *, int *, int *, int *, int *, int *, int *,
@@ -22,12 +22,11 @@ get_colamd(
)
{
int Alen, *A, i, info, *p;
- double *knobs;
+ double knobs[COLAMD_KNOBS];
+ int stats[COLAMD_STATS];
Alen = colamd_recommended(nnz, m, n);
- if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) )
- ABORT("Malloc fails for knobs");
colamd_set_defaults(knobs);
if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
@@ -36,12 +35,11 @@ get_colamd(
ABORT("Malloc fails for p[]");
for (i = 0; i <= n; ++i) p[i] = colptr[i];
for (i = 0; i < nnz; ++i) A[i] = rowind[i];
- info = colamd(m, n, Alen, A, p, knobs);
+ info = colamd(m, n, Alen, A, p, knobs, stats);
if ( info == FALSE ) ABORT("COLAMD failed");
for (i = 0; i < n; ++i) perm_c[p[i]] = i;
- SUPERLU_FREE(knobs);
SUPERLU_FREE(A);
SUPERLU_FREE(p);
}
@@ -434,13 +432,12 @@ get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
/* Transform perm_c into 0-based indexing. */
for (i = 0; i < n; ++i) --perm_c[i];
- SUPERLU_FREE(b_colptr);
- SUPERLU_FREE(b_rowind);
SUPERLU_FREE(invp);
SUPERLU_FREE(dhead);
SUPERLU_FREE(qsize);
SUPERLU_FREE(llist);
SUPERLU_FREE(marker);
+ SUPERLU_FREE(b_rowind);
t = SuperLU_timer_() - t;
/* printf("call GENMMD time = %8.3f\n", t);*/
@@ -449,4 +446,5 @@ get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
for (i = 0; i < n; ++i) perm_c[i] = i;
}
+ SUPERLU_FREE(b_colptr);
}
diff --git a/SRC/heap_relax_snode.c b/SRC/heap_relax_snode.c
index f731b64..1a40e26 100644
--- a/SRC/heap_relax_snode.c
+++ b/SRC/heap_relax_snode.c
@@ -18,7 +18,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
heap_relax_snode (
diff --git a/SRC/icmax1.c b/SRC/icmax1.c
index b29d5ee..1e254b0 100644
--- a/SRC/icmax1.c
+++ b/SRC/icmax1.c
@@ -1,5 +1,6 @@
#include <math.h>
-#include "scomplex.h"
+#include "slu_scomplex.h"
+#include "slu_Cnames.h"
int icmax1_(int *n, complex *cx, int *incx)
{
diff --git a/SRC/izmax1.c b/SRC/izmax1.c
index c31bd66..1a73e1f 100644
--- a/SRC/izmax1.c
+++ b/SRC/izmax1.c
@@ -1,4 +1,6 @@
-#include "dcomplex.h"
+#include <math.h>
+#include "slu_Cnames.h"
+#include "slu_dcomplex.h"
int
izmax1_(int *n, doublecomplex *cx, int *incx)
@@ -60,17 +62,17 @@ izmax1_(int *n, doublecomplex *cx, int *incx)
/* CODE FOR INCREMENT NOT EQUAL TO 1 */
ix = 1;
- smax = (d__1 = CX(1).r, abs(d__1));
+ smax = (d__1 = CX(1).r, fabs(d__1));
ix += *incx;
i__1 = *n;
for (i = 2; i <= *n; ++i) {
i__2 = ix;
- if ((d__1 = CX(ix).r, abs(d__1)) <= smax) {
+ if ((d__1 = CX(ix).r, fabs(d__1)) <= smax) {
goto L10;
}
ret_val = i;
i__2 = ix;
- smax = (d__1 = CX(ix).r, abs(d__1));
+ smax = (d__1 = CX(ix).r, fabs(d__1));
L10:
ix += *incx;
/* L20: */
@@ -80,16 +82,16 @@ L10:
/* CODE FOR INCREMENT EQUAL TO 1 */
L30:
- smax = (d__1 = CX(1).r, abs(d__1));
+ smax = (d__1 = CX(1).r, fabs(d__1));
i__1 = *n;
for (i = 2; i <= *n; ++i) {
i__2 = i;
- if ((d__1 = CX(i).r, abs(d__1)) <= smax) {
+ if ((d__1 = CX(i).r, fabs(d__1)) <= smax) {
goto L40;
}
ret_val = i;
i__2 = i;
- smax = (d__1 = CX(i).r, abs(d__1));
+ smax = (d__1 = CX(i).r, fabs(d__1));
L40:
;
}
diff --git a/SRC/lsame.c b/SRC/lsame.c
index fba47c6..113c6d0 100644
--- a/SRC/lsame.c
+++ b/SRC/lsame.c
@@ -1,3 +1,5 @@
+#include "slu_Cnames.h"
+
int lsame_(char *ca, char *cb)
{
/* -- LAPACK auxiliary routine (version 2.0) --
diff --git a/SRC/memory.c b/SRC/memory.c
index c5e7831..25868f6 100644
--- a/SRC/memory.c
+++ b/SRC/memory.c
@@ -8,7 +8,7 @@
/** Precision-independent memory-related routines.
(Shared by [sdcz]memory.c) **/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
#if ( DEBUGlevel>=1 ) /* Debug malloc/free. */
@@ -16,6 +16,7 @@ int superlu_malloc_total = 0;
#define PAD_FACTOR 2
#define DWORD (sizeof(double)) /* Be sure it's no smaller than double. */
+/* size_t is usually defined as 'unsigned long' */
void *superlu_malloc(size_t size)
{
@@ -23,7 +24,7 @@ void *superlu_malloc(size_t size)
buf = (char *) malloc(size + DWORD);
if ( !buf ) {
- printf("superlu_malloc fails: malloc_total %.0f MB, size %d\n",
+ printf("superlu_malloc fails: malloc_total %.0f MB, size %ld\n",
superlu_malloc_total*1e-6, size);
ABORT("superlu_malloc: out of memory");
}
diff --git a/SRC/colamd.c b/SRC/old_colamd.c
similarity index 100%
copy from SRC/colamd.c
copy to SRC/old_colamd.c
diff --git a/SRC/colamd.h b/SRC/old_colamd.h
similarity index 100%
copy from SRC/colamd.h
copy to SRC/old_colamd.h
diff --git a/SRC/relax_snode.c b/SRC/relax_snode.c
index f2bc0e5..ef20127 100644
--- a/SRC/relax_snode.c
+++ b/SRC/relax_snode.c
@@ -18,7 +18,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
relax_snode (
diff --git a/SRC/scolumn_bmod.c b/SRC/scolumn_bmod.c
index 1914626..303b3d4 100644
--- a/SRC/scolumn_bmod.c
+++ b/SRC/scolumn_bmod.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
/*
* Function prototypes
diff --git a/SRC/scolumn_dfs.c b/SRC/scolumn_dfs.c
index c29f260..923b25d 100644
--- a/SRC/scolumn_dfs.c
+++ b/SRC/scolumn_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
/* What type of supernodes we want */
#define T2_SUPER
diff --git a/SRC/scomplex.c b/SRC/scomplex.c
index 8cbbeea..d353281 100644
--- a/SRC/scomplex.c
+++ b/SRC/scomplex.c
@@ -10,8 +10,9 @@
* This file defines common arithmetic operations for complex type.
*/
#include <math.h>
+#include <stdlib.h>
#include <stdio.h>
-#include "scomplex.h"
+#include "slu_scomplex.h"
/* Complex Division c = a/b */
@@ -26,8 +27,8 @@ void c_div(complex *c, complex *a, complex *b)
abi = - abi;
if( abr <= abi ) {
if (abi == 0) {
- fprintf(stderr, "z_div.c: division by zero");
- exit (-1);
+ fprintf(stderr, "z_div.c: division by zero\n");
+ exit(-1);
}
ratio = b->r / b->i ;
den = b->i * (1 + ratio*ratio);
diff --git a/SRC/scopy_to_ucol.c b/SRC/scopy_to_ucol.c
index 99de989..daed16a 100644
--- a/SRC/scopy_to_ucol.c
+++ b/SRC/scopy_to_ucol.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
int
scopy_to_ucol(
diff --git a/SRC/scsum1.c b/SRC/scsum1.c
index 963ba21..5ab8fc2 100644
--- a/SRC/scsum1.c
+++ b/SRC/scsum1.c
@@ -1,4 +1,5 @@
-#include "scomplex.h"
+#include "slu_Cnames.h"
+#include "slu_scomplex.h"
double scsum1_(int *n, complex *cx, int *incx)
{
diff --git a/SRC/sgscon.c b/SRC/sgscon.c
index f000021..9080602 100644
--- a/SRC/sgscon.c
+++ b/SRC/sgscon.c
@@ -11,7 +11,7 @@
* History: Modified from lapack routines SGECON.
*/
#include <math.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
void
sgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/sgsequ.c b/SRC/sgsequ.c
index 47408b7..10a2ffc 100644
--- a/SRC/sgsequ.c
+++ b/SRC/sgsequ.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from LAPACK routine SGEEQU
*/
#include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
void
sgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
diff --git a/SRC/sgsrfs.c b/SRC/sgsrfs.c
index 42c2d98..9d03b04 100644
--- a/SRC/sgsrfs.c
+++ b/SRC/sgsrfs.c
@@ -11,7 +11,7 @@
* History: Modified from lapack routine SGERFS
*/
#include <math.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
void
sgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/sgssv.c b/SRC/sgssv.c
index 703f1bc..2e622bf 100644
--- a/SRC/sgssv.c
+++ b/SRC/sgssv.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -7,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
void
sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/sgssvx.c b/SRC/sgssvx.c
index 7658789..f611b9f 100644
--- a/SRC/sgssvx.c
+++ b/SRC/sgssvx.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
void
sgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/sgstrf.c b/SRC/sgstrf.c
index 93894dc..b65f93d 100644
--- a/SRC/sgstrf.c
+++ b/SRC/sgstrf.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
void
sgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
@@ -182,8 +182,8 @@ sgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
*/
/* Local working arrays */
NCPformat *Astore;
- int *iperm_r; /* inverse of perm_r;
- used when options->Fact == SamePattern_SameRowPerm */
+ int *iperm_r = NULL; /* inverse of perm_r; used when
+ options->Fact == SamePattern_SameRowPerm */
int *iperm_c; /* inverse of perm_c */
int *iwork;
float *swork;
diff --git a/SRC/sgstrs.c b/SRC/sgstrs.c
index 3a72f5e..367e088 100644
--- a/SRC/sgstrs.c
+++ b/SRC/sgstrs.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
/*
diff --git a/SRC/sgstrs.c.bak b/SRC/sgstrs.c.bak
deleted file mode 100644
index f2977eb..0000000
--- a/SRC/sgstrs.c.bak
+++ /dev/null
@@ -1,334 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- Copyright (c) 1994 by Xerox Corporation. All rights reserved.
-
- THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
-
- Permission is hereby granted to use or copy this program for any
- purpose, provided the above notices are retained on all copies.
- Permission to modify the code and to distribute modified code is
- granted, provided the above notices are retained, and a notice that
- the code was modified is included with the above copyright notice.
-*/
-
-#include "ssp_defs.h"
-
-
-/*
- * Function prototypes
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-
-
-void
-sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
- int *perm_c, int *perm_r, SuperMatrix *B,
- SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * SGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * SGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans (input) trans_t
- * Specifies the form of the system of equations:
- * = NOTRANS: A * X = B (No transpose)
- * = TRANS: A'* X = B (Transpose)
- * = CONJ: A**H * X = B (Conjugate transpose)
- *
- * L (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U as computed by
- * sgstrf(). Use compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U as computed by
- * sgstrf(). Use column-wise storage scheme, i.e., U has types:
- * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * perm_c (input) int*, dimension (L->ncol)
- * Column permutation vector, which defines the
- * permutation matrix Pc; perm_c[i] = j means column i of A is
- * in position j in A*Pc.
- *
- * perm_r (input) int*, dimension (L->nrow)
- * Row permutation vector, which defines the permutation matrix Pr;
- * perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B (input/output) SuperMatrix*
- * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- * On entry, the right hand side matrix.
- * On exit, the solution matrix if info = 0;
- *
- * stat (output) SuperLUStat_t*
- * Record the statistics on runtime and floating-point operation count.
- * See util.h for the definition of 'SuperLUStat_t'.
- *
- * info (output) int*
- * = 0: successful exit
- * < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
- _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
- int incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
- float alpha = 1.0, beta = 1.0;
- float *work_col;
-#endif
- DNformat *Bstore;
- float *Bmat;
- SCformat *Lstore;
- NCformat *Ustore;
- float *Lval, *Uval;
- int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
- int i, j, k, iptr, jcol, n, ldb, nrhs;
- float *work, *rhs_work, *soln;
- flops_t solve_ops;
- void sprint_soln();
-
- /* Test input parameters ... */
- *info = 0;
- Bstore = B->Store;
- ldb = Bstore->lda;
- nrhs = B->ncol;
- if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
- else if ( L->nrow != L->ncol || L->nrow < 0 ||
- L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
- *info = -2;
- else if ( U->nrow != U->ncol || U->nrow < 0 ||
- U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
- *info = -3;
- else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
- B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
- *info = -6;
- if ( *info ) {
- i = -(*info);
- xerbla_("sgstrs", &i);
- return;
- }
-
- n = L->nrow;
- work = floatCalloc(n * nrhs);
- if ( !work ) ABORT("Malloc fails for local work[].");
- soln = floatMalloc(n);
- if ( !soln ) ABORT("Malloc fails for local soln[].");
-
- Bmat = Bstore->nzval;
- Lstore = L->Store;
- Lval = Lstore->nzval;
- Ustore = U->Store;
- Uval = Ustore->nzval;
- solve_ops = 0;
-
- if ( trans == NOTRANS ) {
- /* Permute right hand sides to form Pr*B */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- /* Forward solve PLy=Pb. */
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- nrow = nsupr - nsupc;
-
- solve_ops += nsupc * (nsupc - 1) * nrhs;
- solve_ops += 2 * nrow * nsupc * nrhs;
-
- if ( nsupc == 1 ) {
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- luptr = L_NZ_START(fsupc);
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
- irow = L_SUB(iptr);
- ++luptr;
- rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
- }
- }
- } else {
- luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("N", strlen("N"));
- ftcs3 = _cptofcd("U", strlen("U"));
- STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#else
- strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#endif
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- work_col = &work[j*n];
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- rhs_work[irow] -= work_col[i]; /* Scatter */
- work_col[i] = 0.0;
- iptr++;
- }
- }
-#else
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
- smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
- &rhs_work[fsupc], &work[0] );
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- rhs_work[irow] -= work[i];
- work[i] = 0.0;
- iptr++;
- }
- }
-#endif
- } /* else ... */
- } /* for L-solve */
-
-#ifdef DEBUG
- printf("After L-solve: y=\n");
- sprint_soln(n, nrhs, Bmat);
-#endif
-
- /*
- * Back solve Ux=y.
- */
- for (k = Lstore->nsuper; k >= 0; k--) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += nsupc * (nsupc + 1) * nrhs;
-
- if ( nsupc == 1 ) {
- rhs_work = &Bmat[0];
- for (j = 0; j < nrhs; j++) {
- rhs_work[fsupc] /= Lval[luptr];
- rhs_work += ldb;
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("U", strlen("U"));
- ftcs3 = _cptofcd("N", strlen("N"));
- STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
- strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else
- for (j = 0; j < nrhs; j++)
- susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif
- }
-
- for (j = 0; j < nrhs; ++j) {
- rhs_work = &Bmat[j*ldb];
- for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
- solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
- irow = U_SUB(i);
- rhs_work[irow] -= rhs_work[jcol] * Uval[i];
- }
- }
- }
-
- } /* for U-solve */
-
-#ifdef DEBUG
- printf("After U-solve: x=\n");
- sprint_soln(n, nrhs, Bmat);
-#endif
-
- /* Compute the final solution X := Pc*X. */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- stat->ops[SOLVE] = solve_ops;
-
- } else { /* Solve A'*X=B */
- /* Permute right hand sides to form Pc'*B. */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- stat->ops[SOLVE] = 0;
-
- for (k = 0; k < nrhs; ++k) {
-
- /* Multiply by inv(U'). */
- sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-
- /* Multiply by inv(L'). */
- sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-
- }
-
- /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- }
-
- SUPERLU_FREE(work);
- SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector
- */
-void
-sprint_soln(int n, int nrhs, float *soln)
-{
- int i;
-
- for (i = 0; i < n; i++)
- printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/SRC/slacon.c b/SRC/slacon.c
index 0dafbb2..ccf4d3a 100644
--- a/SRC/slacon.c
+++ b/SRC/slacon.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,7 +7,7 @@
*
*/
#include <math.h>
-#include "Cnames.h"
+#include "slu_Cnames.h"
int
slacon_(int *n, float *v, float *x, int *isgn, float *est, int *kase)
diff --git a/SRC/slamch.c b/SRC/slamch.c
index 4e44ad4..2581c0d 100644
--- a/SRC/slamch.c
+++ b/SRC/slamch.c
@@ -1,4 +1,6 @@
#include <stdio.h>
+#include "slu_Cnames.h"
+
#define TRUE_ (1)
#define FALSE_ (0)
#define min(a,b) ((a) <= (b) ? (a) : (b))
diff --git a/SRC/slangs.c b/SRC/slangs.c
index 63d0d66..a680db4 100644
--- a/SRC/slangs.c
+++ b/SRC/slangs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from lapack routine SLANGE
*/
#include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
float slangs(char *norm, SuperMatrix *A)
{
diff --git a/SRC/slaqgs.c b/SRC/slaqgs.c
index f5287cb..f65931e 100644
--- a/SRC/slaqgs.c
+++ b/SRC/slaqgs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from LAPACK routine SLAQGE
*/
#include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
void
slaqgs(SuperMatrix *A, float *r, float *c,
diff --git a/SRC/Cnames.h b/SRC/slu_Cnames.h
similarity index 82%
rename from SRC/Cnames.h
rename to SRC/slu_Cnames.h
index 7a6d2da..8b8df4e 100644
--- a/SRC/Cnames.h
+++ b/SRC/slu_Cnames.h
@@ -67,6 +67,7 @@
* FORTRAN CALL C DECLARATION
* call dgemm(...) void dgemm__(...)
*/
+/* BLAS */
#define sasum_ sasum__
#define isamax_ isamax__
#define scopy_ scopy__
@@ -127,8 +128,26 @@
#define zhemv_ zhemv__
#define zher2_ zher2__
+/* LAPACK */
+#define dlamch_ dlamch__
+#define slamch_ slamch__
+#define xerbla_ xerbla__
+#define lsame_ lsame__
+#define dlacon_ dlacon__
+#define slacon_ slacon__
+#define icmax1_ icmax1__
+#define scsum1_ scsum1__
+#define clacon_ clacon__
+#define dzsum1_ dzsum1__
+#define izmax1_ izmax1__
+#define zlacon_ zlacon__
+
+/* Fortran interface */
#define c_bridge_dgssv_ c_bridge_dgssv__
+#define c_fortran_sgssv_ c_fortran_sgssv__
#define c_fortran_dgssv_ c_fortran_dgssv__
+#define c_fortran_cgssv_ c_fortran_cgssv__
+#define c_fortran_zgssv_ c_fortran_zgssv__
#endif
#if (F77_CALL_C == UPCASE)
@@ -139,6 +158,7 @@
* FORTRAN CALL C DECLARATION
* call dgemm(...) void DGEMM(...)
*/
+/* BLAS */
#define sasum_ SASUM
#define isamax_ ISAMAX
#define scopy_ SCOPY
@@ -199,8 +219,26 @@
#define zhemv_ CHEMV
#define zher2_ CHER2
+/* LAPACK */
+#define dlamch_ DLAMCH
+#define slamch_ SLAMCH
+#define xerbla_ XERBLA
+#define lsame_ LSAME
+#define dlacon_ DLACON
+#define slacon_ SLACON
+#define icmax1_ ICMAX1
+#define scsum1_ SCSUM1
+#define clacon_ CLACON
+#define dzsum1_ DZSUM1
+#define izmax1_ IZMAX1
+#define zlacon_ ZLACON
+
+/* Fortran interface */
#define c_bridge_dgssv_ C_BRIDGE_DGSSV
+#define c_fortran_sgssv_ C_FORTRAN_SGSSV
#define c_fortran_dgssv_ C_FORTRAN_DGSSV
+#define c_fortran_cgssv_ C_FORTRAN_CGSSV
+#define c_fortran_zgssv_ C_FORTRAN_ZGSSV
#endif
#if (F77_CALL_C == NOCHANGE)
@@ -211,6 +249,7 @@
* FORTRAN CALL C DECLARATION
* call dgemm(...) void dgemm(...)
*/
+/* BLAS */
#define sasum_ sasum
#define isamax_ isamax
#define scopy_ scopy
@@ -271,8 +310,26 @@
#define zhemv_ zhemv
#define zher2_ zher2
+/* LAPACK */
+#define dlamch_ dlamch
+#define slamch_ slamch
+#define xerbla_ xerbla
+#define lsame_ lsame
+#define dlacon_ dlacon
+#define slacon_ slacon
+#define icmax1_ icmax1
+#define scsum1_ scsum1
+#define clacon_ clacon
+#define dzsum1_ dzsum1
+#define izmax1_ izmax1
+#define zlacon_ zlacon
+
+/* Fortran interface */
#define c_bridge_dgssv_ c_bridge_dgssv
+#define c_fortran_sgssv_ c_fortran_sgssv
#define c_fortran_dgssv_ c_fortran_dgssv
+#define c_fortran_cgssv_ c_fortran_cgssv
+#define c_fortran_zgssv_ c_fortran_zgssv
#endif
#endif /* __SUPERLU_CNAMES */
diff --git a/SRC/csp_defs.h b/SRC/slu_cdefs.h
similarity index 99%
rename from SRC/csp_defs.h
rename to SRC/slu_cdefs.h
index d8b5def..31bb482 100644
--- a/SRC/csp_defs.h
+++ b/SRC/slu_cdefs.h
@@ -23,10 +23,10 @@
/* Define my integer type int_t */
typedef int int_t; /* default */
-#include "Cnames.h"
+#include "slu_Cnames.h"
#include "supermatrix.h"
-#include "util.h"
-#include "scomplex.h"
+#include "slu_util.h"
+#include "slu_scomplex.h"
/*
diff --git a/SRC/dcomplex.h b/SRC/slu_dcomplex.h
similarity index 100%
rename from SRC/dcomplex.h
rename to SRC/slu_dcomplex.h
diff --git a/SRC/dsp_defs.h b/SRC/slu_ddefs.h
similarity index 99%
rename from SRC/dsp_defs.h
rename to SRC/slu_ddefs.h
index e02bd0f..292622f 100644
--- a/SRC/dsp_defs.h
+++ b/SRC/slu_ddefs.h
@@ -23,9 +23,9 @@
/* Define my integer type int_t */
typedef int int_t; /* default */
-#include "Cnames.h"
+#include "slu_Cnames.h"
#include "supermatrix.h"
-#include "util.h"
+#include "slu_util.h"
/*
diff --git a/SRC/scomplex.h b/SRC/slu_scomplex.h
similarity index 100%
rename from SRC/scomplex.h
rename to SRC/slu_scomplex.h
diff --git a/SRC/ssp_defs.h b/SRC/slu_sdefs.h
similarity index 99%
rename from SRC/ssp_defs.h
rename to SRC/slu_sdefs.h
index 34dd266..b9cff8c 100644
--- a/SRC/ssp_defs.h
+++ b/SRC/slu_sdefs.h
@@ -23,9 +23,9 @@
/* Define my integer type int_t */
typedef int int_t; /* default */
-#include "Cnames.h"
+#include "slu_Cnames.h"
#include "supermatrix.h"
-#include "util.h"
+#include "slu_util.h"
/*
diff --git a/SRC/util.h b/SRC/slu_util.h
similarity index 97%
rename from SRC/util.h
rename to SRC/slu_util.h
index f16ff89..bf115c6 100644
--- a/SRC/util.h
+++ b/SRC/slu_util.h
@@ -93,7 +93,6 @@ typedef enum {
RCOND, /* estimate reciprocal condition number */
SOLVE, /* forward and back solves */
REFINE, /* perform iterative refinement */
- FLOAT, /* time spent in floating-point operations */
TRSV, /* fraction of FACT spent in xTRSV */
GEMV, /* fraction of FACT spent in xGEMV */
FERR, /* estimate error bounds after iterative refinement */
@@ -126,10 +125,9 @@ typedef unsigned char Logical;
* assuming that a factorization of a matrix with the same
* sparsity pattern and similar numerical values was performed
* prior to this one. Therefore, this factorization will reuse
- * both row and column scaling factors R and C, and the
- * both row and column permutation vectors perm_r and perm_c,
- * distributed data structure set up from the previous symbolic
- * factorization.
+ * both row and column scaling factors R and C, both row and
+ * column permutation vectors perm_r and perm_c, and the
+ * data structure set up from the previous symbolic factorization.
* = FACTORED: On entry, L, U, perm_r and perm_c contain the
* factored form of A. If DiagScale is not NOEQUIL, the matrix
* A has been equilibrated with scaling factors R and C.
diff --git a/SRC/zsp_defs.h b/SRC/slu_zdefs.h
similarity index 99%
rename from SRC/zsp_defs.h
rename to SRC/slu_zdefs.h
index f0450d4..a201ff5 100644
--- a/SRC/zsp_defs.h
+++ b/SRC/slu_zdefs.h
@@ -23,10 +23,10 @@
/* Define my integer type int_t */
typedef int int_t; /* default */
-#include "Cnames.h"
+#include "slu_Cnames.h"
#include "supermatrix.h"
-#include "util.h"
-#include "dcomplex.h"
+#include "slu_util.h"
+#include "slu_dcomplex.h"
/*
diff --git a/SRC/smemory.c b/SRC/smemory.c
index 79da748..0278615 100644
--- a/SRC/smemory.c
+++ b/SRC/smemory.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
/* Constants */
#define NO_MEMTYPE 4 /* 0: lusup;
@@ -193,9 +193,10 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
sSetupSpace(work, lwork, &Glu->MemModel);
}
-#ifdef DEBUG
- printf("sLUMemInit() called: annz %d, MemModel %d\n",
- annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+ printf("sLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n",
+ FILL, nzlmax, nzumax);
+ fflush(stdout);
#endif
/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
printf("Not enough memory to perform factorization.\n");
return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
}
+#if ( PRNTlevel >= 1)
+ printf("sLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
+ nzlmax, nzumax);
+ fflush(stdout);
+#endif
lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) sexpand( &nzlmax, LSUB, 0, 0, Glu );
@@ -476,8 +482,7 @@ void
else lword = sizeof(float);
if ( Glu->MemModel == SYSTEM ) {
- new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/* new_mem = (void *) calloc(new_len, lword); */
+ new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
if ( no_expand != 0 ) {
tries = 0;
if ( keep_prev ) {
@@ -487,8 +492,7 @@ void
if ( ++tries > 10 ) return (NULL);
alpha = Reduce(alpha);
new_len = alpha * *prev_len;
- new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/* new_mem = (void *) calloc(new_len, lword); */
+ new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
}
}
if ( type == LSUB || type == USUB ) {
@@ -641,7 +645,7 @@ sallocateA(int n, int nnz, float **a, int **asub, int **xa)
float *floatMalloc(int n)
{
float *buf;
- buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+ buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
}
@@ -653,7 +657,7 @@ float *floatCalloc(int n)
float *buf;
register int i;
float zero = 0.0;
- buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+ buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
}
diff --git a/SRC/smyblas2.c b/SRC/smyblas2.c
index 729e17f..00f65c5 100644
--- a/SRC/smyblas2.c
+++ b/SRC/smyblas2.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
diff --git a/SRC/sp_coletree.c b/SRC/sp_coletree.c
index 8f65623..1685661 100644
--- a/SRC/sp_coletree.c
+++ b/SRC/sp_coletree.c
@@ -3,7 +3,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
/*
* Implementation of disjoint set union routines.
diff --git a/SRC/sp_ienv.c b/SRC/sp_ienv.c
index 516b7b2..0680e02 100644
--- a/SRC/sp_ienv.c
+++ b/SRC/sp_ienv.c
@@ -2,6 +2,8 @@
* File name: sp_ienv.c
* History: Modified from lapack routine ILAENV
*/
+#include "slu_Cnames.h"
+
int
sp_ienv(int ispec)
{
diff --git a/SRC/sp_preorder.c b/SRC/sp_preorder.c
index 17ad84c..524a8ee 100644
--- a/SRC/sp_preorder.c
+++ b/SRC/sp_preorder.c
@@ -1,4 +1,4 @@
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
void
sp_preorder(superlu_options_t *options, SuperMatrix *A, int *perm_c,
diff --git a/SRC/spanel_bmod.c b/SRC/spanel_bmod.c
index 7cfbc28..e98ac9b 100644
--- a/SRC/spanel_bmod.c
+++ b/SRC/spanel_bmod.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
/*
* Function prototypes
diff --git a/SRC/spanel_dfs.c b/SRC/spanel_dfs.c
index 7f5f3c7..cb4417c 100644
--- a/SRC/spanel_dfs.c
+++ b/SRC/spanel_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
void
spanel_dfs (
diff --git a/SRC/spivotL.c b/SRC/spivotL.c
index 6243065..9c300a4 100644
--- a/SRC/spivotL.c
+++ b/SRC/spivotL.c
@@ -21,7 +21,7 @@
#include <math.h>
#include <stdlib.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
#undef DEBUG
diff --git a/SRC/spivotgrowth.c b/SRC/spivotgrowth.c
index 188ddcc..6aac212 100644
--- a/SRC/spivotgrowth.c
+++ b/SRC/spivotgrowth.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
float
sPivotGrowth(int ncols, SuperMatrix *A, int *perm_c,
diff --git a/SRC/spruneL.c b/SRC/spruneL.c
index 5970270..6a32424 100644
--- a/SRC/spruneL.c
+++ b/SRC/spruneL.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
void
spruneL(
diff --git a/SRC/sreadhb.c b/SRC/sreadhb.c
index 9f8dd03..e0cf193 100644
--- a/SRC/sreadhb.c
+++ b/SRC/sreadhb.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -9,7 +8,7 @@
*/
#include <stdio.h>
#include <stdlib.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
/* Eat up the rest of the current line */
diff --git a/SRC/ssnode_bmod.c b/SRC/ssnode_bmod.c
index 1b11eda..6ba0f52 100644
--- a/SRC/ssnode_bmod.c
+++ b/SRC/ssnode_bmod.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
/*
diff --git a/SRC/ssnode_dfs.c b/SRC/ssnode_dfs.c
index 95a51be..eb14fc0 100644
--- a/SRC/ssnode_dfs.c
+++ b/SRC/ssnode_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
int
ssnode_dfs (
diff --git a/SRC/ssp_blas2.c b/SRC/ssp_blas2.c
index b7917c9..174db34 100644
--- a/SRC/ssp_blas2.c
+++ b/SRC/ssp_blas2.c
@@ -11,7 +11,7 @@
* Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
/*
* Function prototypes
diff --git a/SRC/ssp_blas2.c.bak b/SRC/ssp_blas2.c.bak
deleted file mode 100644
index 994de34..0000000
--- a/SRC/ssp_blas2.c.bak
+++ /dev/null
@@ -1,469 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- * File name: ssp_blas2.c
- * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "ssp_defs.h"
-
-/*
- * Function prototypes
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-
-
-int
-sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
- SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * sp_strsv() solves one of the systems of equations
- * A*x = b, or A'*x = b,
- * where b and x are n element vectors and A is a sparse unit , or
- * non-unit, upper or lower triangular matrix.
- * No test for singularity or near-singularity is included in this
- * routine. Such tests must be performed before calling this routine.
- *
- * Parameters
- * ==========
- *
- * uplo - (input) char*
- * On entry, uplo specifies whether the matrix is an upper or
- * lower triangular matrix as follows:
- * uplo = 'U' or 'u' A is an upper triangular matrix.
- * uplo = 'L' or 'l' A is a lower triangular matrix.
- *
- * trans - (input) char*
- * On entry, trans specifies the equations to be solved as
- * follows:
- * trans = 'N' or 'n' A*x = b.
- * trans = 'T' or 't' A'*x = b.
- * trans = 'C' or 'c' A'*x = b.
- *
- * diag - (input) char*
- * On entry, diag specifies whether or not A is unit
- * triangular as follows:
- * diag = 'U' or 'u' A is assumed to be unit triangular.
- * diag = 'N' or 'n' A is not assumed to be unit
- * triangular.
- *
- * L - (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U. Use
- * compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
- *
- * U - (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U.
- * U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
- *
- * x - (input/output) float*
- * Before entry, the incremented array X must contain the n
- * element right-hand side vector b. On exit, X is overwritten
- * with the solution vector x.
- *
- * info - (output) int*
- * If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
- _fcd ftcs1 = _cptofcd("L", strlen("L")),
- ftcs2 = _cptofcd("N", strlen("N")),
- ftcs3 = _cptofcd("U", strlen("U"));
-#endif
- SCformat *Lstore;
- NCformat *Ustore;
- float *Lval, *Uval;
- int incx = 1, incy = 1;
- float alpha = 1.0, beta = 1.0;
- int nrow;
- int fsupc, nsupr, nsupc, luptr, istart, irow;
- int i, k, iptr, jcol;
- float *work;
- flops_t solve_ops;
-
- /* Test the input parameters */
- *info = 0;
- if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
- else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
- else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
- else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
- else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
- if ( *info ) {
- i = -(*info);
- xerbla_("sp_strsv", &i);
- return 0;
- }
-
- Lstore = L->Store;
- Lval = Lstore->nzval;
- Ustore = U->Store;
- Uval = Ustore->nzval;
- solve_ops = 0;
-
- if ( !(work = floatCalloc(L->nrow)) )
- ABORT("Malloc fails for work in sp_strsv().");
-
- if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
-
- if ( lsame_(uplo, "L") ) {
- /* Form x := inv(L)*x */
- if ( L->nrow == 0 ) return 0; /* Quick return */
-
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
- nrow = nsupr - nsupc;
-
- solve_ops += nsupc * (nsupc - 1);
- solve_ops += 2 * nrow * nsupc;
-
- if ( nsupc == 1 ) {
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
- irow = L_SUB(iptr);
- ++luptr;
- x[irow] -= x[fsupc] * Lval[luptr];
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-
- SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
- &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
- strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-
- sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
- &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
- slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-
- smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
- &x[fsupc], &work[0] );
-#endif
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; ++i, ++iptr) {
- irow = L_SUB(iptr);
- x[irow] -= work[i]; /* Scatter */
- work[i] = 0.0;
-
- }
- }
- } /* for k ... */
-
- } else {
- /* Form x := inv(U)*x */
-
- if ( U->nrow == 0 ) return 0; /* Quick return */
-
- for (k = Lstore->nsuper; k >= 0; k--) {
- fsupc = L_FST_SUPC(k);
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += nsupc * (nsupc + 1);
-
- if ( nsupc == 1 ) {
- x[fsupc] /= Lval[luptr];
- for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
- irow = U_SUB(i);
- x[irow] -= x[fsupc] * Uval[i];
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
-#else
- susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
- i++) {
- irow = U_SUB(i);
- x[irow] -= x[jcol] * Uval[i];
- }
- }
- }
- } /* for k ... */
-
- }
- } else { /* Form x := inv(A')*x */
-
- if ( lsame_(uplo, "L") ) {
- /* Form x := inv(L')*x */
- if ( L->nrow == 0 ) return 0; /* Quick return */
-
- for (k = Lstore->nsuper; k >= 0; --k) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += 2 * (nsupr - nsupc) * nsupc;
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- iptr = istart + nsupc;
- for (i = L_NZ_START(jcol) + nsupc;
- i < L_NZ_START(jcol+1); i++) {
- irow = L_SUB(iptr);
- x[jcol] -= x[irow] * Lval[i];
- iptr++;
- }
- }
-
- if ( nsupc > 1 ) {
- solve_ops += nsupc * (nsupc - 1);
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("T", strlen("T"));
- ftcs3 = _cptofcd("U", strlen("U"));
- STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
- }
- }
- } else {
- /* Form x := inv(U')*x */
- if ( U->nrow == 0 ) return 0; /* Quick return */
-
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
- irow = U_SUB(i);
- x[jcol] -= x[irow] * Uval[i];
- }
- }
-
- solve_ops += nsupc * (nsupc + 1);
-
- if ( nsupc == 1 ) {
- x[fsupc] /= Lval[luptr];
- } else {
-#ifdef _CRAY
- ftcs1 = _cptofcd("U", strlen("U"));
- ftcs2 = _cptofcd("T", strlen("T"));
- ftcs3 = _cptofcd("N", strlen("N"));
- STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
- }
- } /* for k ... */
- }
- }
-
- stat->ops[SOLVE] += solve_ops;
- SUPERLU_FREE(work);
- return 0;
-}
-
-
-
-
-int
-sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x,
- int incx, float beta, float *y, int incy)
-{
-/* Purpose
- =======
-
- sp_sgemv() performs one of the matrix-vector operations
- y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
- where alpha and beta are scalars, x and y are vectors and A is a
- sparse A->nrow by A->ncol matrix.
-
- Parameters
- ==========
-
- TRANS - (input) char*
- On entry, TRANS specifies the operation to be performed as
- follows:
- TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
- TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
- TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
-
- ALPHA - (input) float
- On entry, ALPHA specifies the scalar alpha.
-
- A - (input) SuperMatrix*
- Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
- Currently, the type of A can be:
- Stype = NC or NCP; Dtype = SLU_S; Mtype = GE.
- In the future, more general A can be handled.
-
- X - (input) float*, array of DIMENSION at least
- ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
- and at least
- ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
- Before entry, the incremented array X must contain the
- vector x.
-
- INCX - (input) int
- On entry, INCX specifies the increment for the elements of
- X. INCX must not be zero.
-
- BETA - (input) float
- On entry, BETA specifies the scalar beta. When BETA is
- supplied as zero then Y need not be set on input.
-
- Y - (output) float*, array of DIMENSION at least
- ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
- and at least
- ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
- Before entry with BETA non-zero, the incremented array Y
- must contain the vector y. On exit, Y is overwritten by the
- updated vector y.
-
- INCY - (input) int
- On entry, INCY specifies the increment for the elements of
- Y. INCY must not be zero.
-
- ==== Sparse Level 2 Blas routine.
-*/
-
- /* Local variables */
- NCformat *Astore;
- float *Aval;
- int info;
- float temp;
- int lenx, leny, i, j, irow;
- int iy, jx, jy, kx, ky;
- int notran;
-
- notran = lsame_(trans, "N");
- Astore = A->Store;
- Aval = Astore->nzval;
-
- /* Test the input parameters */
- info = 0;
- if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
- else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
- else if (incx == 0) info = 5;
- else if (incy == 0) info = 8;
- if (info != 0) {
- xerbla_("sp_sgemv ", &info);
- return 0;
- }
-
- /* Quick return if possible. */
- if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
- return 0;
-
- /* Set LENX and LENY, the lengths of the vectors x and y, and set
- up the start points in X and Y. */
- if (lsame_(trans, "N")) {
- lenx = A->ncol;
- leny = A->nrow;
- } else {
- lenx = A->nrow;
- leny = A->ncol;
- }
- if (incx > 0) kx = 0;
- else kx = - (lenx - 1) * incx;
- if (incy > 0) ky = 0;
- else ky = - (leny - 1) * incy;
-
- /* Start the operations. In this version the elements of A are
- accessed sequentially with one pass through A. */
- /* First form y := beta*y. */
- if (beta != 1.) {
- if (incy == 1) {
- if (beta == 0.)
- for (i = 0; i < leny; ++i) y[i] = 0.;
- else
- for (i = 0; i < leny; ++i) y[i] = beta * y[i];
- } else {
- iy = ky;
- if (beta == 0.)
- for (i = 0; i < leny; ++i) {
- y[iy] = 0.;
- iy += incy;
- }
- else
- for (i = 0; i < leny; ++i) {
- y[iy] = beta * y[iy];
- iy += incy;
- }
- }
- }
-
- if (alpha == 0.) return 0;
-
- if ( notran ) {
- /* Form y := alpha*A*x + y. */
- jx = kx;
- if (incy == 1) {
- for (j = 0; j < A->ncol; ++j) {
- if (x[jx] != 0.) {
- temp = alpha * x[jx];
- for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
- irow = Astore->rowind[i];
- y[irow] += temp * Aval[i];
- }
- }
- jx += incx;
- }
- } else {
- ABORT("Not implemented.");
- }
- } else {
- /* Form y := alpha*A'*x + y. */
- jy = ky;
- if (incx == 1) {
- for (j = 0; j < A->ncol; ++j) {
- temp = 0.;
- for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
- irow = Astore->rowind[i];
- temp += Aval[i] * x[irow];
- }
- y[jy] += alpha * temp;
- jy += incy;
- }
- } else {
- ABORT("Not implemented.");
- }
- }
- return 0;
-} /* sp_sgemv */
-
-
-
diff --git a/SRC/ssp_blas3.c b/SRC/ssp_blas3.c
index 9b45292..6a416a5 100644
--- a/SRC/ssp_blas3.c
+++ b/SRC/ssp_blas3.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* Purpose: Sparse BLAS3, using some dense BLAS3 operations.
*/
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
int
sp_sgemm(char *transa, char *transb, int m, int n, int k,
diff --git a/SRC/sutil.c b/SRC/sutil.c
index 1a66061..a023a3f 100644
--- a/SRC/sutil.c
+++ b/SRC/sutil.c
@@ -20,7 +20,7 @@
*/
#include <math.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
void
sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
@@ -267,16 +267,20 @@ void
sPrint_Dense_Matrix(char *what, SuperMatrix *A)
{
DNformat *Astore;
- register int i;
+ register int i, j, lda;
float *dp;
printf("\nDense matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
Astore = (DNformat *) A->Store;
dp = (float *) Astore->nzval;
- printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+ lda = Astore->lda; /* read lda only after Astore is set from A->Store; reading it in the declaration dereferenced an uninitialized pointer */
+ printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
printf("\nnzval: ");
- for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i]);
+ for (j = 0; j < A->ncol; ++j) {
+ for (i = 0; i < A->nrow; ++i) printf("%f ", dp[i + j*lda]);
+ printf("\n");
+ }
printf("\n");
fflush(stdout);
}
diff --git a/SRC/util.c b/SRC/util.c
index a95e1ef..bfe5f88 100644
--- a/SRC/util.c
+++ b/SRC/util.c
@@ -19,8 +19,7 @@
*/
#include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
/*
* Global statistics variale
@@ -49,6 +48,24 @@ void set_default_options(superlu_options_t *options)
options->PrintStat = YES;
}
+/*
+ * Print the option settings read below to stdout, one labeled line each, between ".. options:" and "..".
+ */
+void print_options(superlu_options_t *options)
+{
+ printf(".. options:\n");
+ printf("\tFact\t %8d\n", options->Fact);
+ printf("\tEquil\t %8d\n", options->Equil);
+ printf("\tColPerm\t %8d\n", options->ColPerm);
+ printf("\tDiagPivotThresh %8.4f\n", options->DiagPivotThresh);
+ printf("\tTrans\t %8d\n", options->Trans);
+ printf("\tIterRefine\t%4d\n", options->IterRefine);
+ printf("\tSymmetricMode\t%4d\n", options->SymmetricMode);
+ printf("\tPivotGrowth\t%4d\n", options->PivotGrowth);
+ printf("\tConditionNumber\t%4d\n", options->ConditionNumber);
+ printf("..\n");
+}
/* Deallocate the structure pointing to the actual storage of the matrix. */
void
Destroy_SuperMatrix_Store(SuperMatrix *A)
diff --git a/SRC/xerbla.c b/SRC/xerbla.c
index c598282..bffd66b 100644
--- a/SRC/xerbla.c
+++ b/SRC/xerbla.c
@@ -1,3 +1,6 @@
+#include <stdio.h>
+#include "slu_Cnames.h"
+
/* Subroutine */ int xerbla_(char *srname, int *info)
{
/* -- LAPACK auxiliary routine (version 2.0) --
diff --git a/SRC/zcolumn_bmod.c b/SRC/zcolumn_bmod.c
index 7f2ef75..2082ad6 100644
--- a/SRC/zcolumn_bmod.c
+++ b/SRC/zcolumn_bmod.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/*
* Function prototypes
diff --git a/SRC/zcolumn_dfs.c b/SRC/zcolumn_dfs.c
index bfae8a0..92a20e2 100644
--- a/SRC/zcolumn_dfs.c
+++ b/SRC/zcolumn_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/* What type of supernodes we want */
#define T2_SUPER
diff --git a/SRC/zcopy_to_ucol.c b/SRC/zcopy_to_ucol.c
index 7c8969b..e5731cf 100644
--- a/SRC/zcopy_to_ucol.c
+++ b/SRC/zcopy_to_ucol.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
int
zcopy_to_ucol(
diff --git a/SRC/zgscon.c b/SRC/zgscon.c
index f811069..4e254c8 100644
--- a/SRC/zgscon.c
+++ b/SRC/zgscon.c
@@ -11,7 +11,7 @@
* History: Modified from lapack routines ZGECON.
*/
#include <math.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
void
zgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/zgsequ.c b/SRC/zgsequ.c
index 659ef1a..40e64e9 100644
--- a/SRC/zgsequ.c
+++ b/SRC/zgsequ.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from LAPACK routine ZGEEQU
*/
#include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
void
zgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
diff --git a/SRC/zgsrfs.c b/SRC/zgsrfs.c
index 6c655fd..9cb57fd 100644
--- a/SRC/zgsrfs.c
+++ b/SRC/zgsrfs.c
@@ -11,7 +11,7 @@
* History: Modified from lapack routine ZGERFS
*/
#include <math.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
void
zgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/zgssv.c b/SRC/zgssv.c
index 4494ce7..dbbd870 100644
--- a/SRC/zgssv.c
+++ b/SRC/zgssv.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -7,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
void
zgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/zgssvx.c b/SRC/zgssvx.c
index 6549da1..65ea538 100644
--- a/SRC/zgssvx.c
+++ b/SRC/zgssvx.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
void
zgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
@@ -455,7 +455,7 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
Astore->nzval, Astore->colind, Astore->rowptr,
SLU_NC, A->Dtype, A->Mtype);
if ( notran ) { /* Reverse the transpose argument. */
- trant = CONJ;
+ trant = TRANS;
notran = 0;
} else {
trant = NOTRANS;
diff --git a/SRC/zgstrf.c b/SRC/zgstrf.c
index 2a68b45..5cfef9b 100644
--- a/SRC/zgstrf.c
+++ b/SRC/zgstrf.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
void
zgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
@@ -182,8 +182,8 @@ zgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
*/
/* Local working arrays */
NCPformat *Astore;
- int *iperm_r; /* inverse of perm_r;
- used when options->Fact == SamePattern_SameRowPerm */
+ int *iperm_r = NULL; /* inverse of perm_r; used when
+ options->Fact == SamePattern_SameRowPerm */
int *iperm_c; /* inverse of perm_c */
int *iwork;
doublecomplex *zwork;
diff --git a/SRC/zgstrs.c b/SRC/zgstrs.c
index 95bcba7..a9a5b65 100644
--- a/SRC/zgstrs.c
+++ b/SRC/zgstrs.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/*
diff --git a/SRC/zgstrs.c.bak b/SRC/zgstrs.c.bak
deleted file mode 100644
index 40dc89c..0000000
--- a/SRC/zgstrs.c.bak
+++ /dev/null
@@ -1,339 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- Copyright (c) 1994 by Xerox Corporation. All rights reserved.
-
- THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
-
- Permission is hereby granted to use or copy this program for any
- purpose, provided the above notices are retained on all copies.
- Permission to modify the code and to distribute modified code is
- granted, provided the above notices are retained, and a notice that
- the code was modified is included with the above copyright notice.
-*/
-
-#include "zsp_defs.h"
-
-
-/*
- * Function prototypes
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-
-
-void
-zgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
- int *perm_c, int *perm_r, SuperMatrix *B,
- SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * ZGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * ZGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans (input) trans_t
- * Specifies the form of the system of equations:
- * = NOTRANS: A * X = B (No transpose)
- * = TRANS: A'* X = B (Transpose)
- * = CONJ: A**H * X = B (Conjugate transpose)
- *
- * L (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U as computed by
- * zgstrf(). Use compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U as computed by
- * zgstrf(). Use column-wise storage scheme, i.e., U has types:
- * Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * perm_c (input) int*, dimension (L->ncol)
- * Column permutation vector, which defines the
- * permutation matrix Pc; perm_c[i] = j means column i of A is
- * in position j in A*Pc.
- *
- * perm_r (input) int*, dimension (L->nrow)
- * Row permutation vector, which defines the permutation matrix Pr;
- * perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B (input/output) SuperMatrix*
- * B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- * On entry, the right hand side matrix.
- * On exit, the solution matrix if info = 0;
- *
- * stat (output) SuperLUStat_t*
- * Record the statistics on runtime and floating-point operation count.
- * See util.h for the definition of 'SuperLUStat_t'.
- *
- * info (output) int*
- * = 0: successful exit
- * < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
- _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
- int incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
- doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
- doublecomplex *work_col;
-#endif
- doublecomplex temp_comp;
- DNformat *Bstore;
- doublecomplex *Bmat;
- SCformat *Lstore;
- NCformat *Ustore;
- doublecomplex *Lval, *Uval;
- int fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
- int i, j, k, iptr, jcol, n, ldb, nrhs;
- doublecomplex *work, *rhs_work, *soln;
- flops_t solve_ops;
- void zprint_soln();
-
- /* Test input parameters ... */
- *info = 0;
- Bstore = B->Store;
- ldb = Bstore->lda;
- nrhs = B->ncol;
- if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
- else if ( L->nrow != L->ncol || L->nrow < 0 ||
- L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
- *info = -2;
- else if ( U->nrow != U->ncol || U->nrow < 0 ||
- U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
- *info = -3;
- else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
- B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
- *info = -6;
- if ( *info ) {
- i = -(*info);
- xerbla_("zgstrs", &i);
- return;
- }
-
- n = L->nrow;
- work = doublecomplexCalloc(n * nrhs);
- if ( !work ) ABORT("Malloc fails for local work[].");
- soln = doublecomplexMalloc(n);
- if ( !soln ) ABORT("Malloc fails for local soln[].");
-
- Bmat = Bstore->nzval;
- Lstore = L->Store;
- Lval = Lstore->nzval;
- Ustore = U->Store;
- Uval = Ustore->nzval;
- solve_ops = 0;
-
- if ( trans == NOTRANS ) {
- /* Permute right hand sides to form Pr*B */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- /* Forward solve PLy=Pb. */
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- nrow = nsupr - nsupc;
-
- solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
- solve_ops += 8 * nrow * nsupc * nrhs;
-
- if ( nsupc == 1 ) {
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- luptr = L_NZ_START(fsupc);
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
- irow = L_SUB(iptr);
- ++luptr;
- zz_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
- z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
- }
- }
- } else {
- luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("N", strlen("N"));
- ftcs3 = _cptofcd("U", strlen("U"));
- CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#else
- ztrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-
- zgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
- &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
- &beta, &work[0], &n );
-#endif
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- work_col = &work[j*n];
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- z_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
- work_col[i].r = 0.0;
- work_col[i].i = 0.0;
- iptr++;
- }
- }
-#else
- for (j = 0; j < nrhs; j++) {
- rhs_work = &Bmat[j*ldb];
- zlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
- zmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
- &rhs_work[fsupc], &work[0] );
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; i++) {
- irow = L_SUB(iptr);
- z_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
- work[i].r = 0.;
- work[i].i = 0.;
- iptr++;
- }
- }
-#endif
- } /* else ... */
- } /* for L-solve */
-
-#ifdef DEBUG
- printf("After L-solve: y=\n");
- zprint_soln(n, nrhs, Bmat);
-#endif
-
- /*
- * Back solve Ux=y.
- */
- for (k = Lstore->nsuper; k >= 0; k--) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;
-
- if ( nsupc == 1 ) {
- rhs_work = &Bmat[0];
- for (j = 0; j < nrhs; j++) {
- z_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
- rhs_work += ldb;
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("U", strlen("U"));
- ftcs3 = _cptofcd("N", strlen("N"));
- CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
- ztrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
- &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else
- for (j = 0; j < nrhs; j++)
- zusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif
- }
-
- for (j = 0; j < nrhs; ++j) {
- rhs_work = &Bmat[j*ldb];
- for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
- solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
- irow = U_SUB(i);
- zz_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
- z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
- }
- }
- }
-
- } /* for U-solve */
-
-#ifdef DEBUG
- printf("After U-solve: x=\n");
- zprint_soln(n, nrhs, Bmat);
-#endif
-
- /* Compute the final solution X := Pc*X. */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- stat->ops[SOLVE] = solve_ops;
-
- } else { /* Solve A'*X=B */
- /* Permute right hand sides to form Pc'*B. */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- stat->ops[SOLVE] = 0;
-
- for (k = 0; k < nrhs; ++k) {
-
- /* Multiply by inv(U'). */
- sp_ztrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-
- /* Multiply by inv(L'). */
- sp_ztrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-
- }
-
- /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
- for (i = 0; i < nrhs; i++) {
- rhs_work = &Bmat[i*ldb];
- for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
- for (k = 0; k < n; k++) rhs_work[k] = soln[k];
- }
-
- }
-
- SUPERLU_FREE(work);
- SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector
- */
-void
-zprint_soln(int n, int nrhs, doublecomplex *soln)
-{
- int i;
-
- for (i = 0; i < n; i++)
- printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/SRC/zlacon.c b/SRC/zlacon.c
index e240371..19382a2 100644
--- a/SRC/zlacon.c
+++ b/SRC/zlacon.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,8 @@
*
*/
#include <math.h>
-#include "Cnames.h"
-#include "dcomplex.h"
+#include "slu_Cnames.h"
+#include "slu_dcomplex.h"
int
zlacon_(int *n, doublecomplex *v, doublecomplex *x, double *est, int *kase)
diff --git a/SRC/zlangs.c b/SRC/zlangs.c
index e178c6f..ad09d3d 100644
--- a/SRC/zlangs.c
+++ b/SRC/zlangs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from lapack routine ZLANGE
*/
#include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
double zlangs(char *norm, SuperMatrix *A)
{
diff --git a/SRC/zlaqgs.c b/SRC/zlaqgs.c
index d28393d..5b9d503 100644
--- a/SRC/zlaqgs.c
+++ b/SRC/zlaqgs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* History: Modified from LAPACK routine ZLAQGE
*/
#include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
void
zlaqgs(SuperMatrix *A, double *r, double *c,
diff --git a/SRC/zmemory.c b/SRC/zmemory.c
index 0c79e9e..02ac640 100644
--- a/SRC/zmemory.c
+++ b/SRC/zmemory.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/* Constants */
#define NO_MEMTYPE 4 /* 0: lusup;
@@ -193,9 +193,10 @@ zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
zSetupSpace(work, lwork, &Glu->MemModel);
}
-#ifdef DEBUG
- printf("zLUMemInit() called: annz %d, MemModel %d\n",
- annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+ printf("zLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n",
+ FILL, nzlmax, nzumax);
+ fflush(stdout);
#endif
/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
printf("Not enough memory to perform factorization.\n");
return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
}
+#if ( PRNTlevel >= 1)
+ printf("zLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
+ nzlmax, nzumax);
+ fflush(stdout);
+#endif
lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) zexpand( &nzlmax, LSUB, 0, 0, Glu );
@@ -476,8 +482,7 @@ void
else lword = sizeof(doublecomplex);
if ( Glu->MemModel == SYSTEM ) {
- new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/* new_mem = (void *) calloc(new_len, lword); */
+ new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
if ( no_expand != 0 ) {
tries = 0;
if ( keep_prev ) {
@@ -487,8 +492,7 @@ void
if ( ++tries > 10 ) return (NULL);
alpha = Reduce(alpha);
new_len = alpha * *prev_len;
- new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/* new_mem = (void *) calloc(new_len, lword); */
+ new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
}
}
if ( type == LSUB || type == USUB ) {
@@ -641,7 +645,7 @@ zallocateA(int n, int nnz, doublecomplex **a, int **asub, int **xa)
doublecomplex *doublecomplexMalloc(int n)
{
doublecomplex *buf;
- buf = (doublecomplex *) SUPERLU_MALLOC(n * sizeof(doublecomplex));
+ buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in doublecomplexMalloc()\n");
}
@@ -653,7 +657,7 @@ doublecomplex *doublecomplexCalloc(int n)
doublecomplex *buf;
register int i;
doublecomplex zero = {0.0, 0.0};
- buf = (doublecomplex *) SUPERLU_MALLOC(n * sizeof(doublecomplex));
+ buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex));
if ( !buf ) {
ABORT("SUPERLU_MALLOC failed for buf in doublecomplexCalloc()\n");
}
diff --git a/SRC/zmemory.c b/SRC/zmemory.c.bak
similarity index 98%
copy from SRC/zmemory.c
copy to SRC/zmemory.c.bak
index 0c79e9e..874e3df 100644
--- a/SRC/zmemory.c
+++ b/SRC/zmemory.c.bak
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/* Constants */
#define NO_MEMTYPE 4 /* 0: lusup;
@@ -193,9 +193,10 @@ zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
zSetupSpace(work, lwork, &Glu->MemModel);
}
-#ifdef DEBUG
- printf("zLUMemInit() called: annz %d, MemModel %d\n",
- annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+ printf("zLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n",
+ FILL, nzlmax, nzumax);
+ fflush(stdout);
#endif
/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
printf("Not enough memory to perform factorization.\n");
return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
}
+#if ( PRNTlevel >= 1)
+ printf("zzLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
+ nzlmax, nzumax);
+ fflush(stdout);
+#endif
lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
ucol = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
lsub = (int *) zexpand( &nzlmax, LSUB, 0, 0, Glu );
diff --git a/SRC/zmyblas2.c b/SRC/zmyblas2.c
index 59450cd..45c67c1 100644
--- a/SRC/zmyblas2.c
+++ b/SRC/zmyblas2.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -14,7 +13,7 @@
* Note:
* This is only used when the system lacks an efficient BLAS library.
*/
-#include "dcomplex.h"
+#include "slu_dcomplex.h"
/*
* Solves a dense UNIT lower triangular system. The unit lower
diff --git a/SRC/zpanel_bmod.c b/SRC/zpanel_bmod.c
index 658c945..f910635 100644
--- a/SRC/zpanel_bmod.c
+++ b/SRC/zpanel_bmod.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/*
* Function prototypes
diff --git a/SRC/zpanel_dfs.c b/SRC/zpanel_dfs.c
index c9ed6ce..3e535a8 100644
--- a/SRC/zpanel_dfs.c
+++ b/SRC/zpanel_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
void
zpanel_dfs (
diff --git a/SRC/zpivotL.c b/SRC/zpivotL.c
index 26afc4c..20aacda 100644
--- a/SRC/zpivotL.c
+++ b/SRC/zpivotL.c
@@ -21,7 +21,7 @@
#include <math.h>
#include <stdlib.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
#undef DEBUG
diff --git a/SRC/zpivotgrowth.c b/SRC/zpivotgrowth.c
index 59e0c82..b8afeef 100644
--- a/SRC/zpivotgrowth.c
+++ b/SRC/zpivotgrowth.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
double
zPivotGrowth(int ncols, SuperMatrix *A, int *perm_c,
diff --git a/SRC/zpruneL.c b/SRC/zpruneL.c
index ee24f7f..25d003c 100644
--- a/SRC/zpruneL.c
+++ b/SRC/zpruneL.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
void
zpruneL(
diff --git a/SRC/zreadhb.c b/SRC/zreadhb.c
index c98951a..fcccab6 100644
--- a/SRC/zreadhb.c
+++ b/SRC/zreadhb.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -9,7 +8,7 @@
*/
#include <stdio.h>
#include <stdlib.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/* Eat up the rest of the current line */
diff --git a/SRC/zsnode_bmod.c b/SRC/zsnode_bmod.c
index 8ec938b..c36d0fa 100644
--- a/SRC/zsnode_bmod.c
+++ b/SRC/zsnode_bmod.c
@@ -19,7 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/*
diff --git a/SRC/zsnode_dfs.c b/SRC/zsnode_dfs.c
index b1bec95..a6bab8f 100644
--- a/SRC/zsnode_dfs.c
+++ b/SRC/zsnode_dfs.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
the code was modified is included with the above copyright notice.
*/
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
int
zsnode_dfs (
diff --git a/SRC/zsp_blas2.c b/SRC/zsp_blas2.c
index 18470c5..e94c519 100644
--- a/SRC/zsp_blas2.c
+++ b/SRC/zsp_blas2.c
@@ -11,7 +11,7 @@
* Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
/*
* Function prototypes
@@ -132,7 +132,8 @@ sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
luptr = L_NZ_START(fsupc);
nrow = nsupr - nsupc;
- solve_ops += 4 * nsupc * (nsupc - 1);
+ /* 1 z_div costs 10 flops */
+ solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
solve_ops += 8 * nrow * nsupc;
if ( nsupc == 1 ) {
@@ -185,7 +186,8 @@ sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
nsupc = L_FST_SUPC(k+1) - fsupc;
luptr = L_NZ_START(fsupc);
- solve_ops += 4 * nsupc * (nsupc + 1);
+ /* 1 z_div costs 10 flops */
+ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
if ( nsupc == 1 ) {
z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
@@ -279,7 +281,8 @@ sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
}
}
- solve_ops += 4 * nsupc * (nsupc + 1);
+ /* 1 z_div costs 10 flops */
+ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
if ( nsupc == 1 ) {
z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
@@ -358,7 +361,8 @@ sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
}
}
- solve_ops += 4 * nsupc * (nsupc + 1);
+ /* 1 z_div costs 10 flops */
+ solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
if ( nsupc == 1 ) {
zz_conj(&temp, &Lval[luptr]);
diff --git a/SRC/zsp_blas2.c.bak b/SRC/zsp_blas2.c.bak
deleted file mode 100644
index 5ab0334..0000000
--- a/SRC/zsp_blas2.c.bak
+++ /dev/null
@@ -1,479 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- * File name: zsp_blas2.c
- * Purpose: Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "zsp_defs.h"
-
-/*
- * Function prototypes
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-
-
-int
-sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
- SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * sp_ztrsv() solves one of the systems of equations
- * A*x = b, or A'*x = b,
- * where b and x are n element vectors and A is a sparse unit , or
- * non-unit, upper or lower triangular matrix.
- * No test for singularity or near-singularity is included in this
- * routine. Such tests must be performed before calling this routine.
- *
- * Parameters
- * ==========
- *
- * uplo - (input) char*
- * On entry, uplo specifies whether the matrix is an upper or
- * lower triangular matrix as follows:
- * uplo = 'U' or 'u' A is an upper triangular matrix.
- * uplo = 'L' or 'l' A is a lower triangular matrix.
- *
- * trans - (input) char*
- * On entry, trans specifies the equations to be solved as
- * follows:
- * trans = 'N' or 'n' A*x = b.
- * trans = 'T' or 't' A'*x = b.
- * trans = 'C' or 'c' A'*x = b.
- *
- * diag - (input) char*
- * On entry, diag specifies whether or not A is unit
- * triangular as follows:
- * diag = 'U' or 'u' A is assumed to be unit triangular.
- * diag = 'N' or 'n' A is not assumed to be unit
- * triangular.
- *
- * L - (input) SuperMatrix*
- * The factor L from the factorization Pr*A*Pc=L*U. Use
- * compressed row subscripts storage for supernodes,
- * i.e., L has types: Stype = SC, Dtype = SLU_Z, Mtype = TRLU.
- *
- * U - (input) SuperMatrix*
- * The factor U from the factorization Pr*A*Pc=L*U.
- * U has types: Stype = NC, Dtype = SLU_Z, Mtype = TRU.
- *
- * x - (input/output) doublecomplex*
- * Before entry, the incremented array X must contain the n
- * element right-hand side vector b. On exit, X is overwritten
- * with the solution vector x.
- *
- * info - (output) int*
- * If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
- _fcd ftcs1 = _cptofcd("L", strlen("L")),
- ftcs2 = _cptofcd("N", strlen("N")),
- ftcs3 = _cptofcd("U", strlen("U"));
-#endif
- SCformat *Lstore;
- NCformat *Ustore;
- doublecomplex *Lval, *Uval;
- int incx = 1, incy = 1;
- doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
- doublecomplex comp_zero = {0.0, 0.0};
- int nrow;
- int fsupc, nsupr, nsupc, luptr, istart, irow;
- int i, k, iptr, jcol;
- doublecomplex *work;
- flops_t solve_ops;
-
- /* Test the input parameters */
- *info = 0;
- if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
- else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
- else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
- else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
- else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
- if ( *info ) {
- i = -(*info);
- xerbla_("sp_ztrsv", &i);
- return 0;
- }
-
- Lstore = L->Store;
- Lval = Lstore->nzval;
- Ustore = U->Store;
- Uval = Ustore->nzval;
- solve_ops = 0;
-
- if ( !(work = doublecomplexCalloc(L->nrow)) )
- ABORT("Malloc fails for work in sp_ztrsv().");
-
- if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */
-
- if ( lsame_(uplo, "L") ) {
- /* Form x := inv(L)*x */
- if ( L->nrow == 0 ) return 0; /* Quick return */
-
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
- nrow = nsupr - nsupc;
-
- solve_ops += 4 * nsupc * (nsupc - 1);
- solve_ops += 8 * nrow * nsupc;
-
- if ( nsupc == 1 ) {
- for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
- irow = L_SUB(iptr);
- ++luptr;
- zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
- z_sub(&x[irow], &x[irow], &comp_zero);
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-
- CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
- &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
- ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-
- zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
- &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
- zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-
- zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
- &x[fsupc], &work[0] );
-#endif
-
- iptr = istart + nsupc;
- for (i = 0; i < nrow; ++i, ++iptr) {
- irow = L_SUB(iptr);
- z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
- work[i] = comp_zero;
-
- }
- }
- } /* for k ... */
-
- } else {
- /* Form x := inv(U)*x */
-
- if ( U->nrow == 0 ) return 0; /* Quick return */
-
- for (k = Lstore->nsuper; k >= 0; k--) {
- fsupc = L_FST_SUPC(k);
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += 4 * nsupc * (nsupc + 1);
-
- if ( nsupc == 1 ) {
- z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
- for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
- irow = U_SUB(i);
- zz_mult(&comp_zero, &x[fsupc], &Uval[i]);
- z_sub(&x[irow], &x[irow], &comp_zero);
- }
- } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
- CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
-#else
- zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
- i++) {
- irow = U_SUB(i);
- zz_mult(&comp_zero, &x[jcol], &Uval[i]);
- z_sub(&x[irow], &x[irow], &comp_zero);
- }
- }
- }
- } /* for k ... */
-
- }
- } else { /* Form x := inv(A')*x */
-
- if ( lsame_(uplo, "L") ) {
- /* Form x := inv(L')*x */
- if ( L->nrow == 0 ) return 0; /* Quick return */
-
- for (k = Lstore->nsuper; k >= 0; --k) {
- fsupc = L_FST_SUPC(k);
- istart = L_SUB_START(fsupc);
- nsupr = L_SUB_START(fsupc+1) - istart;
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- solve_ops += 8 * (nsupr - nsupc) * nsupc;
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- iptr = istart + nsupc;
- for (i = L_NZ_START(jcol) + nsupc;
- i < L_NZ_START(jcol+1); i++) {
- irow = L_SUB(iptr);
- zz_mult(&comp_zero, &x[irow], &Lval[i]);
- z_sub(&x[jcol], &x[jcol], &comp_zero);
- iptr++;
- }
- }
-
- if ( nsupc > 1 ) {
- solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
- ftcs1 = _cptofcd("L", strlen("L"));
- ftcs2 = _cptofcd("T", strlen("T"));
- ftcs3 = _cptofcd("U", strlen("U"));
- CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
- }
- }
- } else {
- /* Form x := inv(U')*x */
- if ( U->nrow == 0 ) return 0; /* Quick return */
-
- for (k = 0; k <= Lstore->nsuper; k++) {
- fsupc = L_FST_SUPC(k);
- nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
- nsupc = L_FST_SUPC(k+1) - fsupc;
- luptr = L_NZ_START(fsupc);
-
- for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
- solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
- for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
- irow = U_SUB(i);
- zz_mult(&comp_zero, &x[irow], &Uval[i]);
- z_sub(&x[jcol], &x[jcol], &comp_zero);
- }
- }
-
- solve_ops += 4 * nsupc * (nsupc + 1);
-
- if ( nsupc == 1 ) {
- z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
- } else {
-#ifdef _CRAY
- ftcs1 = _cptofcd("U", strlen("U"));
- ftcs2 = _cptofcd("T", strlen("T"));
- ftcs3 = _cptofcd("N", strlen("N"));
- CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#else
- ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
- &x[fsupc], &incx);
-#endif
- }
- } /* for k ... */
- }
- }
-
- stat->ops[SOLVE] += solve_ops;
- SUPERLU_FREE(work);
- return 0;
-}
-
-
-
-int
-sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x,
- int incx, doublecomplex beta, doublecomplex *y, int incy)
-{
-/* Purpose
- =======
-
- sp_zgemv() performs one of the matrix-vector operations
- y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y,
- where alpha and beta are scalars, x and y are vectors and A is a
- sparse A->nrow by A->ncol matrix.
-
- Parameters
- ==========
-
- TRANS - (input) char*
- On entry, TRANS specifies the operation to be performed as
- follows:
- TRANS = 'N' or 'n' y := alpha*A*x + beta*y.
- TRANS = 'T' or 't' y := alpha*A'*x + beta*y.
- TRANS = 'C' or 'c' y := alpha*A'*x + beta*y.
-
- ALPHA - (input) doublecomplex
- On entry, ALPHA specifies the scalar alpha.
-
- A - (input) SuperMatrix*
- Before entry, the leading m by n part of the array A must
- contain the matrix of coefficients.
-
- X - (input) doublecomplex*, array of DIMENSION at least
- ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
- and at least
- ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
- Before entry, the incremented array X must contain the
- vector x.
-
- INCX - (input) int
- On entry, INCX specifies the increment for the elements of
- X. INCX must not be zero.
-
- BETA - (input) doublecomplex
- On entry, BETA specifies the scalar beta. When BETA is
- supplied as zero then Y need not be set on input.
-
- Y - (output) doublecomplex*, array of DIMENSION at least
- ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
- and at least
- ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
- Before entry with BETA non-zero, the incremented array Y
- must contain the vector y. On exit, Y is overwritten by the
- updated vector y.
-
- INCY - (input) int
- On entry, INCY specifies the increment for the elements of
- Y. INCY must not be zero.
-
- ==== Sparse Level 2 Blas routine.
-*/
-
- /* Local variables */
- NCformat *Astore;
- doublecomplex *Aval;
- int info;
- doublecomplex temp, temp1;
- int lenx, leny, i, j, irow;
- int iy, jx, jy, kx, ky;
- int notran;
- doublecomplex comp_zero = {0.0, 0.0};
- doublecomplex comp_one = {1.0, 0.0};
-
- notran = lsame_(trans, "N");
- Astore = A->Store;
- Aval = Astore->nzval;
-
- /* Test the input parameters */
- info = 0;
- if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
- else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
- else if (incx == 0) info = 5;
- else if (incy == 0) info = 8;
- if (info != 0) {
- xerbla_("sp_zgemv ", &info);
- return 0;
- }
-
- /* Quick return if possible. */
- if (A->nrow == 0 || A->ncol == 0 ||
- z_eq(&alpha, &comp_zero) &&
- z_eq(&beta, &comp_one))
- return 0;
-
-
- /* Set LENX and LENY, the lengths of the vectors x and y, and set
- up the start points in X and Y. */
- if (lsame_(trans, "N")) {
- lenx = A->ncol;
- leny = A->nrow;
- } else {
- lenx = A->nrow;
- leny = A->ncol;
- }
- if (incx > 0) kx = 0;
- else kx = - (lenx - 1) * incx;
- if (incy > 0) ky = 0;
- else ky = - (leny - 1) * incy;
-
- /* Start the operations. In this version the elements of A are
- accessed sequentially with one pass through A. */
- /* First form y := beta*y. */
- if ( !z_eq(&beta, &comp_one) ) {
- if (incy == 1) {
- if ( z_eq(&beta, &comp_zero) )
- for (i = 0; i < leny; ++i) y[i] = comp_zero;
- else
- for (i = 0; i < leny; ++i)
- zz_mult(&y[i], &beta, &y[i]);
- } else {
- iy = ky;
- if ( z_eq(&beta, &comp_zero) )
- for (i = 0; i < leny; ++i) {
- y[iy] = comp_zero;
- iy += incy;
- }
- else
- for (i = 0; i < leny; ++i) {
- zz_mult(&y[iy], &beta, &y[iy]);
- iy += incy;
- }
- }
- }
-
- if ( z_eq(&alpha, &comp_zero) ) return 0;
-
- if ( notran ) {
- /* Form y := alpha*A*x + y. */
- jx = kx;
- if (incy == 1) {
- for (j = 0; j < A->ncol; ++j) {
- if ( !z_eq(&x[jx], &comp_zero) ) {
- zz_mult(&temp, &alpha, &x[jx]);
- for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
- irow = Astore->rowind[i];
- zz_mult(&temp1, &temp, &Aval[i]);
- z_add(&y[irow], &y[irow], &temp1);
- }
- }
- jx += incx;
- }
- } else {
- ABORT("Not implemented.");
- }
- } else {
- /* Form y := alpha*A'*x + y. */
- jy = ky;
- if (incx == 1) {
- for (j = 0; j < A->ncol; ++j) {
- temp = comp_zero;
- for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
- irow = Astore->rowind[i];
- zz_mult(&temp1, &Aval[i], &x[irow]);
- z_add(&temp, &temp, &temp1);
- }
- zz_mult(&temp1, &alpha, &temp);
- z_add(&y[jy], &y[jy], &temp1);
- jy += incy;
- }
- } else {
- ABORT("Not implemented.");
- }
- }
- return 0;
-} /* sp_zgemv */
-
diff --git a/SRC/zsp_blas3.c b/SRC/zsp_blas3.c
index 9825161..5dddf5a 100644
--- a/SRC/zsp_blas3.c
+++ b/SRC/zsp_blas3.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
* Purpose: Sparse BLAS3, using some dense BLAS3 operations.
*/
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
int
sp_zgemm(char *transa, char *transb, int m, int n, int k,
diff --git a/SRC/zutil.c b/SRC/zutil.c
index d7c76b4..8e9dcba 100644
--- a/SRC/zutil.c
+++ b/SRC/zutil.c
@@ -20,7 +20,7 @@
*/
#include <math.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
void
zCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
@@ -240,7 +240,8 @@ zPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
for (j = c; j < c + nsup; ++j) {
d = Astore->nzval_colptr[j];
for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
- printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d++], dp[d++]);
+ printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]);
+ d += 2;
}
}
}
@@ -267,16 +268,21 @@ void
zPrint_Dense_Matrix(char *what, SuperMatrix *A)
{
DNformat *Astore;
- register int i;
+ register int i, j, lda;
double *dp;
printf("\nDense matrix %s:\n", what);
printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
Astore = (DNformat *) A->Store;
dp = (double *) Astore->nzval;
- printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+ /* Read lda only after Astore has been assigned from A->Store. */
+ lda = Astore->lda;
+ printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
printf("\nnzval: ");
- for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i]);
+ for (j = 0; j < A->ncol; ++j) {
+ for (i = 0; i < 2*A->nrow; ++i) printf("%f ", dp[i + j*2*lda]);
+ printf("\n");
+ }
printf("\n");
fflush(stdout);
}
diff --git a/TESTING/MATGEN/Cnames.h b/TESTING/MATGEN/Cnames.h
deleted file mode 120000
index 0398527..0000000
--- a/TESTING/MATGEN/Cnames.h
+++ /dev/null
@@ -1 +0,0 @@
-../../SRC/Cnames.h
\ No newline at end of file
diff --git a/TESTING/MATGEN/clatb4.c b/TESTING/MATGEN/clatb4.c
index ad2c3dd..2a5fba0 100644
--- a/TESTING/MATGEN/clatb4.c
+++ b/TESTING/MATGEN/clatb4.c
@@ -3,6 +3,7 @@
-lf2c -lm (in that order)
*/
+#include <string.h>
#include "f2c.h"
/* Table of constant values */
diff --git a/TESTING/MATGEN/dlatb4.c b/TESTING/MATGEN/dlatb4.c
index c26760a..a1f4399 100644
--- a/TESTING/MATGEN/dlatb4.c
+++ b/TESTING/MATGEN/dlatb4.c
@@ -3,6 +3,7 @@
-lf2c -lm (in that order)
*/
+#include <string.h>
#include "f2c.h"
/* Table of constant values */
diff --git a/TESTING/MATGEN/f2c.h b/TESTING/MATGEN/f2c.h
index caa33e1..3116864 100644
--- a/TESTING/MATGEN/f2c.h
+++ b/TESTING/MATGEN/f2c.h
@@ -4,7 +4,7 @@
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
-#include "Cnames.h"
+#include "slu_Cnames.h"
#ifndef F2C_INCLUDE
#define F2C_INCLUDE
diff --git a/TESTING/MATGEN/slatb4.c b/TESTING/MATGEN/slatb4.c
index 33387a5..4f814ef 100644
--- a/TESTING/MATGEN/slatb4.c
+++ b/TESTING/MATGEN/slatb4.c
@@ -3,6 +3,7 @@
-lf2c -lm (in that order)
*/
+#include <string.h>
#include "f2c.h"
/* Table of constant values */
diff --git a/TESTING/MATGEN/slu_Cnames.h b/TESTING/MATGEN/slu_Cnames.h
new file mode 120000
index 0000000..e9e2972
--- /dev/null
+++ b/TESTING/MATGEN/slu_Cnames.h
@@ -0,0 +1 @@
+../../SRC/slu_Cnames.h
\ No newline at end of file
diff --git a/TESTING/MATGEN/zlatb4.c b/TESTING/MATGEN/zlatb4.c
index a16117d..4bfba46 100644
--- a/TESTING/MATGEN/zlatb4.c
+++ b/TESTING/MATGEN/zlatb4.c
@@ -3,6 +3,7 @@
-lf2c -lm (in that order)
*/
+#include <string.h>
#include "f2c.h"
/* Table of constant values */
diff --git a/TESTING/Makefile b/TESTING/Makefile
index e8967e4..226de42 100644
--- a/TESTING/Makefile
+++ b/TESTING/Makefile
@@ -49,7 +49,10 @@ CLINTST = cdrive.o sp_cconvert.o \
ZLINTST = zdrive.o sp_zconvert.o \
sp_zget01.o sp_zget02.o sp_zget04.o sp_zget07.o
-all: single double complex complex16
+all: testmat single double complex complex16
+
+testmat:
+ (cd MATGEN; $(MAKE))
single: ./stest stest.out
diff --git a/TESTING/cdrive.c b/TESTING/cdrive.c
index 2d9502f..7b487af 100644
--- a/TESTING/cdrive.c
+++ b/TESTING/cdrive.c
@@ -11,7 +11,7 @@
* Purpose: MAIN test program
*/
#include <string.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
#define NTESTS 5 /* Number of test types */
#define NTYPES 11 /* Number of matrix types */
@@ -21,6 +21,10 @@
#define FMT2 "%10s:fact=%4d, trans=%4d, equed=%c, n=%d, imat=%d, test(%d)=%12.5g\n"
#define FMT3 "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n"
+static void
+parse_command_line(int argc, char *argv[], char *matrix_type,
+ int *n, int *w, int *relax, int *nrhs, int *maxsuper,
+ int *rowblk, int *colblk, int *lwork, float *u);
main(int argc, char *argv[])
{
@@ -83,7 +87,6 @@ main(int argc, char *argv[])
static trans_t transs[] = {NOTRANS, TRANS, CONJ};
/* Some function prototypes */
- static void parse_command_line();
extern int sp_cget01(int, int, SuperMatrix *, SuperMatrix *,
SuperMatrix *, int *, float *);
extern int sp_cget02(trans_t, int, int, int, SuperMatrix *, complex *,
@@ -500,7 +503,7 @@ main(int argc, char *argv[])
static void
parse_command_line(int argc, char *argv[], char *matrix_type,
int *n, int *w, int *relax, int *nrhs, int *maxsuper,
- int *rowblk, int *colblk, int *lwork, double *u)
+ int *rowblk, int *colblk, int *lwork, float *u)
{
int c;
extern char *optarg;
diff --git a/TESTING/ddrive.c b/TESTING/ddrive.c
index ef24d43..afa058e 100644
--- a/TESTING/ddrive.c
+++ b/TESTING/ddrive.c
@@ -11,7 +11,7 @@
* Purpose: MAIN test program
*/
#include <string.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
#define NTESTS 5 /* Number of test types */
#define NTYPES 11 /* Number of matrix types */
@@ -21,6 +21,10 @@
#define FMT2 "%10s:fact=%4d, trans=%4d, equed=%c, n=%d, imat=%d, test(%d)=%12.5g\n"
#define FMT3 "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n"
+static void
+parse_command_line(int argc, char *argv[], char *matrix_type,
+ int *n, int *w, int *relax, int *nrhs, int *maxsuper,
+ int *rowblk, int *colblk, int *lwork, double *u);
main(int argc, char *argv[])
{
@@ -83,7 +87,6 @@ main(int argc, char *argv[])
static trans_t transs[] = {NOTRANS, TRANS, CONJ};
/* Some function prototypes */
- static void parse_command_line();
extern int sp_dget01(int, int, SuperMatrix *, SuperMatrix *,
SuperMatrix *, int *, double *);
extern int sp_dget02(trans_t, int, int, int, SuperMatrix *, double *,
diff --git a/TESTING/sdrive.c b/TESTING/sdrive.c
index 74d9fd0..0afb3e2 100644
--- a/TESTING/sdrive.c
+++ b/TESTING/sdrive.c
@@ -11,7 +11,7 @@
* Purpose: MAIN test program
*/
#include <string.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
#define NTESTS 5 /* Number of test types */
#define NTYPES 11 /* Number of matrix types */
@@ -21,6 +21,10 @@
#define FMT2 "%10s:fact=%4d, trans=%4d, equed=%c, n=%d, imat=%d, test(%d)=%12.5g\n"
#define FMT3 "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n"
+static void
+parse_command_line(int argc, char *argv[], char *matrix_type,
+ int *n, int *w, int *relax, int *nrhs, int *maxsuper,
+ int *rowblk, int *colblk, int *lwork, float *u);
main(int argc, char *argv[])
{
@@ -83,7 +87,6 @@ main(int argc, char *argv[])
static trans_t transs[] = {NOTRANS, TRANS, CONJ};
/* Some function prototypes */
- static void parse_command_line();
extern int sp_sget01(int, int, SuperMatrix *, SuperMatrix *,
SuperMatrix *, int *, float *);
extern int sp_sget02(trans_t, int, int, int, SuperMatrix *, float *,
@@ -500,7 +503,7 @@ main(int argc, char *argv[])
static void
parse_command_line(int argc, char *argv[], char *matrix_type,
int *n, int *w, int *relax, int *nrhs, int *maxsuper,
- int *rowblk, int *colblk, int *lwork, double *u)
+ int *rowblk, int *colblk, int *lwork, float *u)
{
int c;
extern char *optarg;
diff --git a/TESTING/sp_cconvert.c b/TESTING/sp_cconvert.c
index e4aab06..243400d 100644
--- a/TESTING/sp_cconvert.c
+++ b/TESTING/sp_cconvert.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
/*
* Convert a full matrix into a sparse matrix format.
diff --git a/TESTING/sp_cget01.c b/TESTING/sp_cget01.c
index c1bf55a..8d642c2 100644
--- a/TESTING/sp_cget01.c
+++ b/TESTING/sp_cget01.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
int sp_cget01(int m, int n, SuperMatrix *A, SuperMatrix *L,
SuperMatrix *U, int *perm_r, float *resid)
diff --git a/TESTING/sp_cget02.c b/TESTING/sp_cget02.c
index a3416f5..a45c8e8 100644
--- a/TESTING/sp_cget02.c
+++ b/TESTING/sp_cget02.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "csp_defs.h"
+#include "slu_cdefs.h"
int sp_cget02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
complex *x, int ldx, complex *b, int ldb, float *resid)
diff --git a/TESTING/sp_cget04.c b/TESTING/sp_cget04.c
index 655d247..9291c94 100644
--- a/TESTING/sp_cget04.c
+++ b/TESTING/sp_cget04.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
int sp_cget04(int n, int nrhs, complex *x, int ldx, complex *xact,
int ldxact, float rcond, float *resid)
diff --git a/TESTING/sp_cget07.c b/TESTING/sp_cget07.c
index 9d23660..2e69f03 100644
--- a/TESTING/sp_cget07.c
+++ b/TESTING/sp_cget07.c
@@ -7,7 +7,7 @@
*
*/
#include <math.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
int sp_cget07(trans_t trans, int n, int nrhs, SuperMatrix *A, complex *b,
int ldb, complex *x, int ldx, complex *xact,
diff --git a/TESTING/sp_dconvert.c b/TESTING/sp_dconvert.c
index 7391e69..dec3b32 100644
--- a/TESTING/sp_dconvert.c
+++ b/TESTING/sp_dconvert.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
/*
* Convert a full matrix into a sparse matrix format.
diff --git a/TESTING/sp_dget01.c b/TESTING/sp_dget01.c
index 54a31ed..d6b8594 100644
--- a/TESTING/sp_dget01.c
+++ b/TESTING/sp_dget01.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
int sp_dget01(int m, int n, SuperMatrix *A, SuperMatrix *L,
SuperMatrix *U, int *perm_r, double *resid)
diff --git a/TESTING/sp_dget02.c b/TESTING/sp_dget02.c
index eab3ec4..9d08dda 100644
--- a/TESTING/sp_dget02.c
+++ b/TESTING/sp_dget02.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
int sp_dget02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
double *x, int ldx, double *b, int ldb, double *resid)
diff --git a/TESTING/sp_dget04.c b/TESTING/sp_dget04.c
index dd3d1f4..dc88ed8 100644
--- a/TESTING/sp_dget04.c
+++ b/TESTING/sp_dget04.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
int sp_dget04(int n, int nrhs, double *x, int ldx, double *xact,
int ldxact, double rcond, double *resid)
diff --git a/TESTING/sp_dget07.c b/TESTING/sp_dget07.c
index ca78c22..2737c22 100644
--- a/TESTING/sp_dget07.c
+++ b/TESTING/sp_dget07.c
@@ -7,7 +7,7 @@
*
*/
#include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
int sp_dget07(trans_t trans, int n, int nrhs, SuperMatrix *A, double *b,
int ldb, double *x, int ldx, double *xact,
diff --git a/TESTING/sp_ienv.c b/TESTING/sp_ienv.c
index a0a8509..480531b 100644
--- a/TESTING/sp_ienv.c
+++ b/TESTING/sp_ienv.c
@@ -2,6 +2,8 @@
* File name: sp_ienv.c
* History: Modified from lapack routine ILAENV
*/
+#include "slu_Cnames.h"
+
int
sp_ienv(int ispec)
{
diff --git a/TESTING/sp_sconvert.c b/TESTING/sp_sconvert.c
index 4c51a07..0b35d78 100644
--- a/TESTING/sp_sconvert.c
+++ b/TESTING/sp_sconvert.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
/*
* Convert a full matrix into a sparse matrix format.
diff --git a/TESTING/sp_sget01.c b/TESTING/sp_sget01.c
index 07c8d2e..5ab57b5 100644
--- a/TESTING/sp_sget01.c
+++ b/TESTING/sp_sget01.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
int sp_sget01(int m, int n, SuperMatrix *A, SuperMatrix *L,
SuperMatrix *U, int *perm_r, float *resid)
diff --git a/TESTING/sp_sget02.c b/TESTING/sp_sget02.c
index 892f068..90826d1 100644
--- a/TESTING/sp_sget02.c
+++ b/TESTING/sp_sget02.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
int sp_sget02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
float *x, int ldx, float *b, int ldb, float *resid)
diff --git a/TESTING/sp_sget04.c b/TESTING/sp_sget04.c
index d8c3c7a..cb4e6dc 100644
--- a/TESTING/sp_sget04.c
+++ b/TESTING/sp_sget04.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
int sp_sget04(int n, int nrhs, float *x, int ldx, float *xact,
int ldxact, float rcond, float *resid)
diff --git a/TESTING/sp_sget07.c b/TESTING/sp_sget07.c
index aaf9776..f7d86be 100644
--- a/TESTING/sp_sget07.c
+++ b/TESTING/sp_sget07.c
@@ -7,7 +7,7 @@
*
*/
#include <math.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
int sp_sget07(trans_t trans, int n, int nrhs, SuperMatrix *A, float *b,
int ldb, float *x, int ldx, float *xact,
diff --git a/TESTING/sp_zconvert.c b/TESTING/sp_zconvert.c
index 8f4e866..4e1d17e 100644
--- a/TESTING/sp_zconvert.c
+++ b/TESTING/sp_zconvert.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
/*
* Convert a full matrix into a sparse matrix format.
diff --git a/TESTING/sp_zget01.c b/TESTING/sp_zget01.c
index 97c46d4..7ee93e2 100644
--- a/TESTING/sp_zget01.c
+++ b/TESTING/sp_zget01.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
int sp_zget01(int m, int n, SuperMatrix *A, SuperMatrix *L,
SuperMatrix *U, int *perm_r, double *resid)
diff --git a/TESTING/sp_zget02.c b/TESTING/sp_zget02.c
index 4bc9756..ed24d21 100644
--- a/TESTING/sp_zget02.c
+++ b/TESTING/sp_zget02.c
@@ -6,7 +6,7 @@
* October 15, 2003
*
*/
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
int sp_zget02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
doublecomplex *x, int ldx, doublecomplex *b, int ldb, double *resid)
diff --git a/TESTING/sp_zget04.c b/TESTING/sp_zget04.c
index fc5a820..3a2632d 100644
--- a/TESTING/sp_zget04.c
+++ b/TESTING/sp_zget04.c
@@ -1,5 +1,4 @@
-
/*
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
*
*/
#include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
int sp_zget04(int n, int nrhs, doublecomplex *x, int ldx, doublecomplex *xact,
int ldxact, double rcond, double *resid)
diff --git a/TESTING/sp_zget07.c b/TESTING/sp_zget07.c
index c41fdcb..5f9ab12 100644
--- a/TESTING/sp_zget07.c
+++ b/TESTING/sp_zget07.c
@@ -7,7 +7,7 @@
*
*/
#include <math.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
int sp_zget07(trans_t trans, int n, int nrhs, SuperMatrix *A, doublecomplex *b,
int ldb, doublecomplex *x, int ldx, doublecomplex *xact,
diff --git a/TESTING/zdrive.c b/TESTING/zdrive.c
index 166f310..1269fb0 100644
--- a/TESTING/zdrive.c
+++ b/TESTING/zdrive.c
@@ -11,7 +11,7 @@
* Purpose: MAIN test program
*/
#include <string.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
#define NTESTS 5 /* Number of test types */
#define NTYPES 11 /* Number of matrix types */
@@ -21,6 +21,10 @@
#define FMT2 "%10s:fact=%4d, trans=%4d, equed=%c, n=%d, imat=%d, test(%d)=%12.5g\n"
#define FMT3 "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n"
+static void
+parse_command_line(int argc, char *argv[], char *matrix_type,
+ int *n, int *w, int *relax, int *nrhs, int *maxsuper,
+ int *rowblk, int *colblk, int *lwork, double *u);
main(int argc, char *argv[])
{
@@ -83,7 +87,6 @@ main(int argc, char *argv[])
static trans_t transs[] = {NOTRANS, TRANS, CONJ};
/* Some function prototypes */
- static void parse_command_line();
extern int sp_zget01(int, int, SuperMatrix *, SuperMatrix *,
SuperMatrix *, int *, double *);
extern int sp_zget02(trans_t, int, int, int, SuperMatrix *, doublecomplex *,
diff --git a/make.inc b/make.inc
index 51fda64..58b609a 100644
--- a/make.inc
+++ b/make.inc
@@ -14,16 +14,12 @@
#
############################################################################
#
-# The machine (platform) identifier to append to the library names
-#
-PLAT = _solaris
-
#
# The name of the libraries to be created/linked to
#
-TMGLIB = tmglib$(PLAT).a
-SUPERLULIB = superlu$(PLAT).a
-BLASLIB = ../blas$(PLAT).a
+TMGLIB = libtmglib.a
+SUPERLULIB = libsuperlu_3.0.a
+BLASLIB = ../libblas.a
#
# The archiver and the flag(s) to use when building archive (library)
@@ -47,4 +43,4 @@ CDEFS = -DAdd_
#
# The directory in which Matlab is installed
#
-MATLAB = /usr/sww/matlab
+MATLAB = /usr/sww/pkg/matlab
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/superlu.git
More information about the debian-science-commits
mailing list