[superlu] 05/11: Imported Upstream version 3.0+20070106

Tue May 17 19:22:58 UTC 2016

This is an automated email from the git hooks/post-receive script.

nschloe-guest pushed a commit to branch master
in repository superlu.

commit 5e31076dda557ee3dc0735e83c786670d3087ce4
Author: Nico Schlömer <nico.schloemer at gmail.com>
Date:   Tue May 17 21:22:24 2016 +0200

    Imported Upstream version 3.0+20070106
---
 CBLAS/Cnames.h                                     |    1 -
 CBLAS/Makefile                                     |    2 +-
 CBLAS/cmyblas2.c                                   |  183 --
 CBLAS/dmyblas2.c                                   |  225 ---
 CBLAS/f2c.h                                        |    2 +-
 CBLAS/slu_Cnames.h                                 |    1 +
 CBLAS/smyblas2.c                                   |  225 ---
 CBLAS/zmyblas2.c                                   |  183 --
 EXAMPLE/Makefile                                   |   64 +-
 EXAMPLE/clinsol.c                                  |    2 +-
 EXAMPLE/clinsol1.c                                 |    2 +-
 EXAMPLE/clinsolx.c                                 |    2 +-
 EXAMPLE/clinsolx1.c                                |    2 +-
 EXAMPLE/clinsolx2.c                                |    2 +-
 EXAMPLE/dlinsol.c                                  |    2 +-
 EXAMPLE/dlinsol1.c                                 |    2 +-
 EXAMPLE/dlinsolx.c                                 |    2 +-
 EXAMPLE/dlinsolx1.c                                |    2 +-
 EXAMPLE/dlinsolx2.c                                |    2 +-
 EXAMPLE/dreadtriple.c                              |  118 ++
 EXAMPLE/slinsol.c                                  |    2 +-
 EXAMPLE/slinsol1.c                                 |    2 +-
 EXAMPLE/slinsolx.c                                 |    2 +-
 EXAMPLE/slinsolx1.c                                |    2 +-
 EXAMPLE/slinsolx2.c                                |    2 +-
 EXAMPLE/sp_ienv.c                                  |    2 +
 EXAMPLE/superlu.c                                  |    2 +-
 EXAMPLE/zlinsol.c                                  |    2 +-
 EXAMPLE/zlinsol1.c                                 |    2 +-
 EXAMPLE/zlinsolx.c                                 |    2 +-
 EXAMPLE/zlinsolx1.c                                |    2 +-
 EXAMPLE/zlinsolx2.c                                |    2 +-
 EXAMPLE/zreadtriple.c                              |   90 +
 FORTRAN/Makefile                                   |    3 +-
 FORTRAN/{c_fortran_dgssv.c => c_fortran_cgssv.c}   |   30 +-
 FORTRAN/c_fortran_dgssv.c                          |   17 +-
 .../{c_fortran_dgssv.c => c_fortran_dgssv.c.bak}   |    2 +-
 FORTRAN/{c_fortran_dgssv.c => c_fortran_sgssv.c}   |   30 +-
 FORTRAN/{c_fortran_dgssv.c => c_fortran_zgssv.c}   |   28 +-
 FORTRAN/f77_main.f                                 |    4 +-
 FORTRAN/f77exm.out                                 |    9 +
 INSTALL/Makefile                                   |    6 +-
 MAKE_INC/make.alpha                                |    4 +-
 MAKE_INC/make.cray                                 |    4 +-
 MAKE_INC/make.hppa                                 |    4 +-
 MAKE_INC/make.inc                                  |    6 +-
 MAKE_INC/make.linux                                |    6 +-
 MAKE_INC/make.rs6k                                 |    4 +-
 MAKE_INC/make.sgi                                  |    6 +-
 MAKE_INC/make.solaris                              |    8 +-
 MAKE_INC/make.sp                                   |    4 +-
 MAKE_INC/make.sun4                                 |    8 +-
 MATLAB/mexlusolve.c                                |    2 +-
 MATLAB/mexsuperlu.c                                |    2 +-
 README                                             |    2 +-
 SRC/Makefile                                       |   68 +-
 SRC/ccolumn_bmod.c                                 |    2 +-
 SRC/ccolumn_dfs.c                                  |    3 +-
 SRC/ccopy_to_ucol.c                                |    4 +-
 SRC/cgscon.c                                       |    2 +-
 SRC/cgsequ.c                                       |    4 +-
 SRC/cgsrfs.c                                       |    2 +-
 SRC/cgssv.c                                        |    3 +-
 SRC/cgssvx.c                                       |    4 +-
 SRC/cgstrf.c                                       |    6 +-
 SRC/cgstrs.c                                       |    2 +-
 SRC/cgstrs.c.bak                                   |  339 ----
 SRC/clacon.c                                       |    5 +-
 SRC/clangs.c                                       |    4 +-
 SRC/claqgs.c                                       |    4 +-
 SRC/cmemory.c                                      |   24 +-
 SRC/cmyblas2.c                                     |    3 +-
 SRC/colamd.c                                       | 1961 ++++++++++++++------
 SRC/colamd.h                                       |  215 ++-
 SRC/cpanel_bmod.c                                  |    2 +-
 SRC/cpanel_dfs.c                                   |    4 +-
 SRC/cpivotL.c                                      |    2 +-
 SRC/cpivotgrowth.c                                 |    4 +-
 SRC/cpruneL.c                                      |    4 +-
 SRC/creadhb.c                                      |    3 +-
 SRC/csnode_bmod.c                                  |    2 +-
 SRC/csnode_dfs.c                                   |    4 +-
 SRC/csp_blas2.c                                    |   14 +-
 SRC/csp_blas2.c.bak                                |  479 -----
 SRC/csp_blas3.c                                    |    4 +-
 SRC/cutil.c                                        |   14 +-
 SRC/dcolumn_bmod.c                                 |    2 +-
 SRC/dcolumn_dfs.c                                  |    3 +-
 SRC/dcomplex.c                                     |    7 +-
 SRC/dcopy_to_ucol.c                                |    4 +-
 SRC/dgscon.c                                       |    2 +-
 SRC/dgsequ.c                                       |    4 +-
 SRC/dgsrfs.c                                       |    2 +-
 SRC/dgssv.c                                        |    3 +-
 SRC/dgssvx.c                                       |    2 +-
 SRC/dgstrf.c                                       |    6 +-
 SRC/dgstrs.c                                       |    2 +-
 SRC/dgstrs.c.bak                                   |  334 ----
 SRC/dgstrsL.c                                      |    4 +-
 SRC/dlacon.c                                       |    3 +-
 SRC/dlamch.c                                       |    2 +
 SRC/dlangs.c                                       |    4 +-
 SRC/dlaqgs.c                                       |    3 +-
 SRC/dmemory.c                                      |   24 +-
 SRC/dmyblas2.c                                     |    1 -
 SRC/dpanel_bmod.c                                  |    2 +-
 SRC/dpanel_dfs.c                                   |    4 +-
 SRC/dpivotL.c                                      |    2 +-
 SRC/dpivotgrowth.c                                 |    4 +-
 SRC/dpruneL.c                                      |    4 +-
 SRC/dreadhb.c                                      |    3 +-
 SRC/dsnode_bmod.c                                  |    2 +-
 SRC/dsnode_dfs.c                                   |    4 +-
 SRC/dsp_blas2.c                                    |    2 +-
 SRC/dsp_blas2.c.bak                                |  469 -----
 SRC/dsp_blas3.c                                    |    4 +-
 SRC/dutil.c                                        |   11 +-
 SRC/dzsum1.c                                       |    3 +-
 SRC/get_perm_c.c                                   |   14 +-
 SRC/heap_relax_snode.c                             |    2 +-
 SRC/icmax1.c                                       |    3 +-
 SRC/izmax1.c                                       |   16 +-
 SRC/lsame.c                                        |    2 +
 SRC/memory.c                                       |    5 +-
 SRC/{colamd.c => old_colamd.c}                     |    0
 SRC/{colamd.h => old_colamd.h}                     |    0
 SRC/relax_snode.c                                  |    2 +-
 SRC/scolumn_bmod.c                                 |    2 +-
 SRC/scolumn_dfs.c                                  |    3 +-
 SRC/scomplex.c                                     |    7 +-
 SRC/scopy_to_ucol.c                                |    4 +-
 SRC/scsum1.c                                       |    3 +-
 SRC/sgscon.c                                       |    2 +-
 SRC/sgsequ.c                                       |    4 +-
 SRC/sgsrfs.c                                       |    2 +-
 SRC/sgssv.c                                        |    3 +-
 SRC/sgssvx.c                                       |    2 +-
 SRC/sgstrf.c                                       |    6 +-
 SRC/sgstrs.c                                       |    2 +-
 SRC/sgstrs.c.bak                                   |  334 ----
 SRC/slacon.c                                       |    3 +-
 SRC/slamch.c                                       |    2 +
 SRC/slangs.c                                       |    4 +-
 SRC/slaqgs.c                                       |    4 +-
 SRC/{Cnames.h => slu_Cnames.h}                     |   57 +
 SRC/{csp_defs.h => slu_cdefs.h}                    |    6 +-
 SRC/{dcomplex.h => slu_dcomplex.h}                 |    0
 SRC/{dsp_defs.h => slu_ddefs.h}                    |    4 +-
 SRC/{scomplex.h => slu_scomplex.h}                 |    0
 SRC/{ssp_defs.h => slu_sdefs.h}                    |    4 +-
 SRC/{util.h => slu_util.h}                         |    8 +-
 SRC/{zsp_defs.h => slu_zdefs.h}                    |    6 +-
 SRC/smemory.c                                      |   24 +-
 SRC/smyblas2.c                                     |    1 -
 SRC/sp_coletree.c                                  |    2 +-
 SRC/sp_ienv.c                                      |    2 +
 SRC/sp_preorder.c                                  |    2 +-
 SRC/spanel_bmod.c                                  |    2 +-
 SRC/spanel_dfs.c                                   |    4 +-
 SRC/spivotL.c                                      |    2 +-
 SRC/spivotgrowth.c                                 |    4 +-
 SRC/spruneL.c                                      |    4 +-
 SRC/sreadhb.c                                      |    3 +-
 SRC/ssnode_bmod.c                                  |    2 +-
 SRC/ssnode_dfs.c                                   |    4 +-
 SRC/ssp_blas2.c                                    |    2 +-
 SRC/ssp_blas2.c.bak                                |  469 -----
 SRC/ssp_blas3.c                                    |    4 +-
 SRC/sutil.c                                        |   11 +-
 SRC/util.c                                         |   21 +-
 SRC/xerbla.c                                       |    3 +
 SRC/zcolumn_bmod.c                                 |    2 +-
 SRC/zcolumn_dfs.c                                  |    3 +-
 SRC/zcopy_to_ucol.c                                |    4 +-
 SRC/zgscon.c                                       |    2 +-
 SRC/zgsequ.c                                       |    4 +-
 SRC/zgsrfs.c                                       |    2 +-
 SRC/zgssv.c                                        |    3 +-
 SRC/zgssvx.c                                       |    4 +-
 SRC/zgstrf.c                                       |    6 +-
 SRC/zgstrs.c                                       |    2 +-
 SRC/zgstrs.c.bak                                   |  339 ----
 SRC/zlacon.c                                       |    5 +-
 SRC/zlangs.c                                       |    4 +-
 SRC/zlaqgs.c                                       |    4 +-
 SRC/zmemory.c                                      |   24 +-
 SRC/{zmemory.c => zmemory.c.bak}                   |   14 +-
 SRC/zmyblas2.c                                     |    3 +-
 SRC/zpanel_bmod.c                                  |    2 +-
 SRC/zpanel_dfs.c                                   |    4 +-
 SRC/zpivotL.c                                      |    2 +-
 SRC/zpivotgrowth.c                                 |    4 +-
 SRC/zpruneL.c                                      |    4 +-
 SRC/zreadhb.c                                      |    3 +-
 SRC/zsnode_bmod.c                                  |    2 +-
 SRC/zsnode_dfs.c                                   |    4 +-
 SRC/zsp_blas2.c                                    |   14 +-
 SRC/zsp_blas2.c.bak                                |  479 -----
 SRC/zsp_blas3.c                                    |    4 +-
 SRC/zutil.c                                        |   14 +-
 TESTING/MATGEN/Cnames.h                            |    1 -
 TESTING/MATGEN/clatb4.c                            |    1 +
 TESTING/MATGEN/dlatb4.c                            |    1 +
 TESTING/MATGEN/f2c.h                               |    2 +-
 TESTING/MATGEN/slatb4.c                            |    1 +
 TESTING/MATGEN/slu_Cnames.h                        |    1 +
 TESTING/MATGEN/zlatb4.c                            |    1 +
 TESTING/Makefile                                   |    5 +-
 TESTING/cdrive.c                                   |    9 +-
 TESTING/ddrive.c                                   |    7 +-
 TESTING/sdrive.c                                   |    9 +-
 TESTING/sp_cconvert.c                              |    4 +-
 TESTING/sp_cget01.c                                |    4 +-
 TESTING/sp_cget02.c                                |    2 +-
 TESTING/sp_cget04.c                                |    4 +-
 TESTING/sp_cget07.c                                |    2 +-
 TESTING/sp_dconvert.c                              |    4 +-
 TESTING/sp_dget01.c                                |    4 +-
 TESTING/sp_dget02.c                                |    2 +-
 TESTING/sp_dget04.c                                |    4 +-
 TESTING/sp_dget07.c                                |    2 +-
 TESTING/sp_ienv.c                                  |    2 +
 TESTING/sp_sconvert.c                              |    4 +-
 TESTING/sp_sget01.c                                |    4 +-
 TESTING/sp_sget02.c                                |    2 +-
 TESTING/sp_sget04.c                                |    4 +-
 TESTING/sp_sget07.c                                |    2 +-
 TESTING/sp_zconvert.c                              |    4 +-
 TESTING/sp_zget01.c                                |    4 +-
 TESTING/sp_zget02.c                                |    2 +-
 TESTING/sp_zget04.c                                |    4 +-
 TESTING/sp_zget07.c                                |    2 +-
 TESTING/zdrive.c                                   |    7 +-
 make.inc                                           |   12 +-
 234 files changed, 2408 insertions(+), 5202 deletions(-)

diff --git a/CBLAS/Cnames.h b/CBLAS/Cnames.h
deleted file mode 120000
index dd3a080..0000000
--- a/CBLAS/Cnames.h
+++ /dev/null
@@ -1 +0,0 @@
-../SRC/Cnames.h
\ No newline at end of file
diff --git a/CBLAS/Makefile b/CBLAS/Makefile
index a6fdc77..76d368c 100644
--- a/CBLAS/Makefile
+++ b/CBLAS/Makefile
@@ -83,4 +83,4 @@ complex16: $(ZBLAS1) $(ZBLAS2) $(ZBLAS3)
 	$(CC) $(CFLAGS) $(CDEFS) -I$(HEADER) -c $< $(VERBOSE)
 
 clean:	
-	rm -f *.o ../blas$(PLAT).a
+	rm -f *.o ../libblas.a
diff --git a/CBLAS/cmyblas2.c b/CBLAS/cmyblas2.c
deleted file mode 100644
index 74fdbca..0000000
--- a/CBLAS/cmyblas2.c
+++ /dev/null
@@ -1,183 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
- * File name:		cmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-#include "scomplex.h"
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void clsolve ( int ldm, int ncol, complex *M, complex *rhs )
-{
-    int k;
-    complex x0, x1, x2, x3, temp;
-    complex *M0;
-    complex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-
-    M0 = &M[0];
-
-
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-      	Mki0 = M0 + 1;
-      	Mki1 = Mki0 + ldm + 1;
-      	Mki2 = Mki1 + ldm + 1;
-      	Mki3 = Mki2 + ldm + 1;
-
-      	x0 = rhs[firstcol];
-      	cc_mult(&temp, &x0, Mki0); Mki0++;
-      	c_sub(&x1, &rhs[firstcol+1], &temp);
-      	cc_mult(&temp, &x0, Mki0); Mki0++;
-	c_sub(&x2, &rhs[firstcol+2], &temp);
-	cc_mult(&temp, &x1, Mki1); Mki1++;
-	c_sub(&x2, &x2, &temp);
-      	cc_mult(&temp, &x0, Mki0); Mki0++;
-	c_sub(&x3, &rhs[firstcol+3], &temp);
-	cc_mult(&temp, &x1, Mki1); Mki1++;
-	c_sub(&x3, &x3, &temp);
-	cc_mult(&temp, &x2, Mki2); Mki2++;
-	c_sub(&x3, &x3, &temp);
-
- 	rhs[++firstcol] = x1;
-      	rhs[++firstcol] = x2;
-      	rhs[++firstcol] = x3;
-      	++firstcol;
-    
-      	for (k = firstcol; k < ncol; k++) {
-	    cc_mult(&temp, &x0, Mki0); Mki0++;
-	    c_sub(&rhs[k], &rhs[k], &temp);
-	    cc_mult(&temp, &x1, Mki1); Mki1++;
-	    c_sub(&rhs[k], &rhs[k], &temp);
-	    cc_mult(&temp, &x2, Mki2); Mki2++;
-	    c_sub(&rhs[k], &rhs[k], &temp);
-	    cc_mult(&temp, &x3, Mki3); Mki3++;
-	    c_sub(&rhs[k], &rhs[k], &temp);
-	}
-
-        M0 += 4 * ldm + 4;
-    }
-
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-        Mki0 = M0 + 1;
-        Mki1 = Mki0 + ldm + 1;
-
-        x0 = rhs[firstcol];
-	cc_mult(&temp, &x0, Mki0); Mki0++;
-	c_sub(&x1, &rhs[firstcol+1], &temp);
-
-      	rhs[++firstcol] = x1;
-      	++firstcol;
-    
-      	for (k = firstcol; k < ncol; k++) {
-	    cc_mult(&temp, &x0, Mki0); Mki0++;
-	    c_sub(&rhs[k], &rhs[k], &temp);
-	    cc_mult(&temp, &x1, Mki1); Mki1++;
-	    c_sub(&rhs[k], &rhs[k], &temp);
-	} 
-    }
-    
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-cusolve ( ldm, ncol, M, rhs )
-int ldm;	/* in */
-int ncol;	/* in */
-complex *M;	/* in */
-complex *rhs;	/* modified */
-{
-    complex xj, temp;
-    int jcol, j, irow;
-
-    jcol = ncol - 1;
-
-    for (j = 0; j < ncol; j++) {
-
-	c_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
-	rhs[jcol] = xj;
-	
-	for (irow = 0; irow < jcol; irow++) {
-	    cc_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
-	    c_sub(&rhs[irow], &rhs[irow], &temp);
-	}
-
-	jcol--;
-
-    }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void cmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm;	/* in -- leading dimension of M */
-int nrow;	/* in */ 
-int ncol;	/* in */
-complex *M;	/* in */
-complex *vec;	/* in */
-complex *Mxvec;	/* in/out */
-{
-    complex vi0, vi1, vi2, vi3;
-    complex *M0, temp;
-    complex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-    int k;
-
-    M0 = &M[0];
-
-    while ( firstcol < ncol - 3 ) {	/* Do 4 columns */
-	Mki0 = M0;
-	Mki1 = Mki0 + ldm;
-	Mki2 = Mki1 + ldm;
-	Mki3 = Mki2 + ldm;
-
-	vi0 = vec[firstcol++];
-	vi1 = vec[firstcol++];
-	vi2 = vec[firstcol++];
-	vi3 = vec[firstcol++];	
-	for (k = 0; k < nrow; k++) {
-	    cc_mult(&temp, &vi0, Mki0); Mki0++;
-	    c_add(&Mxvec[k], &Mxvec[k], &temp);
-	    cc_mult(&temp, &vi1, Mki1); Mki1++;
-	    c_add(&Mxvec[k], &Mxvec[k], &temp);
-	    cc_mult(&temp, &vi2, Mki2); Mki2++;
-	    c_add(&Mxvec[k], &Mxvec[k], &temp);
-	    cc_mult(&temp, &vi3, Mki3); Mki3++;
-	    c_add(&Mxvec[k], &Mxvec[k], &temp);
-	}
-
-	M0 += 4 * ldm;
-    }
-
-    while ( firstcol < ncol ) {		/* Do 1 column */
- 	Mki0 = M0;
-	vi0 = vec[firstcol++];
-	for (k = 0; k < nrow; k++) {
-	    cc_mult(&temp, &vi0, Mki0); Mki0++;
-	    c_add(&Mxvec[k], &Mxvec[k], &temp);
-	}
-	M0 += ldm;
-    }
-	
-}
-
diff --git a/CBLAS/dmyblas2.c b/CBLAS/dmyblas2.c
deleted file mode 100644
index e6bbdd1..0000000
--- a/CBLAS/dmyblas2.c
+++ /dev/null
@@ -1,225 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
- * File name:		dmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void dlsolve ( int ldm, int ncol, double *M, double *rhs )
-{
-    int k;
-    double x0, x1, x2, x3, x4, x5, x6, x7;
-    double *M0;
-    register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-
-    M0 = &M[0];
-
-    while ( firstcol < ncol - 7 ) { /* Do 8 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-      Mki4 = Mki3 + ldm + 1;
-      Mki5 = Mki4 + ldm + 1;
-      Mki6 = Mki5 + ldm + 1;
-      Mki7 = Mki6 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-      x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-	                   - x3 * *Mki3++;
-      x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-	                   - x3 * *Mki3++ - x4 * *Mki4++;
-      x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-	                   - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
-      x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-	                   - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
-			   - x6 * *Mki6++;
-
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      rhs[++firstcol] = x4;
-      rhs[++firstcol] = x5;
-      rhs[++firstcol] = x6;
-      rhs[++firstcol] = x7;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-	                - x2 * *Mki2++ - x3 * *Mki3++
-                        - x4 * *Mki4++ - x5 * *Mki5++
-			- x6 * *Mki6++ - x7 * *Mki7++;
- 
-      M0 += 8 * ldm + 8;
-    }
-
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-	                - x2 * *Mki2++ - x3 * *Mki3++;
- 
-      M0 += 4 * ldm + 4;
-    }
-
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-
-      rhs[++firstcol] = x1;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
- 
-    }
-    
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-dusolve ( ldm, ncol, M, rhs )
-int ldm;	/* in */
-int ncol;	/* in */
-double *M;	/* in */
-double *rhs;	/* modified */
-{
-    double xj;
-    int jcol, j, irow;
-
-    jcol = ncol - 1;
-
-    for (j = 0; j < ncol; j++) {
-
-	xj = rhs[jcol] / M[jcol + jcol*ldm]; 		/* M(jcol, jcol) */
-	rhs[jcol] = xj;
-	
-	for (irow = 0; irow < jcol; irow++)
-	    rhs[irow] -= xj * M[irow + jcol*ldm];	/* M(irow, jcol) */
-
-	jcol--;
-
-    }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void dmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-
-int ldm;	/* in -- leading dimension of M */
-int nrow;	/* in */ 
-int ncol;	/* in */
-double *M;	/* in */
-double *vec;	/* in */
-double *Mxvec;	/* in/out */
-
-{
-    double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
-    double *M0;
-    register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-    int k;
-
-    M0 = &M[0];
-    while ( firstcol < ncol - 7 ) {	/* Do 8 columns */
-
-	Mki0 = M0;
-	Mki1 = Mki0 + ldm;
-        Mki2 = Mki1 + ldm;
-        Mki3 = Mki2 + ldm;
-	Mki4 = Mki3 + ldm;
-	Mki5 = Mki4 + ldm;
-	Mki6 = Mki5 + ldm;
-	Mki7 = Mki6 + ldm;
-
-	vi0 = vec[firstcol++];
-	vi1 = vec[firstcol++];
-	vi2 = vec[firstcol++];
-	vi3 = vec[firstcol++];	
-	vi4 = vec[firstcol++];
-	vi5 = vec[firstcol++];
-	vi6 = vec[firstcol++];
-	vi7 = vec[firstcol++];	
-
-	for (k = 0; k < nrow; k++) 
-	    Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-		      + vi2 * *Mki2++ + vi3 * *Mki3++ 
-		      + vi4 * *Mki4++ + vi5 * *Mki5++
-		      + vi6 * *Mki6++ + vi7 * *Mki7++;
-
-	M0 += 8 * ldm;
-    }
-
-    while ( firstcol < ncol - 3 ) {	/* Do 4 columns */
-
-	Mki0 = M0;
-	Mki1 = Mki0 + ldm;
-	Mki2 = Mki1 + ldm;
-	Mki3 = Mki2 + ldm;
-
-	vi0 = vec[firstcol++];
-	vi1 = vec[firstcol++];
-	vi2 = vec[firstcol++];
-	vi3 = vec[firstcol++];	
-	for (k = 0; k < nrow; k++) 
-	    Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-		      + vi2 * *Mki2++ + vi3 * *Mki3++ ;
-
-	M0 += 4 * ldm;
-    }
-
-    while ( firstcol < ncol ) {		/* Do 1 column */
-
- 	Mki0 = M0;
-	vi0 = vec[firstcol++];
-	for (k = 0; k < nrow; k++)
-	    Mxvec[k] += vi0 * *Mki0++;
-
-	M0 += ldm;
-    }
-	
-}
-
diff --git a/CBLAS/f2c.h b/CBLAS/f2c.h
index caa33e1..3116864 100644
--- a/CBLAS/f2c.h
+++ b/CBLAS/f2c.h
@@ -4,7 +4,7 @@
 
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 
-#include "Cnames.h"
+#include "slu_Cnames.h"
 
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
diff --git a/CBLAS/slu_Cnames.h b/CBLAS/slu_Cnames.h
new file mode 120000
index 0000000..62b2820
--- /dev/null
+++ b/CBLAS/slu_Cnames.h
@@ -0,0 +1 @@
+../SRC/slu_Cnames.h
\ No newline at end of file
diff --git a/CBLAS/smyblas2.c b/CBLAS/smyblas2.c
deleted file mode 100644
index 729e17f..0000000
--- a/CBLAS/smyblas2.c
+++ /dev/null
@@ -1,225 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
- * File name:		smyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void slsolve ( int ldm, int ncol, float *M, float *rhs )
-{
-    int k;
-    float x0, x1, x2, x3, x4, x5, x6, x7;
-    float *M0;
-    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-
-    M0 = &M[0];
-
-    while ( firstcol < ncol - 7 ) { /* Do 8 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-      Mki4 = Mki3 + ldm + 1;
-      Mki5 = Mki4 + ldm + 1;
-      Mki6 = Mki5 + ldm + 1;
-      Mki7 = Mki6 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-      x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-	                   - x3 * *Mki3++;
-      x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-	                   - x3 * *Mki3++ - x4 * *Mki4++;
-      x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-	                   - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
-      x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-	                   - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
-			   - x6 * *Mki6++;
-
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      rhs[++firstcol] = x4;
-      rhs[++firstcol] = x5;
-      rhs[++firstcol] = x6;
-      rhs[++firstcol] = x7;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-	                - x2 * *Mki2++ - x3 * *Mki3++
-                        - x4 * *Mki4++ - x5 * *Mki5++
-			- x6 * *Mki6++ - x7 * *Mki7++;
- 
-      M0 += 8 * ldm + 8;
-    }
-
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-	                - x2 * *Mki2++ - x3 * *Mki3++;
- 
-      M0 += 4 * ldm + 4;
-    }
-
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-
-      rhs[++firstcol] = x1;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-	rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
- 
-    }
-    
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-susolve ( ldm, ncol, M, rhs )
-int ldm;	/* in */
-int ncol;	/* in */
-float *M;	/* in */
-float *rhs;	/* modified */
-{
-    float xj;
-    int jcol, j, irow;
-
-    jcol = ncol - 1;
-
-    for (j = 0; j < ncol; j++) {
-
-	xj = rhs[jcol] / M[jcol + jcol*ldm]; 		/* M(jcol, jcol) */
-	rhs[jcol] = xj;
-	
-	for (irow = 0; irow < jcol; irow++)
-	    rhs[irow] -= xj * M[irow + jcol*ldm];	/* M(irow, jcol) */
-
-	jcol--;
-
-    }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-
-int ldm;	/* in -- leading dimension of M */
-int nrow;	/* in */ 
-int ncol;	/* in */
-float *M;	/* in */
-float *vec;	/* in */
-float *Mxvec;	/* in/out */
-
-{
-    float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
-    float *M0;
-    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-    int k;
-
-    M0 = &M[0];
-    while ( firstcol < ncol - 7 ) {	/* Do 8 columns */
-
-	Mki0 = M0;
-	Mki1 = Mki0 + ldm;
-        Mki2 = Mki1 + ldm;
-        Mki3 = Mki2 + ldm;
-	Mki4 = Mki3 + ldm;
-	Mki5 = Mki4 + ldm;
-	Mki6 = Mki5 + ldm;
-	Mki7 = Mki6 + ldm;
-
-	vi0 = vec[firstcol++];
-	vi1 = vec[firstcol++];
-	vi2 = vec[firstcol++];
-	vi3 = vec[firstcol++];	
-	vi4 = vec[firstcol++];
-	vi5 = vec[firstcol++];
-	vi6 = vec[firstcol++];
-	vi7 = vec[firstcol++];	
-
-	for (k = 0; k < nrow; k++) 
-	    Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-		      + vi2 * *Mki2++ + vi3 * *Mki3++ 
-		      + vi4 * *Mki4++ + vi5 * *Mki5++
-		      + vi6 * *Mki6++ + vi7 * *Mki7++;
-
-	M0 += 8 * ldm;
-    }
-
-    while ( firstcol < ncol - 3 ) {	/* Do 4 columns */
-
-	Mki0 = M0;
-	Mki1 = Mki0 + ldm;
-	Mki2 = Mki1 + ldm;
-	Mki3 = Mki2 + ldm;
-
-	vi0 = vec[firstcol++];
-	vi1 = vec[firstcol++];
-	vi2 = vec[firstcol++];
-	vi3 = vec[firstcol++];	
-	for (k = 0; k < nrow; k++) 
-	    Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-		      + vi2 * *Mki2++ + vi3 * *Mki3++ ;
-
-	M0 += 4 * ldm;
-    }
-
-    while ( firstcol < ncol ) {		/* Do 1 column */
-
- 	Mki0 = M0;
-	vi0 = vec[firstcol++];
-	for (k = 0; k < nrow; k++)
-	    Mxvec[k] += vi0 * *Mki0++;
-
-	M0 += ldm;
-    }
-	
-}
-
diff --git a/CBLAS/zmyblas2.c b/CBLAS/zmyblas2.c
deleted file mode 100644
index 59450cd..0000000
--- a/CBLAS/zmyblas2.c
+++ /dev/null
@@ -1,183 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
- * File name:		zmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-#include "dcomplex.h"
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void zlsolve ( int ldm, int ncol, doublecomplex *M, doublecomplex *rhs )
-{
-    int k;
-    doublecomplex x0, x1, x2, x3, temp;
-    doublecomplex *M0;
-    doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-
-    M0 = &M[0];
-
-
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-      	Mki0 = M0 + 1;
-      	Mki1 = Mki0 + ldm + 1;
-      	Mki2 = Mki1 + ldm + 1;
-      	Mki3 = Mki2 + ldm + 1;
-
-      	x0 = rhs[firstcol];
-      	zz_mult(&temp, &x0, Mki0); Mki0++;
-      	z_sub(&x1, &rhs[firstcol+1], &temp);
-      	zz_mult(&temp, &x0, Mki0); Mki0++;
-	z_sub(&x2, &rhs[firstcol+2], &temp);
-	zz_mult(&temp, &x1, Mki1); Mki1++;
-	z_sub(&x2, &x2, &temp);
-      	zz_mult(&temp, &x0, Mki0); Mki0++;
-	z_sub(&x3, &rhs[firstcol+3], &temp);
-	zz_mult(&temp, &x1, Mki1); Mki1++;
-	z_sub(&x3, &x3, &temp);
-	zz_mult(&temp, &x2, Mki2); Mki2++;
-	z_sub(&x3, &x3, &temp);
-
- 	rhs[++firstcol] = x1;
-      	rhs[++firstcol] = x2;
-      	rhs[++firstcol] = x3;
-      	++firstcol;
-    
-      	for (k = firstcol; k < ncol; k++) {
-	    zz_mult(&temp, &x0, Mki0); Mki0++;
-	    z_sub(&rhs[k], &rhs[k], &temp);
-	    zz_mult(&temp, &x1, Mki1); Mki1++;
-	    z_sub(&rhs[k], &rhs[k], &temp);
-	    zz_mult(&temp, &x2, Mki2); Mki2++;
-	    z_sub(&rhs[k], &rhs[k], &temp);
-	    zz_mult(&temp, &x3, Mki3); Mki3++;
-	    z_sub(&rhs[k], &rhs[k], &temp);
-	}
-
-        M0 += 4 * ldm + 4;
-    }
-
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-        Mki0 = M0 + 1;
-        Mki1 = Mki0 + ldm + 1;
-
-        x0 = rhs[firstcol];
-	zz_mult(&temp, &x0, Mki0); Mki0++;
-	z_sub(&x1, &rhs[firstcol+1], &temp);
-
-      	rhs[++firstcol] = x1;
-      	++firstcol;
-    
-      	for (k = firstcol; k < ncol; k++) {
-	    zz_mult(&temp, &x0, Mki0); Mki0++;
-	    z_sub(&rhs[k], &rhs[k], &temp);
-	    zz_mult(&temp, &x1, Mki1); Mki1++;
-	    z_sub(&rhs[k], &rhs[k], &temp);
-	} 
-    }
-    
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-zusolve ( ldm, ncol, M, rhs )
-int ldm;	/* in */
-int ncol;	/* in */
-doublecomplex *M;	/* in */
-doublecomplex *rhs;	/* modified */
-{
-    doublecomplex xj, temp;
-    int jcol, j, irow;
-
-    jcol = ncol - 1;
-
-    for (j = 0; j < ncol; j++) {
-
-	z_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
-	rhs[jcol] = xj;
-	
-	for (irow = 0; irow < jcol; irow++) {
-	    zz_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
-	    z_sub(&rhs[irow], &rhs[irow], &temp);
-	}
-
-	jcol--;
-
-    }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void zmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm;	/* in -- leading dimension of M */
-int nrow;	/* in */ 
-int ncol;	/* in */
-doublecomplex *M;	/* in */
-doublecomplex *vec;	/* in */
-doublecomplex *Mxvec;	/* in/out */
-{
-    doublecomplex vi0, vi1, vi2, vi3;
-    doublecomplex *M0, temp;
-    doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-    int k;
-
-    M0 = &M[0];
-
-    while ( firstcol < ncol - 3 ) {	/* Do 4 columns */
-	Mki0 = M0;
-	Mki1 = Mki0 + ldm;
-	Mki2 = Mki1 + ldm;
-	Mki3 = Mki2 + ldm;
-
-	vi0 = vec[firstcol++];
-	vi1 = vec[firstcol++];
-	vi2 = vec[firstcol++];
-	vi3 = vec[firstcol++];	
-	for (k = 0; k < nrow; k++) {
-	    zz_mult(&temp, &vi0, Mki0); Mki0++;
-	    z_add(&Mxvec[k], &Mxvec[k], &temp);
-	    zz_mult(&temp, &vi1, Mki1); Mki1++;
-	    z_add(&Mxvec[k], &Mxvec[k], &temp);
-	    zz_mult(&temp, &vi2, Mki2); Mki2++;
-	    z_add(&Mxvec[k], &Mxvec[k], &temp);
-	    zz_mult(&temp, &vi3, Mki3); Mki3++;
-	    z_add(&Mxvec[k], &Mxvec[k], &temp);
-	}
-
-	M0 += 4 * ldm;
-    }
-
-    while ( firstcol < ncol ) {		/* Do 1 column */
- 	Mki0 = M0;
-	vi0 = vec[firstcol++];
-	for (k = 0; k < nrow; k++) {
-	    zz_mult(&temp, &vi0, Mki0); Mki0++;
-	    z_add(&Mxvec[k], &Mxvec[k], &temp);
-	}
-	M0 += ldm;
-    }
-	
-}
-
diff --git a/EXAMPLE/Makefile b/EXAMPLE/Makefile
index f3275cc..241af0d 100644
--- a/EXAMPLE/Makefile
+++ b/EXAMPLE/Makefile
@@ -33,6 +33,7 @@ include ../make.inc
 #######################################################################
 
 HEADER   = ../SRC
+LIBS	= ../$(SUPERLULIB) $(BLASLIB) -lm
 
 SLINEXM		= slinsol.o
 SLINEXM1	= slinsol1.o
@@ -68,88 +69,67 @@ complex:   clinsol clinsol1 clinsolx clinsolx1 clinsolx2
 complex16: zlinsol zlinsol1 zlinsolx zlinsolx1 zlinsolx2
 
 slinsol: $(SLINEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(SLINEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(SLINEXM) $(LIBS) -o $@
 
 slinsol1: $(SLINEXM1) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(SLINEXM1) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(SLINEXM1) $(LIBS) -o $@
 
 slinsolx: $(SLINXEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(SLINXEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(SLINXEXM) $(LIBS) -o $@
 
 slinsolx1: $(SLINXEXM1) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(SLINXEXM1) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(SLINXEXM1) $(LIBS) -o $@
 
 slinsolx2: $(SLINXEXM2) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(SLINXEXM2) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(SLINXEXM2) $(LIBS) -o $@
 
 dlinsol: $(DLINEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(DLINEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(DLINEXM) $(LIBS) -o $@
 
 dlinsol1: $(DLINEXM1) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(DLINEXM1) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(DLINEXM1) $(LIBS) -o $@
 
 dlinsolx: $(DLINXEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(DLINXEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(DLINXEXM) $(LIBS) -o $@
 
 dlinsolx1: $(DLINXEXM1) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(DLINXEXM1) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(DLINXEXM1) $(LIBS) -o $@
 
 dlinsolx2: $(DLINXEXM2) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(DLINXEXM2) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(DLINXEXM2) $(LIBS) -o $@
 
 superlu: $(SUPERLUEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(SUPERLUEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(SUPERLUEXM) $(LIBS) -o $@
 
 clinsol: $(CLINEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(CLINEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(CLINEXM) $(LIBS) -o $@
 
 clinsol1: $(CLINEXM1) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(CLINEXM1) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(CLINEXM1) $(LIBS) -o $@
 
 clinsolx: $(CLINXEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(CLINXEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(CLINXEXM) $(LIBS) -o $@
 
 clinsolx1: $(CLINXEXM1) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(CLINXEXM1) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(CLINXEXM1) $(LIBS) -o $@
 
 clinsolx2: $(CLINXEXM2) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(CLINXEXM2) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(CLINXEXM2) $(LIBS) -o $@
 
 zlinsol: $(ZLINEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(ZLINEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(ZLINEXM) $(LIBS) -o $@
 
 zlinsol1: $(ZLINEXM1) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(ZLINEXM1) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(ZLINEXM1) $(LIBS) -o $@
 
 zlinsolx: $(ZLINXEXM) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(ZLINXEXM) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(ZLINXEXM) $(LIBS) -o $@
 
 zlinsolx1: $(ZLINXEXM1) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(ZLINXEXM1) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(ZLINXEXM1) $(LIBS) -o $@
 
 zlinsolx2: $(ZLINXEXM2) ../$(SUPERLULIB)
-	$(LOADER) $(LOADOPTS) $(ZLINXEXM2) \
-        ../$(SUPERLULIB) $(BLASLIB) -lm -o $@
+	$(LOADER) $(LOADOPTS) $(ZLINXEXM2) $(LIBS) -o $@
 
 .c.o:
 	$(CC) $(CFLAGS) -I$(HEADER) -c $< $(VERBOSE)
diff --git a/EXAMPLE/clinsol.c b/EXAMPLE/clinsol.c
index 567bffe..93e43c7 100644
--- a/EXAMPLE/clinsol.c
+++ b/EXAMPLE/clinsol.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/clinsol1.c b/EXAMPLE/clinsol1.c
index f72c9c8..db765c3 100644
--- a/EXAMPLE/clinsol1.c
+++ b/EXAMPLE/clinsol1.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/clinsolx.c b/EXAMPLE/clinsolx.c
index 340cf9d..91147af 100644
--- a/EXAMPLE/clinsolx.c
+++ b/EXAMPLE/clinsolx.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/clinsolx1.c b/EXAMPLE/clinsolx1.c
index f5283dd..a1953f7 100644
--- a/EXAMPLE/clinsolx1.c
+++ b/EXAMPLE/clinsolx1.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/clinsolx2.c b/EXAMPLE/clinsolx2.c
index 522f2d5..4946e97 100644
--- a/EXAMPLE/clinsolx2.c
+++ b/EXAMPLE/clinsolx2.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/dlinsol.c b/EXAMPLE/dlinsol.c
index f5c5244..ca81d26 100644
--- a/EXAMPLE/dlinsol.c
+++ b/EXAMPLE/dlinsol.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/dlinsol1.c b/EXAMPLE/dlinsol1.c
index 85b4f06..87793de 100644
--- a/EXAMPLE/dlinsol1.c
+++ b/EXAMPLE/dlinsol1.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/dlinsolx.c b/EXAMPLE/dlinsolx.c
index 8fc4eed..12c9b37 100644
--- a/EXAMPLE/dlinsolx.c
+++ b/EXAMPLE/dlinsolx.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/dlinsolx1.c b/EXAMPLE/dlinsolx1.c
index 157404d..7f727b7 100644
--- a/EXAMPLE/dlinsolx1.c
+++ b/EXAMPLE/dlinsolx1.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/dlinsolx2.c b/EXAMPLE/dlinsolx2.c
index 5fe99a9..4824421 100644
--- a/EXAMPLE/dlinsolx2.c
+++ b/EXAMPLE/dlinsolx2.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/dreadtriple.c b/EXAMPLE/dreadtriple.c
new file mode 100644
index 0000000..b8768fa
--- /dev/null
+++ b/EXAMPLE/dreadtriple.c
@@ -0,0 +1,118 @@
+#include <stdio.h>
+#include "dsp_defs.h"
+#include "util.h"
+
+
+void
+dreadtriple(int *m, int *n, int *nonz,
+	    double **nzval, int **rowind, int **colptr)
+{
+/*
+ * Output parameters
+ * =================
+ *   (a,asub,xa): asub[*] contains the row subscripts of nonzeros
+ *	in columns of matrix A; a[*] the numerical values;
+ *	column i of A is given by a[k],k=xa[i],...,xa[i+1]-1.
+ *
+ */
+    int    i, j, k, jsize, lasta, nnz, nz;
+    double *a, *val;
+    int    *asub, *xa, *row, *col;
+    
+    /* 	Matrix format:
+     *    First line:  #rows (also used as #cols), #non-zero
+     *    Triplet in the rest of lines:
+     *                 row, col, value
+     */
+
+    scanf("%d%d", n, nonz);
+    *m = *n;
+    printf("m %d, n %d, nonz %d\n", *m, *n, *nonz);
+    dallocateA(*n, *nonz, nzval, rowind, colptr); /* Allocate storage */
+    a    = *nzval;
+    asub = *rowind;
+    xa   = *colptr;
+
+    val = (double *) SUPERLU_MALLOC(*nonz * sizeof(double));
+    row = (int *) SUPERLU_MALLOC(*nonz * sizeof(int));
+    col = (int *) SUPERLU_MALLOC(*nonz * sizeof(int));
+
+    for (j = 0; j < *n; ++j) xa[j] = 0;
+
+    /* Read into the triplet array from a file */
+    for (nnz = 0, nz = 0; nnz < *nonz; ++nnz) {
+	scanf("%d%d%lf\n", &row[nz], &col[nz], &val[nz]);
+	/* Conversion to 0-based indexing is disabled: indices are used as read. */
+#if 0
+	--row[nz];
+	--col[nz];
+#endif
+	if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n
+	    /*|| val[nz] == 0.*/) {
+	    fprintf(stderr, "nz %d, (%d, %d) = %e out of bound, removed\n", 
+		    nz, row[nz], col[nz], val[nz]);
+	    exit(-1);
+	} else {
+	    ++xa[col[nz]];
+	    ++nz;
+	}
+    }
+
+    *nonz = nz;
+
+    /* Initialize the array of column pointers */
+    k = 0;
+    jsize = xa[0];
+    xa[0] = 0;
+    for (j = 1; j < *n; ++j) {
+	k += jsize;
+	jsize = xa[j];
+	xa[j] = k;
+    }
+    
+    /* Copy the triplets into the column oriented storage */
+    for (nz = 0; nz < *nonz; ++nz) {
+	j = col[nz];
+	k = xa[j];
+	asub[k] = row[nz];
+	a[k] = val[nz];
+	++xa[j];
+    }
+
+    /* Reset the column pointers to the beginning of each column */
+    for (j = *n; j > 0; --j)
+	xa[j] = xa[j-1];
+    xa[0] = 0;
+
+    SUPERLU_FREE(val);
+    SUPERLU_FREE(row);
+    SUPERLU_FREE(col);
+
+#ifdef CHK_INPUT
+    for (i = 0; i < *n; i++) {
+	printf("Col %d, xa %d\n", i, xa[i]);
+	for (k = xa[i]; k < xa[i+1]; k++)
+	    printf("%d\t%16.10f\n", asub[k], a[k]);
+    }
+#endif
+
+}
+
+
+void dreadrhs(int m, double *b)
+{
+    FILE *fp, *fopen();
+    int i, j;
+
+    if ( !(fp = fopen("b.dat", "r")) ) {
+        fprintf(stderr, "dreadrhs: file does not exist\n");
+	exit(-1);
+    }
+    for (i = 0; i < m; ++i)
+      fscanf(fp, "%lf\n", &b[i]);
+      /*fscanf(fp, "%d%lf\n", &j, &b[i]);*/
+    /*        readpair_(j, &b[i]);*/
+    fclose(fp);
+}
+
+
diff --git a/EXAMPLE/slinsol.c b/EXAMPLE/slinsol.c
index e63bfdf..9c53764 100644
--- a/EXAMPLE/slinsol.c
+++ b/EXAMPLE/slinsol.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/slinsol1.c b/EXAMPLE/slinsol1.c
index a736a09..07d6d30 100644
--- a/EXAMPLE/slinsol1.c
+++ b/EXAMPLE/slinsol1.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/slinsolx.c b/EXAMPLE/slinsolx.c
index d73cc6d..b3d2111 100644
--- a/EXAMPLE/slinsolx.c
+++ b/EXAMPLE/slinsolx.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/slinsolx1.c b/EXAMPLE/slinsolx1.c
index deb650f..f6e76ee 100644
--- a/EXAMPLE/slinsolx1.c
+++ b/EXAMPLE/slinsolx1.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/slinsolx2.c b/EXAMPLE/slinsolx2.c
index e0acad7..f9a2265 100644
--- a/EXAMPLE/slinsolx2.c
+++ b/EXAMPLE/slinsolx2.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/sp_ienv.c b/EXAMPLE/sp_ienv.c
index 052d860..7b0f93b 100644
--- a/EXAMPLE/sp_ienv.c
+++ b/EXAMPLE/sp_ienv.c
@@ -9,6 +9,8 @@
  * File name:		sp_ienv.c
  * History:             Modified from lapack routine ILAENV
  */
+#include "slu_Cnames.h"
+
 int
 sp_ienv(int ispec)
 {
diff --git a/EXAMPLE/superlu.c b/EXAMPLE/superlu.c
index 6b88a63..39d9582 100644
--- a/EXAMPLE/superlu.c
+++ b/EXAMPLE/superlu.c
@@ -5,7 +5,7 @@
  * November 15, 1997
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/zlinsol.c b/EXAMPLE/zlinsol.c
index d3a75c4..3993113 100644
--- a/EXAMPLE/zlinsol.c
+++ b/EXAMPLE/zlinsol.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/zlinsol1.c b/EXAMPLE/zlinsol1.c
index 1747397..ed9ec31 100644
--- a/EXAMPLE/zlinsol1.c
+++ b/EXAMPLE/zlinsol1.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/zlinsolx.c b/EXAMPLE/zlinsolx.c
index 364f62b..a89f8e0 100644
--- a/EXAMPLE/zlinsolx.c
+++ b/EXAMPLE/zlinsolx.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/zlinsolx1.c b/EXAMPLE/zlinsolx1.c
index 48f6261..e75ee28 100644
--- a/EXAMPLE/zlinsolx1.c
+++ b/EXAMPLE/zlinsolx1.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/zlinsolx2.c b/EXAMPLE/zlinsolx2.c
index ce7f2aa..1fce9d4 100644
--- a/EXAMPLE/zlinsolx2.c
+++ b/EXAMPLE/zlinsolx2.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 main(int argc, char *argv[])
 {
diff --git a/EXAMPLE/zreadtriple.c b/EXAMPLE/zreadtriple.c
new file mode 100644
index 0000000..ee0ac57
--- /dev/null
+++ b/EXAMPLE/zreadtriple.c
@@ -0,0 +1,90 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "zsp_defs.h"
+#include "util.h"
+
+
+void
+zreadtriple(int *m, int *n, int *nonz,
+	    doublecomplex **nzval, int **rowind, int **colptr)
+{
+/*
+ * Output parameters
+ * =================
+ *   (a,asub,xa): asub[*] contains the row subscripts of nonzeros
+ *	in columns of matrix A; a[*] the numerical values;
+ *	column i of A is given by a[k],k=xa[i],...,xa[i+1]-1.
+ *
+ */
+    int    i, j, k, jsize, nz, lasta;
+    doublecomplex *a, *val;
+    int    *asub, *xa, *row, *col;
+    
+    /* 	Matrix format:
+     *    First line:  #rows, #cols, #non-zero
+     *    Triplet in the rest of lines:
+     *                 row, col, real part, imaginary part
+     */
+
+    scanf("%d%d%d", m, n, nonz);
+#ifdef DEBUG
+    printf("zreadtriple(): *m %d, *n %d, *nonz, %d\n", *m, *n, *nonz);
+#endif
+    zallocateA(*n, *nonz, nzval, rowind, colptr); /* Allocate storage */
+    a    = *nzval;
+    asub = *rowind;
+    xa   = *colptr;
+
+    val = (doublecomplex *) SUPERLU_MALLOC(*nonz * sizeof(doublecomplex));
+    row = (int *) SUPERLU_MALLOC(*nonz * sizeof(int));
+    col = (int *) SUPERLU_MALLOC(*nonz * sizeof(int));
+
+    /* Read into the triplet array from a file */
+    for (i = 0; i < *n+1; ++i) xa[i] = 0;
+    for (nz = 0; nz < *nonz; ++nz) {
+	scanf("%d%d%lf%lf\n", &row[nz], &col[nz], &val[nz].r, &val[nz].i);
+	if (row[nz] < 0 || row[nz] >= *m || col[nz] < 0 || col[nz] >= *n) {
+	    fprintf(stderr, "(%d, %d) out of bound!\n", row[nz], col[nz]);
+	    exit (-1);
+	}
+	++xa[col[nz]]; /* Count number of nonzeros in each column */
+    }
+
+    /* Initialize the array of column pointers */
+    k = 0;
+    jsize = xa[0];
+    xa[0] = 0;
+    for (j = 1; j < *n; ++j) {
+	k += jsize;
+	jsize = xa[j];
+	xa[j] = k;
+    }
+    
+    /* Copy the triplets into the column oriented storage */
+    for (nz = 0; nz < *nonz; ++nz) {
+	j = col[nz];
+	k = xa[j];
+	asub[k] = row[nz];
+	a[k] = val[nz];
+	++xa[j];
+    }
+
+    /* Reset the column pointers to the beginning of each column */
+    for (j = *n; j > 0; --j)
+	xa[j] = xa[j-1];
+    xa[0] = 0;
+
+    SUPERLU_FREE(val);
+    SUPERLU_FREE(row);
+    SUPERLU_FREE(col);
+
+#ifdef CHK_INPUT
+    for (i = 0; i < *n; i++) {
+	printf("Col %d, xa %d\n", i, xa[i]);
+	for (k = xa[i]; k < xa[i+1]; k++)
+	    printf("%d\t%16.10f\n", asub[k], a[k]);
+    }
+#endif
+
+}
diff --git a/FORTRAN/Makefile b/FORTRAN/Makefile
index 98abda6..0b171d2 100644
--- a/FORTRAN/Makefile
+++ b/FORTRAN/Makefile
@@ -6,13 +6,14 @@ include ../make.inc
 #######################################################################
 
 HEADER   = ../SRC
+LIBS	= ../$(SUPERLULIB) $(BLASLIB) -lm
 
 F77EXM	= f77_main.o hbcode1.o c_fortran_dgssv.o
 
 all:	f77exm
 
 f77exm: $(F77EXM) ../$(SUPERLULIB)
-	$(FORTRAN) $(F77EXM) ../$(SUPERLULIB) $(BLASLIB) -o $@
+	$(FORTRAN) $(LOADOPTS) $(F77EXM) $(LIBS) -o $@
 
 c_fortran_zgssv.o: c_fortran_zgssv.c
 	$(CC) $(CFLAGS) $(CDEFS) -I$(HEADER) -c $< $(VERBOSE)
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_cgssv.c
similarity index 86%
copy from FORTRAN/c_fortran_dgssv.c
copy to FORTRAN/c_fortran_cgssv.c
index 22dd066..219d922 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_cgssv.c
@@ -1,3 +1,4 @@
+
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -6,7 +7,7 @@
  *
  */
 
-#include "dsp_defs.h"
+#include "slu_cdefs.h"
 
 #define HANDLE_SIZE  8
 /* kind of integer to hold a pointer.  Use int.
@@ -21,8 +22,9 @@ typedef struct {
 } factors_t;
 
 void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
-		 int *rowind, int *colptr, double *b, int *ldb,
+c_fortran_cgssv_(int *iopt, int *n, int *nnz, int *nrhs, 
+                 complex *values, int *rowind, int *colptr,
+                 complex *b, int *ldb,
 		 fptr *f_factors, /* a handle containing the address
 				     pointing to the factored matrices */
 		 int *info)
@@ -53,7 +55,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
     NCformat *Ustore;
     int      i, panel_size, permc_spec, relax;
     trans_t  trans;
-    double   drop_tol = 0.0;
+    float   drop_tol = 0.0;
     mem_usage_t   mem_usage;
     superlu_options_t options;
     SuperLUStat_t stat;
@@ -73,8 +75,8 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	for (i = 0; i < *nnz; ++i) --rowind[i];
 	for (i = 0; i <= *n; ++i) --colptr[i];
 
-	dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
-			       SLU_NC, SLU_D, SLU_GE);
+	cCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
+			       SLU_NC, SLU_C, SLU_GE);
 	L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
 	U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
 	if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[].");
@@ -88,7 +90,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	 *   permc_spec = 2: minimum degree on structure of A'+A
 	 *   permc_spec = 3: approximate minimum degree for unsymmetric matrices
 	 */    	
-	permc_spec = 3;
+	permc_spec = options.ColPerm;
 	get_perm_c(permc_spec, &A, perm_c);
 	
 	sp_preorder(&options, &A, perm_c, etree, &AC);
@@ -96,7 +98,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	panel_size = sp_ienv(1);
 	relax = sp_ienv(2);
 
-	dgstrf(&options, &AC, drop_tol, relax, panel_size, 
+	cgstrf(&options, &AC, drop_tol, relax, panel_size, 
 	       etree, NULL, 0, perm_c, perm_r, L, U, &stat, info);
 
 	if ( *info == 0 ) {
@@ -105,14 +107,14 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
 	    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
 	    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-	    dQuerySpace(L, U, &mem_usage);
+	    cQuerySpace(L, U, &mem_usage);
 	    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
 		   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
 		   mem_usage.expansions);
 	} else {
-	    printf("dgstrf() error returns INFO= %d\n", *info);
+	    printf("cgstrf() error returns INFO= %d\n", *info);
 	    if ( *info <= *n ) { /* factorization completes */
-		dQuerySpace(L, U, &mem_usage);
+		cQuerySpace(L, U, &mem_usage);
 		printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
 		       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
 		       mem_usage.expansions);
@@ -148,10 +150,10 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	perm_c = LUfactors->perm_c;
 	perm_r = LUfactors->perm_r;
 
-	dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE);
+	cCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_C, SLU_GE);
 
         /* Solve the system A*X=B, overwriting B with X. */
-        dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
+        cgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
 
 	Destroy_SuperMatrix_Store(&B);
 	StatFree(&stat);
@@ -167,7 +169,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
         SUPERLU_FREE (LUfactors->U);
 	SUPERLU_FREE (LUfactors);
     } else {
-	fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt);
+	fprintf(stderr,"Invalid iopt=%d passed to c_fortran_cgssv()\n",*iopt);
 	exit(-1);
     }
 }
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_dgssv.c
index 22dd066..9d824f0 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_dgssv.c
@@ -1,3 +1,4 @@
+
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -6,12 +7,13 @@
  *
  */
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 #define HANDLE_SIZE  8
-/* kind of integer to hold a pointer.  Use int.
-   This might need to be changed on 64-bit systems. */
-typedef int fptr;  /* 32-bit by default */
+
+/* kind of integer to hold a pointer.  Use 'long int'
+   so it works on 64-bit systems. */
+typedef long int fptr;  /* 64 bit */
 
 typedef struct {
     SuperMatrix *L;
@@ -21,8 +23,9 @@ typedef struct {
 } factors_t;
 
 void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
-		 int *rowind, int *colptr, double *b, int *ldb,
+c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, 
+                 double *values, int *rowind, int *colptr,
+                 double *b, int *ldb,
 		 fptr *f_factors, /* a handle containing the address
 				     pointing to the factored matrices */
 		 int *info)
@@ -88,7 +91,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	 *   permc_spec = 2: minimum degree on structure of A'+A
 	 *   permc_spec = 3: approximate minimum degree for unsymmetric matrices
 	 */    	
-	permc_spec = 3;
+	permc_spec = options.ColPerm;
 	get_perm_c(permc_spec, &A, perm_c);
 	
 	sp_preorder(&options, &A, perm_c, etree, &AC);
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_dgssv.c.bak
similarity index 99%
copy from FORTRAN/c_fortran_dgssv.c
copy to FORTRAN/c_fortran_dgssv.c.bak
index 22dd066..284abef 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_dgssv.c.bak
@@ -88,7 +88,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	 *   permc_spec = 2: minimum degree on structure of A'+A
 	 *   permc_spec = 3: approximate minimum degree for unsymmetric matrices
 	 */    	
-	permc_spec = 3;
+	permc_spec = options.ColPerm;
 	get_perm_c(permc_spec, &A, perm_c);
 	
 	sp_preorder(&options, &A, perm_c, etree, &AC);
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_sgssv.c
similarity index 86%
copy from FORTRAN/c_fortran_dgssv.c
copy to FORTRAN/c_fortran_sgssv.c
index 22dd066..1fa08f1 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_sgssv.c
@@ -1,3 +1,4 @@
+
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -6,7 +7,7 @@
  *
  */
 
-#include "dsp_defs.h"
+#include "slu_sdefs.h"
 
 #define HANDLE_SIZE  8
 /* kind of integer to hold a pointer.  Use int.
@@ -21,8 +22,9 @@ typedef struct {
 } factors_t;
 
 void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
-		 int *rowind, int *colptr, double *b, int *ldb,
+c_fortran_sgssv_(int *iopt, int *n, int *nnz, int *nrhs, 
+                 float *values, int *rowind, int *colptr,
+                 float *b, int *ldb,
 		 fptr *f_factors, /* a handle containing the address
 				     pointing to the factored matrices */
 		 int *info)
@@ -53,7 +55,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
     NCformat *Ustore;
     int      i, panel_size, permc_spec, relax;
     trans_t  trans;
-    double   drop_tol = 0.0;
+    float   drop_tol = 0.0;
     mem_usage_t   mem_usage;
     superlu_options_t options;
     SuperLUStat_t stat;
@@ -73,8 +75,8 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	for (i = 0; i < *nnz; ++i) --rowind[i];
 	for (i = 0; i <= *n; ++i) --colptr[i];
 
-	dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
-			       SLU_NC, SLU_D, SLU_GE);
+	sCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
+			       SLU_NC, SLU_S, SLU_GE);
 	L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
 	U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
 	if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[].");
@@ -88,7 +90,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	 *   permc_spec = 2: minimum degree on structure of A'+A
 	 *   permc_spec = 3: approximate minimum degree for unsymmetric matrices
 	 */    	
-	permc_spec = 3;
+	permc_spec = options.ColPerm;
 	get_perm_c(permc_spec, &A, perm_c);
 	
 	sp_preorder(&options, &A, perm_c, etree, &AC);
@@ -96,7 +98,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	panel_size = sp_ienv(1);
 	relax = sp_ienv(2);
 
-	dgstrf(&options, &AC, drop_tol, relax, panel_size, 
+	sgstrf(&options, &AC, drop_tol, relax, panel_size, 
 	       etree, NULL, 0, perm_c, perm_r, L, U, &stat, info);
 
 	if ( *info == 0 ) {
@@ -105,14 +107,14 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
 	    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
 	    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-	    dQuerySpace(L, U, &mem_usage);
+	    sQuerySpace(L, U, &mem_usage);
 	    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
 		   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
 		   mem_usage.expansions);
 	} else {
-	    printf("dgstrf() error returns INFO= %d\n", *info);
+	    printf("sgstrf() error returns INFO= %d\n", *info);
 	    if ( *info <= *n ) { /* factorization completes */
-		dQuerySpace(L, U, &mem_usage);
+		sQuerySpace(L, U, &mem_usage);
 		printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
 		       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
 		       mem_usage.expansions);
@@ -148,10 +150,10 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	perm_c = LUfactors->perm_c;
 	perm_r = LUfactors->perm_r;
 
-	dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE);
+	sCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_S, SLU_GE);
 
         /* Solve the system A*X=B, overwriting B with X. */
-        dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
+        sgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
 
 	Destroy_SuperMatrix_Store(&B);
 	StatFree(&stat);
@@ -167,7 +169,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
         SUPERLU_FREE (LUfactors->U);
 	SUPERLU_FREE (LUfactors);
     } else {
-	fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt);
+	fprintf(stderr,"Invalid iopt=%d passed to c_fortran_sgssv()\n",*iopt);
 	exit(-1);
     }
 }
diff --git a/FORTRAN/c_fortran_dgssv.c b/FORTRAN/c_fortran_zgssv.c
similarity index 87%
copy from FORTRAN/c_fortran_dgssv.c
copy to FORTRAN/c_fortran_zgssv.c
index 22dd066..b7cf074 100644
--- a/FORTRAN/c_fortran_dgssv.c
+++ b/FORTRAN/c_fortran_zgssv.c
@@ -1,3 +1,4 @@
+
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -6,7 +7,7 @@
  *
  */
 
-#include "dsp_defs.h"
+#include "slu_zdefs.h"
 
 #define HANDLE_SIZE  8
 /* kind of integer to hold a pointer.  Use int.
@@ -21,8 +22,9 @@ typedef struct {
 } factors_t;
 
 void
-c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
-		 int *rowind, int *colptr, double *b, int *ldb,
+c_fortran_zgssv_(int *iopt, int *n, int *nnz, int *nrhs, 
+                 doublecomplex *values, int *rowind, int *colptr,
+                 doublecomplex *b, int *ldb,
 		 fptr *f_factors, /* a handle containing the address
 				     pointing to the factored matrices */
 		 int *info)
@@ -73,8 +75,8 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	for (i = 0; i < *nnz; ++i) --rowind[i];
 	for (i = 0; i <= *n; ++i) --colptr[i];
 
-	dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
-			       SLU_NC, SLU_D, SLU_GE);
+	zCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr,
+			       SLU_NC, SLU_Z, SLU_GE);
 	L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
 	U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
 	if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[].");
@@ -88,7 +90,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	 *   permc_spec = 2: minimum degree on structure of A'+A
 	 *   permc_spec = 3: approximate minimum degree for unsymmetric matrices
 	 */    	
-	permc_spec = 3;
+	permc_spec = options.ColPerm;
 	get_perm_c(permc_spec, &A, perm_c);
 	
 	sp_preorder(&options, &A, perm_c, etree, &AC);
@@ -96,7 +98,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	panel_size = sp_ienv(1);
 	relax = sp_ienv(2);
 
-	dgstrf(&options, &AC, drop_tol, relax, panel_size, 
+	zgstrf(&options, &AC, drop_tol, relax, panel_size, 
 	       etree, NULL, 0, perm_c, perm_r, L, U, &stat, info);
 
 	if ( *info == 0 ) {
@@ -105,14 +107,14 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
 	    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
 	    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-	    dQuerySpace(L, U, &mem_usage);
+	    zQuerySpace(L, U, &mem_usage);
 	    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
 		   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
 		   mem_usage.expansions);
 	} else {
-	    printf("dgstrf() error returns INFO= %d\n", *info);
+	    printf("zgstrf() error returns INFO= %d\n", *info);
 	    if ( *info <= *n ) { /* factorization completes */
-		dQuerySpace(L, U, &mem_usage);
+		zQuerySpace(L, U, &mem_usage);
 		printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
 		       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
 		       mem_usage.expansions);
@@ -148,10 +150,10 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
 	perm_c = LUfactors->perm_c;
 	perm_r = LUfactors->perm_r;
 
-	dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE);
+	zCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_Z, SLU_GE);
 
         /* Solve the system A*X=B, overwriting B with X. */
-        dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
+        zgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info);
 
 	Destroy_SuperMatrix_Store(&B);
 	StatFree(&stat);
@@ -167,7 +169,7 @@ c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values,
         SUPERLU_FREE (LUfactors->U);
 	SUPERLU_FREE (LUfactors);
     } else {
-	fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt);
+	fprintf(stderr,"Invalid iopt=%d passed to c_fortran_zgssv()\n",*iopt);
 	exit(-1);
     }
 }
diff --git a/FORTRAN/f77_main.f b/FORTRAN/f77_main.f
index 28efb22..997f88e 100644
--- a/FORTRAN/f77_main.f
+++ b/FORTRAN/f77_main.f
@@ -3,8 +3,8 @@
       parameter ( maxn = 10000, maxnz = 100000 )
       integer rowind(maxnz), colptr(maxn)
       real*8  values(maxnz), b(maxn)
-      integer n, nnz, nrhs, ldb, info
-      integer factors, iopt
+      integer n, nnz, nrhs, ldb, info, iopt
+      integer*8 factors
 *
       call hbcode1(n, n, nnz, values, rowind, colptr)
 *
diff --git a/FORTRAN/f77exm.out b/FORTRAN/f77exm.out
new file mode 100644
index 0000000..186fa25
--- /dev/null
+++ b/FORTRAN/f77exm.out
@@ -0,0 +1,9 @@
+No of nonzeros in factor L = 835
+No of nonzeros in factor U = 978
+No of nonzeros in L+U = 1813
+L\U MB 0.020	total MB needed 0.040	expansions 0
+ Factorization succeeded
+ Solve succeeded
+     188.45681574593    133.96798695468   -470.23879928609   -278.80339526911
+     19.917307361526    272.77268232866   -247.80663474720   -313.99765880983
+    -91.277211882061    99.759496460021
diff --git a/INSTALL/Makefile b/INSTALL/Makefile
index a8f77ec..a75e370 100644
--- a/INSTALL/Makefile
+++ b/INSTALL/Makefile
@@ -3,13 +3,13 @@ include ../make.inc
 all:  testdlamch testslamch testtimer install.out
 
 testdlamch: dlamch.o lsame.o dlamchtst.o
-	$(LOADER) -o testdlamch dlamch.o lsame.o dlamchtst.o
+	$(LOADER) $(LOADOPTS) -o testdlamch dlamch.o lsame.o dlamchtst.o
 
 testslamch: slamch.o lsame.o slamchtst.o
-	$(LOADER) -o testslamch slamch.o lsame.o slamchtst.o
+	$(LOADER) $(LOADOPTS) -o testslamch slamch.o lsame.o slamchtst.o
 
 testtimer: superlu_timer.o timertst.o
-	$(LOADER) -o testtimer superlu_timer.o timertst.o
+	$(LOADER) $(LOADOPTS) -o testtimer superlu_timer.o timertst.o
 
 install.out: install.csh
 	@echo Testing machines parameters and timer 
diff --git a/MAKE_INC/make.alpha b/MAKE_INC/make.alpha
index f354d12..a19aa2c 100644
--- a/MAKE_INC/make.alpha
+++ b/MAKE_INC/make.alpha
@@ -21,8 +21,8 @@ PLAT = _alpha
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
 BLASDEF      = -DUSE_VENDOR_BLAS
 BLASLIB      = -ldxml
 
diff --git a/MAKE_INC/make.cray b/MAKE_INC/make.cray
index d8841ed..e2d5846 100644
--- a/MAKE_INC/make.cray
+++ b/MAKE_INC/make.cray
@@ -19,8 +19,8 @@ PLAT = _cray
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
 # 
 #
 BLASDEF	     = -DUSE_VENDOR_BLAS
diff --git a/MAKE_INC/make.hppa b/MAKE_INC/make.hppa
index 04e9d34..c7c1484 100644
--- a/MAKE_INC/make.hppa
+++ b/MAKE_INC/make.hppa
@@ -21,8 +21,8 @@ PLAT = _hppa
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
 BLASDEF      = -DUSE_VENDOR_BLAS
 BLASLIB      = -lblas -lcl
 
diff --git a/MAKE_INC/make.inc b/MAKE_INC/make.inc
index 83ffb02..707a52c 100644
--- a/MAKE_INC/make.inc
+++ b/MAKE_INC/make.inc
@@ -21,9 +21,9 @@ PLAT = _linux
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
-BLASLIB      = ../blas$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
+BLASLIB      = ../libblas.a
 
 #
 #  The archiver and the flag(s) to use when building archive (library)
diff --git a/MAKE_INC/make.linux b/MAKE_INC/make.linux
index 0d17a92..50bdda9 100644
--- a/MAKE_INC/make.linux
+++ b/MAKE_INC/make.linux
@@ -21,9 +21,9 @@ PLAT = _linux
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
-BLASLIB      = ../blas$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
+BLASLIB      = ../libblas.a
 
 #
 #  The archiver and the flag(s) to use when building archive (library)
diff --git a/MAKE_INC/make.rs6k b/MAKE_INC/make.rs6k
index 389b9cc..f68151c 100644
--- a/MAKE_INC/make.rs6k
+++ b/MAKE_INC/make.rs6k
@@ -21,8 +21,8 @@ PLAT = _rs6k
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
 # 
 # If you don't have ESSL, you can use the following blaslib instead:
 #           BLASLIB = -lblas -lxlf -lxlf90
diff --git a/MAKE_INC/make.sgi b/MAKE_INC/make.sgi
index 606b5aa..61e2197 100644
--- a/MAKE_INC/make.sgi
+++ b/MAKE_INC/make.sgi
@@ -21,9 +21,9 @@ PLAT = _sgi
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
-BLASLIB      = ../blas$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
+BLASLIB      = ../libblas.a
 
 #
 #  The archiver and the flag(s) to use when building archive (library)
diff --git a/MAKE_INC/make.solaris b/MAKE_INC/make.solaris
index feaad46..2f864b3 100644
--- a/MAKE_INC/make.solaris
+++ b/MAKE_INC/make.solaris
@@ -21,9 +21,9 @@ PLAT = _solaris
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
-BLASLIB      = ../blas$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
+BLASLIB      = ../libblas.a
 
 #
 #  The archiver and the flag(s) to use when building archive (library)
@@ -48,4 +48,4 @@ CDEFS        = -DAdd_
 #
 # The directory in which Matlab is installed
 #
-MATLAB	     = /usr/sww/matlab
+MATLAB	     = /usr/sww/pkg/matlab
diff --git a/MAKE_INC/make.sp b/MAKE_INC/make.sp
index 03417e3..a6d08f7 100644
--- a/MAKE_INC/make.sp
+++ b/MAKE_INC/make.sp
@@ -21,8 +21,8 @@ PLAT = _sp
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
 # 
 # If you don't have ESSL, you can use the following blaslib instead:
 #           BLASLIB = -lblas -lxlf -lxlf90
diff --git a/MAKE_INC/make.sun4 b/MAKE_INC/make.sun4
index 7ed5661..b15fa2b 100644
--- a/MAKE_INC/make.sun4
+++ b/MAKE_INC/make.sun4
@@ -21,9 +21,9 @@ PLAT = _sun4
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
-BLASLIB      = ../blas$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
+BLASLIB      = ../libblas.a
 
 #
 #  The archiver and the flag(s) to use when building archive (library)
@@ -50,7 +50,7 @@ CDEFS        = -DAdd_
 #
 # The directory in which Matlab is installed
 #
-MATLAB	     = /usr/sww/matlab
+MATLAB	     = /usr/sww/pkg/matlab
 
 
 
diff --git a/MATLAB/mexlusolve.c b/MATLAB/mexlusolve.c
index f7ceeb2..6db98a5 100644
--- a/MATLAB/mexlusolve.c
+++ b/MATLAB/mexlusolve.c
@@ -7,7 +7,7 @@
  */
 #include <stdio.h>
 #include "mex.h"
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 #ifdef V5
 #define  MatlabMatrix mxArray
diff --git a/MATLAB/mexsuperlu.c b/MATLAB/mexsuperlu.c
index 0da4a9e..90d4a96 100644
--- a/MATLAB/mexsuperlu.c
+++ b/MATLAB/mexsuperlu.c
@@ -7,7 +7,7 @@
  */
 #include <stdio.h>
 #include "mex.h"
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 
 #ifdef V5
diff --git a/README b/README
index f0f010f..102c476 100644
--- a/README
+++ b/README
@@ -107,7 +107,7 @@ on your system setup:
        to make the BLAS library from the routines in the CBLAS/ subdirectory.
 
 3. C preprocessor definition CDEFS.
-   In the header file SRC/Cnames.h, we use macros to determine how
+   In the header file SRC/slu_Cnames.h, we use macros to determine how
    C routines should be named so that they are callable by Fortran.
    (Some vendor-supplied BLAS libraries do not have C interface. So the 
     re-naming is needed in order for the SuperLU BLAS calls (in C) to 
diff --git a/SRC/Makefile b/SRC/Makefile
index 30840c0..83200a7 100644
--- a/SRC/Makefile
+++ b/SRC/Makefile
@@ -5,9 +5,14 @@ include ../make.inc
 #  This is the makefile to create a library for SuperLU.
 #  The files are organized as follows:
 #
-#       ALLAUX -- Auxiliary routines called from all precisions
-#       SCLAUX -- Auxiliary routines called from both real and complex
-#       DZLAUX -- Auxiliary routines called from both double precision
+#       ALLAUX -- Auxiliary routines called from all precisions of SuperLU
+#	LAAUX  -- LAPACK auxiliary routines called from all precisions
+#	SLASRC -- LAPACK single precision real routines
+#	DLASRC -- LAPACK double precision real routines
+#	CLASRC -- LAPACK single precision complex routines
+#	ZLASRC -- LAPACK double precision complex routines
+#       SCLAUX -- LAPACK Auxiliary routines called from both real and complex
+#       DZLAUX -- LAPACK Auxiliary routines called from both double precision
 #                 and complex*16
 #	SLUSRC -- Single precision real SuperLU routines
 #       DLUSRC -- Double precision real SuperLU routines
@@ -32,17 +37,23 @@ include ../make.inc
 #
 #######################################################################
 
-ALLAUX = superlu_timer.o lsame.o util.o memory.o get_perm_c.o mmd.o \
-	 sp_coletree.o sp_preorder.o sp_ienv.o relax_snode.o heap_relax_snode.o \
-	 xerbla.o colamd.o
+### LAPACK 
+LAAUX 	= lsame.o xerbla.o
+SLASRC 	= slacon.o 
+DLASRC	= dlacon.o
+CLASRC	= clacon.o scsum1.o icmax1.o
+ZLASRC	= zlacon.o dzsum1.o izmax1.o
+SCLAUX 	= slamch.o
+DZLAUX 	= dlamch.o
 
-SCLAUX = slamch.o
-
-DZLAUX = dlamch.o
+### SuperLU 
+ALLAUX 	= superlu_timer.o util.o memory.o get_perm_c.o mmd.o \
+	  sp_coletree.o sp_preorder.o sp_ienv.o relax_snode.o \
+	  heap_relax_snode.o colamd.o
 
 SLUSRC = \
 	sgssv.o sgssvx.o \
-	ssp_blas2.o ssp_blas3.o sgscon.o slacon.o \
+	ssp_blas2.o ssp_blas3.o sgscon.o  \
 	slangs.o sgsequ.o slaqgs.o spivotgrowth.o \
 	sgsrfs.o sgstrf.o sgstrs.o scopy_to_ucol.o \
 	ssnode_dfs.o ssnode_bmod.o \
@@ -52,7 +63,7 @@ SLUSRC = \
 
 DLUSRC = \
 	dgssv.o dgssvx.o \
-	dsp_blas2.o dsp_blas3.o dgscon.o dlacon.o \
+	dsp_blas2.o dsp_blas3.o dgscon.o \
 	dlangs.o dgsequ.o dlaqgs.o dpivotgrowth.o  \
 	dgsrfs.o dgstrf.o dgstrs.o dcopy_to_ucol.o \
 	dsnode_dfs.o dsnode_bmod.o \
@@ -61,9 +72,7 @@ DLUSRC = \
 	dmemory.o dutil.o dmyblas2.o
 
 CLUSRC = \
-	scomplex.o scsum1.o icmax1.o \
-	cgssv.o cgssvx.o \
-	csp_blas2.o csp_blas3.o cgscon.o clacon.o \
+	scomplex.o cgssv.o cgssvx.o csp_blas2.o csp_blas3.o cgscon.o \
 	clangs.o cgsequ.o claqgs.o cpivotgrowth.o  \
 	cgsrfs.o cgstrf.o cgstrs.o ccopy_to_ucol.o \
 	csnode_dfs.o csnode_bmod.o \
@@ -72,9 +81,7 @@ CLUSRC = \
 	cmemory.o cutil.o cmyblas2.o
 
 ZLUSRC = \
-	dcomplex.o dzsum1.o izmax1.o \
-	zgssv.o zgssvx.o \
-	zsp_blas2.o zsp_blas3.o zgscon.o zlacon.o \
+	dcomplex.o zgssv.o zgssvx.o zsp_blas2.o zsp_blas3.o zgscon.o \
 	zlangs.o zgsequ.o zlaqgs.o zpivotgrowth.o  \
 	zgsrfs.o zgstrf.o zgstrs.o zcopy_to_ucol.o \
 	zsnode_dfs.o zsnode_bmod.o \
@@ -84,32 +91,37 @@ ZLUSRC = \
 
 all:    single double complex complex16
 
-single: $(SLUSRC) $(ALLAUX) $(SCLAUX)
-	$(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(SLUSRC) $(ALLAUX) $(SCLAUX)
+single: $(SLUSRC) $(ALLAUX) $(LAAUX) $(SLASRC) $(SCLAUX)
+	$(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) \
+		$(SLUSRC) $(ALLAUX) $(LAAUX) $(SLASRC) $(SCLAUX)
 	$(RANLIB) ../$(SUPERLULIB)
 
-double: $(DLUSRC) $(ALLAUX) $(DZLAUX)
-	$(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(DLUSRC) $(ALLAUX) $(DZLAUX)
+double: $(DLUSRC) $(ALLAUX) $(LAAUX) $(DLASRC) $(DZLAUX)
+	$(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) \
+                $(DLUSRC) $(ALLAUX) $(LAAUX) $(DLASRC) $(DZLAUX)
 	$(RANLIB) ../$(SUPERLULIB)
 
-complex: $(CLUSRC) $(ALLAUX) $(SCLAUX)
-	$(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(CLUSRC) $(ALLAUX) $(SCLAUX)
+complex: $(CLUSRC) $(ALLAUX) $(LAAUX) $(CLASRC) $(SCLAUX)
+	$(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) \
+		$(CLUSRC) $(ALLAUX) $(LAAUX) $(CLASRC) $(SCLAUX)
 	$(RANLIB) ../$(SUPERLULIB)
 
-complex16: $(ZLUSRC) $(ALLAUX) $(DZLAUX)
-	$(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) $(ZLUSRC) $(ALLAUX) $(DZLAUX)
+complex16: $(ZLUSRC) $(ALLAUX) $(LAAUX) $(ZLASRC) $(DZLAUX)
+	$(ARCH) $(ARCHFLAGS) ../$(SUPERLULIB) \
+		$(ZLUSRC) $(ALLAUX) $(LAAUX) $(ZLASRC) $(DZLAUX)
 	$(RANLIB) ../$(SUPERLULIB)
 
 
 ##################################
 # Do not optimize these routines #
 ##################################
-slamch.o: slamch.c ; $(CC) -c $(NOOPTS) $<
-dlamch.o: dlamch.c ; $(CC) -c $(NOOPTS) $<
+slamch.o: slamch.c ; $(CC) -c $(NOOPTS) $(CDEFS) $<
+dlamch.o: dlamch.c ; $(CC) -c $(NOOPTS) $(CDEFS) $<
 superlu_timer.o:  superlu_timer.c ; $(CC) -c $(NOOPTS) $<
+##################################
 
 .c.o:
 	$(CC) $(CFLAGS) $(CDEFS) $(BLASDEF) -c $< $(VERBOSE)
 
 clean:	
-	rm -f *.o ../superlu$(PLAT).a
+	rm -f *.o ../libsuperlu_3.0.a
diff --git a/SRC/ccolumn_bmod.c b/SRC/ccolumn_bmod.c
index 730f04a..72ae5be 100644
--- a/SRC/ccolumn_bmod.c
+++ b/SRC/ccolumn_bmod.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/ccolumn_dfs.c b/SRC/ccolumn_dfs.c
index d2c65fb..10f0fb6 100644
--- a/SRC/ccolumn_dfs.c
+++ b/SRC/ccolumn_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 /* What type of supernodes we want */
 #define T2_SUPER
diff --git a/SRC/ccopy_to_ucol.c b/SRC/ccopy_to_ucol.c
index 0c7a969..a0972fa 100644
--- a/SRC/ccopy_to_ucol.c
+++ b/SRC/ccopy_to_ucol.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 int
 ccopy_to_ucol(
diff --git a/SRC/cgscon.c b/SRC/cgscon.c
index b3f5c99..ee8bb4b 100644
--- a/SRC/cgscon.c
+++ b/SRC/cgscon.c
@@ -11,7 +11,7 @@
  * History:     Modified from lapack routines CGECON.
  */
 #include <math.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 void
 cgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/cgsequ.c b/SRC/cgsequ.c
index 10420cb..77a4961 100644
--- a/SRC/cgsequ.c
+++ b/SRC/cgsequ.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from LAPACK routine CGEEQU
  */
 #include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 void
 cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
diff --git a/SRC/cgsrfs.c b/SRC/cgsrfs.c
index a2d2e89..68568cf 100644
--- a/SRC/cgsrfs.c
+++ b/SRC/cgsrfs.c
@@ -11,7 +11,7 @@
  * History:     Modified from lapack routine CGERFS
  */
 #include <math.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 void
 cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/cgssv.c b/SRC/cgssv.c
index ba745ce..d0ecf19 100644
--- a/SRC/cgssv.c
+++ b/SRC/cgssv.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -7,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 void
 cgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/cgssvx.c b/SRC/cgssvx.c
index c678d76..36e6fdb 100644
--- a/SRC/cgssvx.c
+++ b/SRC/cgssvx.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 void
 cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
@@ -455,7 +455,7 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
 			       Astore->nzval, Astore->colind, Astore->rowptr,
 			       SLU_NC, A->Dtype, A->Mtype);
 	if ( notran ) { /* Reverse the transpose argument. */
-            trant = CONJ;
+	    trant = TRANS;
 	    notran = 0;
 	} else {
 	    trant = NOTRANS;
diff --git a/SRC/cgstrf.c b/SRC/cgstrf.c
index f64de34..65700ce 100644
--- a/SRC/cgstrf.c
+++ b/SRC/cgstrf.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 void
 cgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
@@ -182,8 +182,8 @@ cgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
  */
     /* Local working arrays */
     NCPformat *Astore;
-    int       *iperm_r; /* inverse of perm_r;
-			   used when options->Fact == SamePattern_SameRowPerm */
+    int       *iperm_r = NULL; /* inverse of perm_r; used when 
+                                  options->Fact == SamePattern_SameRowPerm */
     int       *iperm_c; /* inverse of perm_c */
     int       *iwork;
     complex    *cwork;
diff --git a/SRC/cgstrs.c b/SRC/cgstrs.c
index dd3b1a1..270a343 100644
--- a/SRC/cgstrs.c
+++ b/SRC/cgstrs.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 
 /* 
diff --git a/SRC/cgstrs.c.bak b/SRC/cgstrs.c.bak
deleted file mode 100644
index e609d3c..0000000
--- a/SRC/cgstrs.c.bak
+++ /dev/null
@@ -1,339 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "csp_defs.h"
-
-
-/* 
- * Function prototypes 
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-
-
-void
-cgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * CGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * CGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         cgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         cgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *	   Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- * 	   = 0: successful exit
- *	   < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
-    complex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    complex   *work_col;
-#endif
-    complex   temp_comp;
-    DNformat *Bstore;
-    complex   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    complex   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    complex   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void cprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-	      L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
-	*info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-	      U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
-	*info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-	      B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
-	*info = -6;
-    if ( *info ) {
-	i = -(*info);
-	xerbla_("cgstrs", &i);
-	return;
-    }
-
-    n = L->nrow;
-    work = complexCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = complexMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    
-    if ( trans == NOTRANS ) {
-	/* Permute right hand sides to form Pr*B */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-	
-	/* Forward solve PLy=Pb. */
-	for (k = 0; k <= Lstore->nsuper; k++) {
-	    fsupc = L_FST_SUPC(k);
-	    istart = L_SUB_START(fsupc);
-	    nsupr = L_SUB_START(fsupc+1) - istart;
-	    nsupc = L_FST_SUPC(k+1) - fsupc;
-	    nrow = nsupr - nsupc;
-
-	    solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
-	    solve_ops += 8 * nrow * nsupc * nrhs;
-	    
-	    if ( nsupc == 1 ) {
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-	    	    luptr = L_NZ_START(fsupc);
-		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-			irow = L_SUB(iptr);
-			++luptr;
-			cc_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
-			c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-		    }
-		}
-	    } else {
-	    	luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		ftcs1 = _cptofcd("L", strlen("L"));
-		ftcs2 = _cptofcd("N", strlen("N"));
-		ftcs3 = _cptofcd("U", strlen("U"));
-		CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-		
-		CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-			&beta, &work[0], &n );
-#else
-		ctrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-		
-		cgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-			&beta, &work[0], &n );
-#endif
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-		    work_col = &work[j*n];
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; i++) {
-			irow = L_SUB(iptr);
-			c_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
-			work_col[i].r = 0.0;
-	                work_col[i].i = 0.0;
-			iptr++;
-		    }
-		}
-#else		
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-		    clsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-		    cmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-			    &rhs_work[fsupc], &work[0] );
-
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; i++) {
-			irow = L_SUB(iptr);
-			c_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
-			work[i].r = 0.;
-	                work[i].i = 0.;
-			iptr++;
-		    }
-		}
-#endif		    
-	    } /* else ... */
-	} /* for L-solve */
-
-#ifdef DEBUG
-  	printf("After L-solve: y=\n");
-	cprint_soln(n, nrhs, Bmat);
-#endif
-
-	/*
-	 * Back solve Ux=y.
-	 */
-	for (k = Lstore->nsuper; k >= 0; k--) {
-	    fsupc = L_FST_SUPC(k);
-	    istart = L_SUB_START(fsupc);
-	    nsupr = L_SUB_START(fsupc+1) - istart;
-	    nsupc = L_FST_SUPC(k+1) - fsupc;
-	    luptr = L_NZ_START(fsupc);
-
-	    solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;
-
-	    if ( nsupc == 1 ) {
-		rhs_work = &Bmat[0];
-		for (j = 0; j < nrhs; j++) {
-		    c_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
-		    rhs_work += ldb;
-		}
-	    } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		ftcs1 = _cptofcd("L", strlen("L"));
-		ftcs2 = _cptofcd("U", strlen("U"));
-		ftcs3 = _cptofcd("N", strlen("N"));
-		CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
-		ctrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else		
-		for (j = 0; j < nrhs; j++)
-		    cusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif		
-	    }
-
-	    for (j = 0; j < nrhs; ++j) {
-		rhs_work = &Bmat[j*ldb];
-		for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-			irow = U_SUB(i);
-			cc_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
-			c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-		    }
-		}
-	    }
-	    
-	} /* for U-solve */
-
-#ifdef DEBUG
-  	printf("After U-solve: x=\n");
-	cprint_soln(n, nrhs, Bmat);
-#endif
-
-	/* Compute the final solution X := Pc*X. */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-	
-        stat->ops[SOLVE] = solve_ops;
-
-    } else { /* Solve A'*X=B */
-	/* Permute right hand sides to form Pc'*B. */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-
-	stat->ops[SOLVE] = 0;
-	
-	for (k = 0; k < nrhs; ++k) {
-	    
-	    /* Multiply by inv(U'). */
-	    sp_ctrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-	    
-	    /* Multiply by inv(L'). */
-	    sp_ctrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-	    
-	}
-	
-	/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-cprint_soln(int n, int nrhs, complex *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-  	printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/SRC/clacon.c b/SRC/clacon.c
index ada4b61..704f1bf 100644
--- a/SRC/clacon.c
+++ b/SRC/clacon.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,8 @@
  *
  */
 #include <math.h>
-#include "Cnames.h"
-#include "scomplex.h"
+#include "slu_Cnames.h"
+#include "slu_scomplex.h"
 
 int
 clacon_(int *n, complex *v, complex *x, float *est, int *kase)
diff --git a/SRC/clangs.c b/SRC/clangs.c
index 612bf52..de7f91f 100644
--- a/SRC/clangs.c
+++ b/SRC/clangs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from lapack routine CLANGE
  */
 #include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 float clangs(char *norm, SuperMatrix *A)
 {
diff --git a/SRC/claqgs.c b/SRC/claqgs.c
index 9347a03..377b501 100644
--- a/SRC/claqgs.c
+++ b/SRC/claqgs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from LAPACK routine CLAQGE
  */
 #include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 void
 claqgs(SuperMatrix *A, float *r, float *c, 
diff --git a/SRC/cmemory.c b/SRC/cmemory.c
index 04185e7..d50f58d 100644
--- a/SRC/cmemory.c
+++ b/SRC/cmemory.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 /* Constants */
 #define NO_MEMTYPE  4      /* 0: lusup;
@@ -193,9 +193,10 @@ cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 	    cSetupSpace(work, lwork, &Glu->MemModel);
 	}
 	
-#ifdef DEBUG		   
-	printf("cLUMemInit() called: annz %d, MemModel %d\n", 
-		annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+	printf("cLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
+	       FILL, nzlmax, nzumax);
+	fflush(stdout);
 #endif	
 	
 	/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 		printf("Not enough memory to perform factorization.\n");
 		return (cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
 	    }
+#if ( PRNTlevel >= 1)
+	    printf("cLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
+		   nzlmax, nzumax);
+	    fflush(stdout);
+#endif
 	    lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
 	    ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
 	    lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
@@ -476,8 +482,7 @@ void
     else lword = sizeof(complex);
 
     if ( Glu->MemModel == SYSTEM ) {
-	new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/*	new_mem = (void *) calloc(new_len, lword); */
+	new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
 	if ( no_expand != 0 ) {
 	    tries = 0;
 	    if ( keep_prev ) {
@@ -487,8 +492,7 @@ void
 		    if ( ++tries > 10 ) return (NULL);
 		    alpha = Reduce(alpha);
 		    new_len = alpha * *prev_len;
-		    new_mem = (void *) SUPERLU_MALLOC(new_len * lword); 
-/*		    new_mem = (void *) calloc(new_len, lword); */
+		    new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
 		}
 	    }
 	    if ( type == LSUB || type == USUB ) {
@@ -641,7 +645,7 @@ callocateA(int n, int nnz, complex **a, int **asub, int **xa)
 complex *complexMalloc(int n)
 {
     complex *buf;
-    buf = (complex *) SUPERLU_MALLOC(n * sizeof(complex)); 
+    buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex)); 
     if ( !buf ) {
 	ABORT("SUPERLU_MALLOC failed for buf in complexMalloc()\n");
     }
@@ -653,7 +657,7 @@ complex *complexCalloc(int n)
     complex *buf;
     register int i;
     complex zero = {0.0, 0.0};
-    buf = (complex *) SUPERLU_MALLOC(n * sizeof(complex));
+    buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
     if ( !buf ) {
 	ABORT("SUPERLU_MALLOC failed for buf in complexCalloc()\n");
     }
diff --git a/SRC/cmyblas2.c b/SRC/cmyblas2.c
index 74fdbca..5998f87 100644
--- a/SRC/cmyblas2.c
+++ b/SRC/cmyblas2.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -14,7 +13,7 @@
  * Note:
  *     This is only used when the system lacks an efficient BLAS library.
  */
-#include "scomplex.h"
+#include "slu_scomplex.h"
 
 /*
  * Solves a dense UNIT lower triangular system. The unit lower 
diff --git a/SRC/colamd.c b/SRC/colamd.c
index b60718f..dc531f0 100644
--- a/SRC/colamd.c
+++ b/SRC/colamd.c
@@ -1,9 +1,15 @@
 /* ========================================================================== */
-/* === colamd - a sparse matrix column ordering algorithm =================== */
+/* === colamd/symamd - a sparse matrix column ordering algorithm ============ */
 /* ========================================================================== */
 
 /*
-    colamd:  An approximate minimum degree column ordering algorithm.
+    colamd:  an approximate minimum degree column ordering algorithm,
+    	for LU factorization of symmetric or unsymmetric matrices,
+	QR factorization, least squares, interior point methods for
+	linear programming problems, and other related problems.
+
+    symamd:  an approximate minimum degree ordering algorithm for Cholesky
+    	factorization of symmetric matrices.
 
     Purpose:
 
@@ -14,12 +20,16 @@
 	factorization, and P is computed during numerical factorization via
 	conventional partial pivoting with row interchanges.  Colamd is the
 	column ordering method used in SuperLU, part of the ScaLAPACK library.
-	It is also available as user-contributed software for Matlab 5.2,
+	It is also available as a built-in function in MATLAB Version 6,
 	available from MathWorks, Inc. (http://www.mathworks.com).  This
-	routine can be used in place of COLMMD in Matlab.  By default, the \
-	and / operators in Matlab perform a column ordering (using COLMMD)
-	prior to LU factorization using sparse partial pivoting, in the
-	built-in Matlab LU(A) routine.
+	routine can be used in place of colmmd in MATLAB.
+
+    	Symamd computes a permutation P of a symmetric matrix A such that the
+	Cholesky factorization of PAP' has less fill-in and requires fewer
+	floating point operations than A.  Symamd constructs a matrix M such
+	that M'M has the same nonzero pattern as A, and then orders the columns
+	of M using colamd.  The column ordering of M is then returned as the
+	row and column ordering P of A. 
 
     Authors:
 
@@ -30,45 +40,39 @@
 
     Date:
 
-	August 3, 1998.  Version 1.0.
+	September 8, 2003.  Version 2.3.
 
     Acknowledgements:
 
 	This work was supported by the National Science Foundation, under
 	grants DMS-9504974 and DMS-9803599.
 
-    Notice:
+    Copyright and License:
 
-	Copyright (c) 1998 by the University of Florida.  All Rights Reserved.
+	Copyright (c) 1998-2003 by the University of Florida.
+	All Rights Reserved.
 
 	THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
 	EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 
-	Permission is hereby granted to use or copy this program for any
-	purpose, provided the above notices are retained on all copies.
-	User documentation of any code that uses this code must cite the
-	Authors, the Copyright, and "Used by permission."  If this code is
-	accessible from within Matlab, then typing "help colamd" or "colamd"
-	(with no arguments) must cite the Authors.  Permission to modify the
-	code and to distribute modified code is granted, provided the above
-	notices are retained, and a notice that the code was modified is
-	included with the above copyright notice.  You must also retain the
-	Availability information below, of the original version.
-
-	This software is provided free of charge.
+	Permission is hereby granted to use, copy, modify, and/or distribute
+	this program, provided that the Copyright, this License, and the
+	Availability of the original version is retained on all copies and made
+	accessible to the end-user of any code or package that includes COLAMD
+	or any modified version of COLAMD. 
 
     Availability:
 
-	This file is located at
+	The colamd/symamd library is available at
 
-		http://www.cise.ufl.edu/~davis/colamd/colamd.c
+	    http://www.cise.ufl.edu/research/sparse/colamd/
+
+	This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.c
+	file.  It requires the colamd.h file.  It is required by the colamdmex.c
+	and symamdmex.c files, for the MATLAB interface to colamd and symamd.
+
+    See the ChangeLog file for changes since Version 1.0.
 
-	The colamd.h file is required, located in the same directory.
-	The colamdmex.c file provides a Matlab interface for colamd.
-	The symamdmex.c file provides a Matlab interface for symamd, which is
-	a symmetric ordering based on this code, colamd.c.  All codes are
-	purely ANSI C compliant (they use no Unix-specific routines, include
-	files, etc.).
 */
 
 /* ========================================================================== */
@@ -76,66 +80,86 @@
 /* ========================================================================== */
 
 /*
-    Each user-callable routine (declared as PUBLIC) is briefly described below.
-    Refer to the comments preceding each routine for more details.
-
     ----------------------------------------------------------------------------
     colamd_recommended:
     ----------------------------------------------------------------------------
 
-	Usage:
+	C syntax:
 
-	    Alen = colamd_recommended (nnz, n_row, n_col) ;
+	    #include "colamd.h"
+	    int colamd_recommended (int nnz, int n_row, int n_col) ;
+
+	    or as a C macro
+
+	    #include "colamd.h"
+	    Alen = COLAMD_RECOMMENDED (int nnz, int n_row, int n_col) ;
 
 	Purpose:
 
 	    Returns recommended value of Alen for use by colamd.  Returns -1
-	    if any input argument is negative.
+	    if any input argument is negative.  The use of this routine
+	    or macro is optional.  Note that the macro uses its arguments
+	    more than once, so be careful for side effects, if you pass
+	    expressions as arguments to COLAMD_RECOMMENDED.  Not needed for
+	    symamd, which dynamically allocates its own memory.
 
-	Arguments:
+	Arguments (all input arguments):
 
 	    int nnz ;		Number of nonzeros in the matrix A.  This must
 				be the same value as p [n_col] in the call to
 				colamd - otherwise you will get a wrong value
 				of the recommended memory to use.
+
 	    int n_row ;		Number of rows in the matrix A.
+
 	    int n_col ;		Number of columns in the matrix A.
 
     ----------------------------------------------------------------------------
     colamd_set_defaults:
     ----------------------------------------------------------------------------
 
-	Usage:
+	C syntax:
 
-	    colamd_set_defaults (knobs) ;
+	    #include "colamd.h"
+	    colamd_set_defaults (double knobs [COLAMD_KNOBS]) ;
 
 	Purpose:
 
-	    Sets the default parameters.
+	    Sets the default parameters.  The use of this routine is optional.
 
 	Arguments:
 
 	    double knobs [COLAMD_KNOBS] ;	Output only.
 
-		Rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) entries
-		are removed prior to ordering.  Columns with more than
-		(knobs [COLAMD_DENSE_COL] * n_row) entries are removed
-		prior to ordering, and placed last in the output column
-		ordering.  Default values of these two knobs are both 0.5.
-		Currently, only knobs [0] and knobs [1] are used, but future
-		versions may use more knobs.  If so, they will be properly set
-		to their defaults by the future version of colamd_set_defaults,
-		so that the code that calls colamd will not need to change,
-		assuming that you either use colamd_set_defaults, or pass a
-		(double *) NULL pointer as the knobs array to colamd.
+		Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col)
+		entries are removed prior to ordering.  Columns with more than
+		(knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to
+		ordering, and placed last in the output column ordering. 
+
+		Symamd: uses only knobs [COLAMD_DENSE_ROW], which is knobs [0].
+		Rows and columns with more than (knobs [COLAMD_DENSE_ROW] * n)
+		entries are removed prior to ordering, and placed last in the
+		output ordering.
+
+		COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1,
+		respectively, in colamd.h.  Default values of these two knobs
+		are both 0.5.  Currently, only knobs [0] and knobs [1] are
+		used, but future versions may use more knobs.  If so, they will
+		be properly set to their defaults by the future version of
+		colamd_set_defaults, so that the code that calls colamd will
+		not need to change, assuming that you either use
+		colamd_set_defaults, or pass a (double *) NULL pointer as the
+		knobs array to colamd or symamd.
 
     ----------------------------------------------------------------------------
     colamd:
     ----------------------------------------------------------------------------
 
-	Usage:
+	C syntax:
 
-	    colamd (n_row, n_col, Alen, A, p, knobs) ;
+	    #include "colamd.h"
+	    int colamd (int n_row, int n_col, int Alen, int *A, int *p,
+	    	double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS]) ;
 
 	Purpose:
 
@@ -143,34 +167,44 @@
 	    (AQ)'AQ=LL' have less fill-in and require fewer floating point
 	    operations than factorizing the unpermuted matrix A or A'A,
 	    respectively.
+	    
+	Returns:
+
+	    TRUE (1) if successful, FALSE (0) otherwise.
 
 	Arguments:
 
-	    int n_row ;
+	    int n_row ;		Input argument.
 
 		Number of rows in the matrix A.
 		Restriction:  n_row >= 0.
 		Colamd returns FALSE if n_row is negative.
 
-	    int n_col ;
+	    int n_col ;		Input argument.
 
 		Number of columns in the matrix A.
 		Restriction:  n_col >= 0.
 		Colamd returns FALSE if n_col is negative.
 
-	    int Alen ;
+	    int Alen ;		Input argument.
 
 		Restriction (see note):
-		Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col + COLAMD_STATS
+		Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col
 		Colamd returns FALSE if these conditions are not met.
 
 		Note:  this restriction makes an modest assumption regarding
-		the size of the two typedef'd structures, below.  We do,
-		however, guarantee that
-		Alen >= colamd_recommended (nnz, n_row, n_col)
+		the size of the two typedef'd structures in colamd.h.
+		We do, however, guarantee that
+
+			Alen >= colamd_recommended (nnz, n_row, n_col)
+		
+		or equivalently as a C preprocessor macro: 
+
+			Alen >= COLAMD_RECOMMENDED (nnz, n_row, n_col)
+
 		will be sufficient.
 
-	    int A [Alen] ;	Input argument, stats on output.
+	    int A [Alen] ;	Input argument, undefined on output.
 
 		A is an integer array of size Alen.  Alen must be at least as
 		large as the bare minimum value given above, but this is very
@@ -191,21 +225,8 @@
 		n_row-1, and columns are in the range 0 to n_col-1.  Colamd
 		returns FALSE if any row index is out of range.
 
-		The contents of A are modified during ordering, and are thus
-		undefined on output with the exception of a few statistics
-		about the ordering (A [0..COLAMD_STATS-1]):
-		A [0]:  number of dense or empty rows ignored.
-		A [1]:  number of dense or empty columns ignored (and ordered
-			last in the output permutation p)
-		A [2]:  number of garbage collections performed.
-		A [3]:  0, if all row indices in each column were in sorted
-			  order, and no duplicates were present.
-			1, otherwise (in which case colamd had to do more work)
-		Note that a row can become "empty" if it contains only
-		"dense" and/or "empty" columns, and similarly a column can
-		become "empty" if it only contains "dense" and/or "empty" rows.
-		Future versions may return more statistics in A, but the usage
-		of these 4 entries in A will remain unchanged.
+		The contents of A are modified during ordering, and are
+		undefined on output.
 
 	    int p [n_col+1] ;	Both input and output argument.
 
@@ -227,25 +248,334 @@
 		If colamd returns FALSE, then no permutation is returned, and
 		p is undefined on output.
 
-	    double knobs [COLAMD_KNOBS] ;	Input only.
+	    double knobs [COLAMD_KNOBS] ;	Input argument.
 
-		See colamd_set_defaults for a description.  If the knobs array
-		is not present (that is, if a (double *) NULL pointer is passed
-		in its place), then the default values of the parameters are
-		used instead.
+		See colamd_set_defaults for a description.
 
-*/
+	    int stats [COLAMD_STATS] ;		Output argument.
 
+		Statistics on the ordering, and error status.
+		See colamd.h for related definitions.
+		Colamd returns FALSE if stats is not present.
 
-/* ========================================================================== */
-/* === Include files ======================================================== */
-/* ========================================================================== */
+		stats [0]:  number of dense or empty rows ignored.
 
-/* limits.h:  the largest positive integer (INT_MAX) */
-#include <limits.h>
+		stats [1]:  number of dense or empty columns ignored (and
+				ordered last in the output permutation p)
+				Note that a row can become "empty" if it
+				contains only "dense" and/or "empty" columns,
+				and similarly a column can become "empty" if it
+				only contains "dense" and/or "empty" rows.
 
-/* colamd.h:  knob array size, stats output size, and global prototypes */
-#include "colamd.h"
+		stats [2]:  number of garbage collections performed.
+				This can be excessively high if Alen is close
+				to the minimum required value.
+
+		stats [3]:  status code.  < 0 is an error code.
+			    > 0 is a warning or notice.
+
+			0	OK.  Each column of the input matrix contained
+				row indices in increasing order, with no
+				duplicates.
+
+			1	OK, but columns of input matrix were jumbled
+				(unsorted columns or duplicate entries).  Colamd
+				had to do some extra work to sort the matrix
+				first and remove duplicate entries, but it
+				still was able to return a valid permutation
+				(return value of colamd was TRUE).
+
+					stats [4]: highest numbered column that
+						is unsorted or has duplicate
+						entries.
+					stats [5]: last seen duplicate or
+						unsorted row index.
+					stats [6]: number of duplicate or
+						unsorted row indices.
+
+			-1	A is a null pointer
+
+			-2	p is a null pointer
+
+			-3 	n_row is negative
+
+					stats [4]: n_row
+
+			-4	n_col is negative
+
+					stats [4]: n_col
+
+			-5	number of nonzeros in matrix is negative
+
+					stats [4]: number of nonzeros, p [n_col]
+
+			-6	p [0] is nonzero
+
+					stats [4]: p [0]
+
+			-7	A is too small
+
+					stats [4]: required size
+					stats [5]: actual size (Alen)
+
+			-8	a column has a negative number of entries
+
+					stats [4]: column with < 0 entries
+					stats [5]: number of entries in col
+
+			-9	a row index is out of bounds
+
+					stats [4]: column with bad row index
+					stats [5]: bad row index
+					stats [6]: n_row, # of rows of matrix
+
+			-10	(unused; see symamd.c)
+
+			-999	(unused; see symamd.c)
+
+		Future versions may return more statistics in the stats array.
+
+	Example:
+	
+	    See http://www.cise.ufl.edu/research/sparse/colamd/example.c
+	    for a complete example.
+
+	    To order the columns of a 5-by-4 matrix with 11 nonzero entries in
+	    the following nonzero pattern
+
+	    	x 0 x 0
+		x 0 x x
+		0 x x 0
+		0 0 x x
+		x x 0 0
+
+	    with default knobs and no output statistics, do the following:
+
+		#include "colamd.h"
+		#define ALEN COLAMD_RECOMMENDED (11, 5, 4)
+		int A [ALEN] = {0, 1, 4, 2, 4, 0, 1, 2, 3, 1, 3} ;
+		int p [ ] = {0, 3, 5, 9, 11} ;
+		int stats [COLAMD_STATS] ;
+		colamd (5, 4, ALEN, A, p, (double *) NULL, stats) ;
+
+	    The permutation is returned in the array p, and A is destroyed.
+
+    ----------------------------------------------------------------------------
+    symamd:
+    ----------------------------------------------------------------------------
+
+	C syntax:
+
+	    #include "colamd.h"
+	    int symamd (int n, int *A, int *p, int *perm,
+	    	double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS],
+		void * (*allocate) (size_t, size_t), void (*release) (void *)) ;
+
+	Purpose:
+
+    	    The symamd routine computes an ordering P of a symmetric sparse
+	    matrix A such that the Cholesky factorization PAP' = LL' remains
+	    sparse.  It is based on a column ordering of a matrix M constructed
+	    so that the nonzero pattern of M'M is the same as A.  The matrix A
+	    is assumed to be symmetric; only the strictly lower triangular part
+	    is accessed.  You must pass your selected memory allocator (usually
+	    calloc/free or mxCalloc/mxFree) to symamd, for it to allocate
+	    memory for the temporary matrix M.
+
+	Returns:
+
+	    TRUE (1) if successful, FALSE (0) otherwise.
+
+	Arguments:
+
+	    int n ;		Input argument.
+
+	    	Number of rows and columns in the symmetric matrix A.
+		Restriction:  n >= 0.
+		Symamd returns FALSE if n is negative.
+
+	    int A [nnz] ;	Input argument.
+
+	    	A is an integer array of size nnz, where nnz = p [n].
+		
+		The row indices of the entries in column c of the matrix are
+		held in A [(p [c]) ... (p [c+1]-1)].  The row indices in a
+		given column c need not be in ascending order, and duplicate
+		row indices may be present.  However, symamd will run faster
+		if the columns are in sorted order with no duplicate entries. 
+
+		The matrix is 0-based.  That is, rows are in the range 0 to
+		n-1, and columns are in the range 0 to n-1.  Symamd
+		returns FALSE if any row index is out of range.
+
+		The contents of A are not modified.
+
+	    int p [n+1] ;   	Input argument.
+
+		p is an integer array of size n+1.  On input, it holds the
+		"pointers" for the column form of the matrix A.  Column c of
+		the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
+		entry, p [0], must be zero, and p [c] <= p [c+1] must hold
+		for all c in the range 0 to n-1.  The value p [n] is
+		thus the total number of entries in the pattern of the matrix A.
+		Symamd returns FALSE if these conditions are not met.
+
+		The contents of p are not modified.
+
+	    int perm [n+1] ;   	Output argument.
+
+		On output, if symamd returns TRUE, the array perm holds the
+		permutation P, where perm [0] is the first index in the new
+		ordering, and perm [n-1] is the last.  That is, perm [k] = j
+		means that row and column j of A is the kth column in PAP',
+		where k is in the range 0 to n-1 (perm [0] = j means
+		that row and column j of A are the first row and column in
+		PAP').  The array is used as a workspace during the ordering,
+		which is why it must be of length n+1, not just n.
+
+	    double knobs [COLAMD_KNOBS] ;	Input argument.
+
+		See colamd_set_defaults for a description.
+
+	    int stats [COLAMD_STATS] ;		Output argument.
+
+		Statistics on the ordering, and error status.
+		See colamd.h for related definitions.
+		Symamd returns FALSE if stats is not present.
+
+		stats [0]:  number of dense or empty rows and columns ignored
+				(and ordered last in the output permutation 
+				perm).  Note that a row/column can become
+				"empty" if it contains only "dense" and/or
+				"empty" columns/rows.
+
+		stats [1]:  (same as stats [0])
+
+		stats [2]:  number of garbage collections performed.
+
+		stats [3]:  status code.  < 0 is an error code.
+			    > 0 is a warning or notice.
+
+			0	OK.  Each column of the input matrix contained
+				row indices in increasing order, with no
+				duplicates.
+
+			1	OK, but columns of input matrix were jumbled
+				(unsorted columns or duplicate entries).  Symamd
+				had to do some extra work to sort the matrix
+				first and remove duplicate entries, but it
+				still was able to return a valid permutation
+				(return value of symamd was TRUE).
+
+					stats [4]: highest numbered column that
+						is unsorted or has duplicate
+						entries.
+					stats [5]: last seen duplicate or
+						unsorted row index.
+					stats [6]: number of duplicate or
+						unsorted row indices.
+
+			-1	A is a null pointer
+
+			-2	p is a null pointer
+
+			-3	(unused, see colamd.c)
+
+			-4 	n is negative
+
+					stats [4]: n
+
+			-5	number of nonzeros in matrix is negative
+
+					stats [4]: # of nonzeros (p [n]).
+
+			-6	p [0] is nonzero
+
+					stats [4]: p [0]
+
+			-7	(unused)
+
+			-8	a column has a negative number of entries
+
+					stats [4]: column with < 0 entries
+					stats [5]: number of entries in col
+
+			-9	a row index is out of bounds
+
+					stats [4]: column with bad row index
+					stats [5]: bad row index
+					stats [6]: n_row, # of rows of matrix
+
+			-10	out of memory (unable to allocate temporary
+				workspace for M or count arrays using the
+				"allocate" routine passed into symamd).
+
+			-999	internal error.  colamd failed to order the
+				matrix M, when it should have succeeded.  This
+				indicates a bug.  If this (and *only* this)
+				error code occurs, please contact the authors.
+				Don't contact the authors if you get any other
+				error code.
+
+		Future versions may return more statistics in the stats array.
+
+	    void * (*allocate) (size_t, size_t)
+
+	    	A pointer to a function providing memory allocation.  The
+		allocated memory must be returned initialized to zero.  For a
+		C application, this argument should normally be a pointer to
+		calloc.  For a MATLAB mexFunction, the routine mxCalloc is
+		passed instead.
+
+	    void (*release) (void *)
+
+	    	A pointer to a function that frees memory allocated by the
+		memory allocation routine above.  For a C application, this
+		argument should normally be a pointer to free.  For a MATLAB
+		mexFunction, the routine mxFree is passed instead.
+
+
+    ----------------------------------------------------------------------------
+    colamd_report:
+    ----------------------------------------------------------------------------
+
+	C syntax:
+
+	    #include "colamd.h"
+	    colamd_report (int stats [COLAMD_STATS]) ;
+
+	Purpose:
+
+	    Prints the error status and statistics recorded in the stats
+	    array on the standard output (for a standard C routine)
+	    or on the MATLAB output (for a mexFunction).
+
+	Arguments:
+
+	    int stats [COLAMD_STATS] ;	Input only.  Statistics from colamd.
+
+
+    ----------------------------------------------------------------------------
+    symamd_report:
+    ----------------------------------------------------------------------------
+
+	C syntax:
+
+	    #include "colamd.h"
+	    symamd_report (int stats [COLAMD_STATS]) ;
+
+	Purpose:
+
+	    Prints the error status and statistics recorded in the stats
+	    array on the standard output (for a standard C routine)
+	    or on the MATLAB output (for a mexFunction).
+
+	Arguments:
+
+	    int stats [COLAMD_STATS] ;	Input only.  Statistics from symamd.
+
+
+*/
 
 /* ========================================================================== */
 /* === Scaffolding code definitions  ======================================== */
@@ -254,10 +584,7 @@
 /* Ensure that debugging is turned off: */
 #ifndef NDEBUG
 #define NDEBUG
-#endif
-
-/* assert.h:  the assert macro (no debugging if NDEBUG is defined) */
-#include <assert.h>
+#endif /* NDEBUG */
 
 /*
    Our "scaffolding code" philosophy:  In our opinion, well-written library
@@ -276,77 +603,62 @@
    (3) (gasp!) for actually finding bugs.  This code has been heavily tested
 	and "should" be fully functional and bug-free ... but you never know...
 
-    To enable debugging, comment out the "#define NDEBUG" above.  The code will
-    become outrageously slow when debugging is enabled.  To control the level of
-    debugging output, set an environment variable D to 0 (little), 1 (some),
-    2, 3, or 4 (lots).
+    To enable debugging, comment out the "#define NDEBUG" above.  For a MATLAB
+    mexFunction, you will also need to modify mexopts.sh to remove the -DNDEBUG
+    definition.  The code will become outrageously slow when debugging is
+    enabled.  To control the level of debugging output, set an environment
+    variable D to 0 (little), 1 (some), 2, 3, or 4 (lots).  When debugging,
+    you should see the following message on the standard output:
+
+    	colamd: debug version, D = 1 (THIS WILL BE SLOW!)
+
+    or a similar message for symamd.  If you don't, then debugging has not
+    been enabled.
+
 */
 
 /* ========================================================================== */
-/* === Row and Column structures ============================================ */
+/* === Include files ======================================================== */
 /* ========================================================================== */
 
-typedef struct ColInfo_struct
-{
-    int start ;		/* index for A of first row in this column, or DEAD */
-			/* if column is dead */
-    int length ;	/* number of rows in this column */
-    union
-    {
-	int thickness ;	/* number of original columns represented by this */
-			/* col, if the column is alive */
-	int parent ;	/* parent in parent tree super-column structure, if */
-			/* the column is dead */
-    } shared1 ;
-    union
-    {
-	int score ;	/* the score used to maintain heap, if col is alive */
-	int order ;	/* pivot ordering of this column, if col is dead */
-    } shared2 ;
-    union
-    {
-	int headhash ;	/* head of a hash bucket, if col is at the head of */
-			/* a degree list */
-	int hash ;	/* hash value, if col is not in a degree list */
-	int prev ;	/* previous column in degree list, if col is in a */
-			/* degree list (but not at the head of a degree list) */
-    } shared3 ;
-    union
-    {
-	int degree_next ;	/* next column, if col is in a degree list */
-	int hash_next ;		/* next column, if col is in a hash list */
-    } shared4 ;
-
-} ColInfo ;
-
-typedef struct RowInfo_struct
-{
-    int start ;		/* index for A of first col in this row */
-    int length ;	/* number of principal columns in this row */
-    union
-    {
-	int degree ;	/* number of principal & non-principal columns in row */
-	int p ;		/* used as a row pointer in init_rows_cols () */
-    } shared1 ;
-    union
-    {
-	int mark ;	/* for computing set differences and marking dead rows*/
-	int first_column ;/* first column in row (used in garbage collection) */
-    } shared2 ;
+#include "colamd.h"
+#include <limits.h>
 
-} RowInfo ;
+#ifdef MATLAB_MEX_FILE
+#include "mex.h"
+#include "matrix.h"
+#else
+#include <stdio.h>
+#include <assert.h>
+#endif /* MATLAB_MEX_FILE */
 
 /* ========================================================================== */
 /* === Definitions ========================================================== */
 /* ========================================================================== */
 
+/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
+#define PUBLIC
+#define PRIVATE static
+
 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
 #define MIN(a,b) (((a) < (b)) ? (a) : (b))
 
 #define ONES_COMPLEMENT(r) (-(r)-1)
 
-#define TRUE	(1)
-#define FALSE	(0)
+/* -------------------------------------------------------------------------- */
+/* Change for version 2.1:  define TRUE and FALSE only if not yet defined */  
+/* -------------------------------------------------------------------------- */
+
+#ifndef TRUE
+#define TRUE (1)
+#endif
+
+#ifndef FALSE
+#define FALSE (0)
+#endif
+
+/* -------------------------------------------------------------------------- */
+
 #define EMPTY	(-1)
 
 /* Row and column status */
@@ -368,9 +680,29 @@ typedef struct RowInfo_struct
 #define KILL_PRINCIPAL_COL(c)		{ Col [c].start = DEAD_PRINCIPAL ; }
 #define KILL_NON_PRINCIPAL_COL(c)	{ Col [c].start = DEAD_NON_PRINCIPAL ; }
 
-/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
-#define PUBLIC
-#define PRIVATE static
+/* ========================================================================== */
+/* === Colamd reporting mechanism =========================================== */
+/* ========================================================================== */
+
+#ifdef MATLAB_MEX_FILE
+
+/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output */
+#define PRINTF mexPrintf
+
+/* In MATLAB, matrices are 1-based to the user, but 0-based internally */
+#define INDEX(i) ((i)+1)
+
+#else
+
+/* Use printf in standard C environment, for debugging and statistics output. */
+/* Output is generated only if debugging is enabled at compile time, or if */
+/* the caller explicitly calls colamd_report or symamd_report. */
+#define PRINTF printf
+
+/* In C, matrices are 0-based and indices are reported as such in *_report */
+#define INDEX(i) (i)
+
+#endif /* MATLAB_MEX_FILE */
 
 /* ========================================================================== */
 /* === Prototypes of PRIVATE routines ======================================= */
@@ -380,18 +712,19 @@ PRIVATE int init_rows_cols
 (
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int A [],
-    int p []
+    int p [],
+    int stats [COLAMD_STATS]
 ) ;
 
 PRIVATE void init_scoring
 (
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int A [],
     int head [],
     double knobs [COLAMD_KNOBS],
@@ -405,8 +738,8 @@ PRIVATE int find_ordering
     int n_row,
     int n_col,
     int Alen,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int A [],
     int head [],
     int n_col2,
@@ -417,17 +750,19 @@ PRIVATE int find_ordering
 PRIVATE void order_children
 (
     int n_col,
-    ColInfo Col [],
+    Colamd_Col Col [],
     int p []
 ) ;
 
 PRIVATE void detect_super_cols
 (
+
 #ifndef NDEBUG
     int n_col,
-    RowInfo Row [],
-#endif
-    ColInfo Col [],
+    Colamd_Row Row [],
+#endif /* NDEBUG */
+
+    Colamd_Col Col [],
     int A [],
     int head [],
     int row_start,
@@ -438,8 +773,8 @@ PRIVATE int garbage_collection
 (
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int A [],
     int *pfree
 ) ;
@@ -447,29 +782,49 @@ PRIVATE int garbage_collection
 PRIVATE int clear_mark
 (
     int n_row,
-    RowInfo Row []
+    Colamd_Row Row []
+) ;
+
+PRIVATE void print_report
+(
+    char *method,
+    int stats [COLAMD_STATS]
 ) ;
 
 /* ========================================================================== */
-/* === Debugging definitions ================================================ */
+/* === Debugging prototypes and definitions ================================= */
 /* ========================================================================== */
 
 #ifndef NDEBUG
 
-/* === With debugging ======================================================= */
+/* colamd_debug is the *ONLY* global variable, and is only */
+/* present when debugging */
 
-/* stdlib.h: for getenv and atoi, to get debugging level from environment */
-#include <stdlib.h>
+PRIVATE int colamd_debug ;	/* debug print level */
 
-/* stdio.h:  for printf (no printing if debugging is turned off) */
-#include <stdio.h>
+#define DEBUG0(params) { (void) PRINTF params ; }
+#define DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; }
+#define DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; }
+#define DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; }
+#define DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; }
+
+#ifdef MATLAB_MEX_FILE
+#define ASSERT(expression) (mxAssert ((expression), ""))
+#else
+#define ASSERT(expression) (assert (expression))
+#endif /* MATLAB_MEX_FILE */
+
+PRIVATE void colamd_get_debug	/* gets the debug print level from getenv */
+(
+    char *method
+) ;
 
 PRIVATE void debug_deg_lists
 (
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int head [],
     int min_score,
     int should,
@@ -479,7 +834,7 @@ PRIVATE void debug_deg_lists
 PRIVATE void debug_mark
 (
     int n_row,
-    RowInfo Row [],
+    Colamd_Row Row [],
     int tag_mark,
     int max_mark
 ) ;
@@ -488,8 +843,8 @@ PRIVATE void debug_matrix
 (
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int A []
 ) ;
 
@@ -497,24 +852,13 @@ PRIVATE void debug_structures
 (
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int A [],
     int n_col2
 ) ;
 
-/* the following is the *ONLY* global variable in this file, and is only */
-/* present when debugging */
-
-PRIVATE int debug_colamd ;	/* debug print level */
-
-#define DEBUG0(params) { (void) printf params ; }
-#define DEBUG1(params) { if (debug_colamd >= 1) (void) printf params ; }
-#define DEBUG2(params) { if (debug_colamd >= 2) (void) printf params ; }
-#define DEBUG3(params) { if (debug_colamd >= 3) (void) printf params ; }
-#define DEBUG4(params) { if (debug_colamd >= 4) (void) printf params ; }
-
-#else
+#else /* NDEBUG */
 
 /* === No debugging ========================================================= */
 
@@ -524,104 +868,426 @@ PRIVATE int debug_colamd ;	/* debug print level */
 #define DEBUG3(params) ;
 #define DEBUG4(params) ;
 
-#endif
+#define ASSERT(expression) ((void) 0)
+
+#endif /* NDEBUG */
+
+/* ========================================================================== */
+
+
+
+/* ========================================================================== */
+/* === USER-CALLABLE ROUTINES: ============================================== */
+/* ========================================================================== */
+
+
+/* ========================================================================== */
+/* === colamd_recommended =================================================== */
+/* ========================================================================== */
+
+/*
+    The colamd_recommended routine returns the suggested size for Alen.  This
+    value has been determined to provide good balance between the number of
+    garbage collections and the memory requirements for colamd.  If any
+    argument is negative, a -1 is returned as an error condition.  This
+    function is also available as a macro defined in colamd.h, so that you
+    can use it for a statically-allocated array size.
+*/
+
+PUBLIC int colamd_recommended	/* returns recommended value of Alen. */
+(
+    /* === Parameters ======================================================= */
+
+    int nnz,			/* number of nonzeros in A */
+    int n_row,			/* number of rows in A */
+    int n_col			/* number of columns in A */
+)
+{
+    return (COLAMD_RECOMMENDED (nnz, n_row, n_col)) ; 
+}
+
+
+/* ========================================================================== */
+/* === colamd_set_defaults ================================================== */
+/* ========================================================================== */
+
+/*
+    The colamd_set_defaults routine sets the default values of the user-
+    controllable parameters for colamd:
+
+	knobs [0]	rows with knobs[0]*n_col entries or more are removed
+			prior to ordering in colamd.  Rows and columns with
+			knobs[0]*n_col entries or more are removed prior to
+			ordering in symamd and placed last in the output
+			ordering.
+
+	knobs [1]	columns with knobs[1]*n_row entries or more are removed
+			prior to ordering in colamd, and placed last in the
+			column permutation.  Symamd ignores this knob.
+
+	knobs [2..19]	unused, but future versions might use this
+*/
+
+PUBLIC void colamd_set_defaults
+(
+    /* === Parameters ======================================================= */
+
+    double knobs [COLAMD_KNOBS]		/* knob array */
+)
+{
+    /* === Local variables ================================================== */
+
+    int i ;
+
+    if (!knobs)
+    {
+	return ;			/* no knobs to initialize */
+    }
+    for (i = 0 ; i < COLAMD_KNOBS ; i++)
+    {
+	knobs [i] = 0 ;
+    }
+    knobs [COLAMD_DENSE_ROW] = 0.5 ;	/* ignore rows over 50% dense */
+    knobs [COLAMD_DENSE_COL] = 0.5 ;	/* ignore columns over 50% dense */
+}
+
+
+/* ========================================================================== */
+/* === symamd =============================================================== */
+/* ========================================================================== */
+
+PUBLIC int symamd			/* return TRUE if OK, FALSE otherwise */
+(
+    /* === Parameters ======================================================= */
+
+    int n,				/* number of rows and columns of A */
+    int A [],				/* row indices of A */
+    int p [],				/* column pointers of A */
+    int perm [],			/* output permutation, size n+1 */
+    double knobs [COLAMD_KNOBS],	/* parameters (uses defaults if NULL) */
+    int stats [COLAMD_STATS],		/* output statistics and error codes */
+    void * (*allocate) (size_t, size_t),
+    					/* pointer to calloc (ANSI C) or */
+					/* mxCalloc (for MATLAB mexFunction) */
+    void (*release) (void *)
+    					/* pointer to free (ANSI C) or */
+    					/* mxFree (for MATLAB mexFunction) */
+)
+{
+    /* === Local variables ================================================== */
+
+    int *count ;		/* length of each column of M, and col pointer*/
+    int *mark ;			/* mark array for finding duplicate entries */
+    int *M ;			/* row indices of matrix M */
+    int Mlen ;			/* length of M */
+    int n_row ;			/* number of rows in M */
+    int nnz ;			/* number of entries in A */
+    int i ;			/* row index of A */
+    int j ;			/* column index of A */
+    int k ;			/* row index of M */ 
+    int mnz ;			/* number of nonzeros in M */
+    int pp ;			/* index into a column of A */
+    int last_row ;		/* last row seen in the current column */
+    int length ;		/* number of nonzeros in a column */
+
+    double cknobs [COLAMD_KNOBS] ;		/* knobs for colamd */
+    double default_knobs [COLAMD_KNOBS] ;	/* default knobs for colamd */
+    int cstats [COLAMD_STATS] ;			/* colamd stats */
+
+#ifndef NDEBUG
+    colamd_get_debug ("symamd") ;
+#endif /* NDEBUG */
+
+    /* === Check the input arguments ======================================== */
+
+    if (!stats)
+    {
+	DEBUG0 (("symamd: stats not present\n")) ;
+	return (FALSE) ;
+    }
+    for (i = 0 ; i < COLAMD_STATS ; i++)
+    {
+	stats [i] = 0 ;
+    }
+    stats [COLAMD_STATUS] = COLAMD_OK ;
+    stats [COLAMD_INFO1] = -1 ;
+    stats [COLAMD_INFO2] = -1 ;
+
+    if (!A)
+    {
+    	stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
+	DEBUG0 (("symamd: A not present\n")) ;
+	return (FALSE) ;
+    }
+
+    if (!p)		/* p is not present */
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
+	DEBUG0 (("symamd: p not present\n")) ;
+    	return (FALSE) ;
+    }
+
+    if (n < 0)		/* n must be >= 0 */
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
+	stats [COLAMD_INFO1] = n ;
+	DEBUG0 (("symamd: n negative %d\n", n)) ;
+    	return (FALSE) ;
+    }
+
+    nnz = p [n] ;
+    if (nnz < 0)	/* nnz must be >= 0 */
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
+	stats [COLAMD_INFO1] = nnz ;
+	DEBUG0 (("symamd: number of entries negative %d\n", nnz)) ;
+	return (FALSE) ;
+    }
+
+    if (p [0] != 0)
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
+	stats [COLAMD_INFO1] = p [0] ;
+	DEBUG0 (("symamd: p[0] not zero %d\n", p [0])) ;
+	return (FALSE) ;
+    }
+
+    /* === If no knobs, set default knobs =================================== */
+
+    if (!knobs)
+    {
+	colamd_set_defaults (default_knobs) ;
+	knobs = default_knobs ;
+    }
+
+    /* === Allocate count and mark ========================================== */
+
+    count = (int *) ((*allocate) (n+1, sizeof (int))) ;
+    if (!count)
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
+	DEBUG0 (("symamd: allocate count (size %d) failed\n", n+1)) ;
+	return (FALSE) ;
+    }
+
+    mark = (int *) ((*allocate) (n+1, sizeof (int))) ;
+    if (!mark)
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
+	(*release) ((void *) count) ;
+	DEBUG0 (("symamd: allocate mark (size %d) failed\n", n+1)) ;
+	return (FALSE) ;
+    }
+
+    /* === Compute column counts of M, check if A is valid ================== */
+
+    stats [COLAMD_INFO3] = 0 ;  /* number of duplicate or unsorted row indices*/
+
+    for (i = 0 ; i < n ; i++)
+    {
+    	mark [i] = -1 ;
+    }
+
+    for (j = 0 ; j < n ; j++)
+    {
+	last_row = -1 ;
+
+	length = p [j+1] - p [j] ;
+	if (length < 0)
+	{
+	    /* column pointers must be non-decreasing */
+	    stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
+	    stats [COLAMD_INFO1] = j ;
+	    stats [COLAMD_INFO2] = length ;
+	    (*release) ((void *) count) ;
+	    (*release) ((void *) mark) ;
+	    DEBUG0 (("symamd: col %d negative length %d\n", j, length)) ;
+	    return (FALSE) ;
+	}
+
+	for (pp = p [j] ; pp < p [j+1] ; pp++)
+	{
+	    i = A [pp] ;
+	    if (i < 0 || i >= n)
+	    {
+		/* row index i, in column j, is out of bounds */
+		stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
+		stats [COLAMD_INFO1] = j ;
+		stats [COLAMD_INFO2] = i ;
+		stats [COLAMD_INFO3] = n ;
+		(*release) ((void *) count) ;
+		(*release) ((void *) mark) ;
+		DEBUG0 (("symamd: row %d col %d out of bounds\n", i, j)) ;
+		return (FALSE) ;
+	    }
+
+	    if (i <= last_row || mark [i] == j)
+	    {
+		/* row index is unsorted or repeated (or both), thus col */
+		/* is jumbled.  This is a notice, not an error condition. */
+		stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
+		stats [COLAMD_INFO1] = j ;
+		stats [COLAMD_INFO2] = i ;
+		(stats [COLAMD_INFO3]) ++ ;
+		DEBUG1 (("symamd: row %d col %d unsorted/duplicate\n", i, j)) ;
+	    }
+
+	    if (i > j && mark [i] != j)
+	    {
+		/* row k of M will contain column indices i and j */
+		count [i]++ ;
+		count [j]++ ;
+	    }
 
-/* ========================================================================== */
+	    /* mark the row as having been seen in this column */
+	    mark [i] = j ;
 
+	    last_row = i ;
+	}
+    }
 
-/* ========================================================================== */
-/* === USER-CALLABLE ROUTINES: ============================================== */
-/* ========================================================================== */
+    if (stats [COLAMD_STATUS] == COLAMD_OK)
+    {
+	/* if there are no duplicate entries, then mark is no longer needed */
+	(*release) ((void *) mark) ;
+    }
 
+    /* === Compute column pointers of M ===================================== */
 
-/* ========================================================================== */
-/* === colamd_recommended =================================================== */
-/* ========================================================================== */
+    /* use output permutation, perm, for column pointers of M */
+    perm [0] = 0 ;
+    for (j = 1 ; j <= n ; j++)
+    {
+	perm [j] = perm [j-1] + count [j-1] ;
+    }
+    for (j = 0 ; j < n ; j++)
+    {
+	count [j] = perm [j] ;
+    }
 
-/*
-    The colamd_recommended routine returns the suggested size for Alen.  This
-    value has been determined to provide good balance between the number of
-    garbage collections and the memory requirements for colamd.
-*/
+    /* === Construct M ====================================================== */
 
-PUBLIC int colamd_recommended	/* returns recommended value of Alen. */
-(
-    /* === Parameters ======================================================= */
+    mnz = perm [n] ;
+    n_row = mnz / 2 ;
+    Mlen = colamd_recommended (mnz, n_row, n) ;
+    M = (int *) ((*allocate) (Mlen, sizeof (int))) ;
+    DEBUG0 (("symamd: M is %d-by-%d with %d entries, Mlen = %d\n",
+    	n_row, n, mnz, Mlen)) ;
 
-    int nnz,			/* number of nonzeros in A */
-    int n_row,			/* number of rows in A */
-    int n_col			/* number of columns in A */
-)
-{
-    /* === Local variables ================================================== */
+    if (!M)
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
+	(*release) ((void *) count) ;
+	(*release) ((void *) mark) ;
+	DEBUG0 (("symamd: allocate M (size %d) failed\n", Mlen)) ;
+	return (FALSE) ;
+    }
 
-    int minimum ;		/* bare minimum requirements */
-    int recommended ;		/* recommended value of Alen */
+    k = 0 ;
 
-    if (nnz < 0 || n_row < 0 || n_col < 0)
+    if (stats [COLAMD_STATUS] == COLAMD_OK)
     {
-	/* return -1 if any input argument is corrupted */
-	DEBUG0 (("colamd_recommended error!")) ;
-	DEBUG0 ((" nnz: %d, n_row: %d, n_col: %d\n", nnz, n_row, n_col)) ;
-	return (-1) ;
+	/* Matrix is OK */
+	for (j = 0 ; j < n ; j++)
+	{
+	    ASSERT (p [j+1] - p [j] >= 0) ;
+	    for (pp = p [j] ; pp < p [j+1] ; pp++)
+	    {
+		i = A [pp] ;
+		ASSERT (i >= 0 && i < n) ;
+		if (i > j)
+		{
+		    /* row k of M contains column indices i and j */
+		    M [count [i]++] = k ;
+		    M [count [j]++] = k ;
+		    k++ ;
+		}
+	    }
+	}
+    }
+    else
+    {
+	/* Matrix is jumbled.  Do not add duplicates to M.  Unsorted cols OK. */
+	DEBUG0 (("symamd: Duplicates in A.\n")) ;
+	for (i = 0 ; i < n ; i++)
+	{
+	    mark [i] = -1 ;
+	}
+	for (j = 0 ; j < n ; j++)
+	{
+	    ASSERT (p [j+1] - p [j] >= 0) ;
+	    for (pp = p [j] ; pp < p [j+1] ; pp++)
+	    {
+		i = A [pp] ;
+		ASSERT (i >= 0 && i < n) ;
+		if (i > j && mark [i] != j)
+		{
+		    /* row k of M contains column indices i and j */
+		    M [count [i]++] = k ;
+		    M [count [j]++] = k ;
+		    k++ ;
+		    mark [i] = j ;
+		}
+	    }
+	}
+	(*release) ((void *) mark) ;
     }
 
-    minimum =
-	2 * (nnz)		/* for A */
-	+ (((n_col) + 1) * sizeof (ColInfo) / sizeof (int))	/* for Col */
-	+ (((n_row) + 1) * sizeof (RowInfo) / sizeof (int))	/* for Row */
-	+ n_col			/* minimum elbow room to guarrantee success */
-	+ COLAMD_STATS ;	/* for output statistics */
+    /* count and mark no longer needed */
+    (*release) ((void *) count) ;
+    ASSERT (k == n_row) ;
 
-    /* recommended is equal to the minumum plus enough memory to keep the */
-    /* number garbage collections low */
-    recommended = minimum + nnz/5 ;
+    /* === Adjust the knobs for M =========================================== */
 
-    return (recommended) ;
-}
+    for (i = 0 ; i < COLAMD_KNOBS ; i++)
+    {
+	cknobs [i] = knobs [i] ;
+    }
 
+    /* there are no dense rows in M */
+    cknobs [COLAMD_DENSE_ROW] = 1.0 ;
 
-/* ========================================================================== */
-/* === colamd_set_defaults ================================================== */
-/* ========================================================================== */
+    if (n_row != 0 && n < n_row)
+    {
+	/* On input, the knob is a fraction of 1..n, the number of rows of A. */
+	/* Convert it to a fraction of 1..n_row, of the number of rows of M. */
+    	cknobs [COLAMD_DENSE_COL] = (knobs [COLAMD_DENSE_ROW] * n) / n_row ;
+    }
+    else
+    {
+	/* no dense columns in M */
+    	cknobs [COLAMD_DENSE_COL] = 1.0 ;
+    }
 
-/*
-    The colamd_set_defaults routine sets the default values of the user-
-    controllable parameters for colamd:
+    DEBUG0 (("symamd: dense col knob for M: %g\n", cknobs [COLAMD_DENSE_COL])) ;
 
-	knobs [0]	rows with knobs[0]*n_col entries or more are removed
-			prior to ordering.
+    /* === Order the columns of M =========================================== */
 
-	knobs [1]	columns with knobs[1]*n_row entries or more are removed
-			prior to ordering, and placed last in the column
-			permutation.
+    if (!colamd (n_row, n, Mlen, M, perm, cknobs, cstats))
+    {
+	/* This "cannot" happen, unless there is a bug in the code. */
+	stats [COLAMD_STATUS] = COLAMD_ERROR_internal_error ;
+	(*release) ((void *) M) ;
+	DEBUG0 (("symamd: internal error!\n")) ;
+	return (FALSE) ;
+    }
 
-	knobs [2..19]	unused, but future versions might use this
-*/
+    /* Note that the output permutation is now in perm */
 
-PUBLIC void colamd_set_defaults
-(
-    /* === Parameters ======================================================= */
+    /* === get the statistics for symamd from colamd ======================== */
 
-    double knobs [COLAMD_KNOBS]		/* knob array */
-)
-{
-    /* === Local variables ================================================== */
+    /* note that a dense column in colamd means a dense row and col in symamd */
+    stats [COLAMD_DENSE_ROW]    = cstats [COLAMD_DENSE_COL] ;
+    stats [COLAMD_DENSE_COL]    = cstats [COLAMD_DENSE_COL] ;
+    stats [COLAMD_DEFRAG_COUNT] = cstats [COLAMD_DEFRAG_COUNT] ;
 
-    int i ;
+    /* === Free M =========================================================== */
 
-    if (!knobs)
-    {
-	return ;			/* no knobs to initialize */
-    }
-    for (i = 0 ; i < COLAMD_KNOBS ; i++)
-    {
-	knobs [i] = 0 ;
-    }
-    knobs [COLAMD_DENSE_ROW] = 0.5 ;	/* ignore rows over 50% dense */
-    knobs [COLAMD_DENSE_COL] = 0.5 ;	/* ignore columns over 50% dense */
-}
+    (*release) ((void *) M) ;
+    DEBUG0 (("symamd: done.\n")) ;
+    return (TRUE) ;
 
+}
 
 /* ========================================================================== */
 /* === colamd =============================================================== */
@@ -633,79 +1299,9 @@ PUBLIC void colamd_set_defaults
     selected via partial pivoting.   The routine can also be viewed as
     providing a permutation Q such that the Cholesky factorization
     (AQ)'(AQ) = LL' remains sparse.
-
-    On input, the nonzero patterns of the columns of A are stored in the
-    array A, in order 0 to n_col-1.  A is held in 0-based form (rows in the
-    range 0 to n_row-1 and columns in the range 0 to n_col-1).  Row indices
-    for column c are located in A [(p [c]) ... (p [c+1]-1)], where p [0] = 0,
-    and thus p [n_col] is the number of entries in A.  The matrix is
-    destroyed on output.  The row indices within each column do not have to
-    be sorted (from small to large row indices), and duplicate row indices
-    may be present.  However, colamd will work a little faster if columns are
-    sorted and no duplicates are present.  Matlab 5.2 always passes the matrix
-    with sorted columns, and no duplicates.
-
-    The integer array A is of size Alen.  Alen must be at least of size
-    (where nnz is the number of entries in A):
-
-	nnz			for the input column form of A
-	+ nnz			for a row form of A that colamd generates
-	+ 6*(n_col+1)		for a ColInfo Col [0..n_col] array
-				(this assumes sizeof (ColInfo) is 6 int's).
-	+ 4*(n_row+1)		for a RowInfo Row [0..n_row] array
-				(this assumes sizeof (RowInfo) is 4 int's).
-	+ elbow_room		must be at least n_col.  We recommend at least
-				nnz/5 in addition to that.  If sufficient,
-				changes in the elbow room affect the ordering
-				time only, not the ordering itself.
-	+ COLAMD_STATS		for the output statistics
-
-    Colamd returns FALSE is memory is insufficient, or TRUE otherwise.
-
-    On input, the caller must specify:
-
-	n_row			the number of rows of A
-	n_col			the number of columns of A
-	Alen			the size of the array A
-	A [0 ... nnz-1]		the row indices, where nnz = p [n_col]
-	A [nnz ... Alen-1]	(need not be initialized by the user)
-	p [0 ... n_col]		the column pointers,  p [0] = 0, and p [n_col]
-				is the number of entries in A.  Column c of A
-				is stored in A [p [c] ... p [c+1]-1].
-	knobs [0 ... 19]	a set of parameters that control the behavior
-				of colamd.  If knobs is a NULL pointer the
-				defaults are used.  The user-callable
-				colamd_set_defaults routine sets the default
-				parameters.  See that routine for a description
-				of the user-controllable parameters.
-
-    If the return value of Colamd is TRUE, then on output:
-
-	p [0 ... n_col-1]	the column permutation. p [0] is the first
-				column index, and p [n_col-1] is the last.
-				That is, p [k] = j means that column j of A
-				is the kth column of AQ.
-
-	A			is undefined on output (the matrix pattern is
-				destroyed), except for the following statistics:
-
-	A [0]			the number of dense (or empty) rows ignored
-	A [1]			the number of dense (or empty) columms.  These
-				are ordered last, in their natural order.
-	A [2]			the number of garbage collections performed.
-				If this is excessive, then you would have
-				gotten your results faster if Alen was larger.
-	A [3]			0, if all row indices in each column were in
-				sorted order and no duplicates were present.
-				1, if there were unsorted or duplicate row
-				indices in the input.  You would have gotten
-				your results faster if A [3] was returned as 0.
-
-    If the return value of Colamd is FALSE, then A and p are undefined on
-    output.
 */
 
-PUBLIC int colamd		/* returns TRUE if successful */
+PUBLIC int colamd		/* returns TRUE if successful, FALSE otherwise*/
 (
     /* === Parameters ======================================================= */
 
@@ -714,7 +1310,8 @@ PUBLIC int colamd		/* returns TRUE if successful */
     int Alen,			/* length of A */
     int A [],			/* row indices of A */
     int p [],			/* pointers to columns in A */
-    double knobs [COLAMD_KNOBS]	/* parameters (uses defaults if NULL) */
+    double knobs [COLAMD_KNOBS],/* parameters (uses defaults if NULL) */
+    int stats [COLAMD_STATS]	/* output statistics and error codes */
 )
 {
     /* === Local variables ================================================== */
@@ -723,69 +1320,115 @@ PUBLIC int colamd		/* returns TRUE if successful */
     int nnz ;			/* nonzeros in A */
     int Row_size ;		/* size of Row [], in integers */
     int Col_size ;		/* size of Col [], in integers */
-    int elbow_room ;		/* remaining free space */
-    RowInfo *Row ;		/* pointer into A of Row [0..n_row] array */
-    ColInfo *Col ;		/* pointer into A of Col [0..n_col] array */
+    int need ;			/* minimum required length of A */
+    Colamd_Row *Row ;		/* pointer into A of Row [0..n_row] array */
+    Colamd_Col *Col ;		/* pointer into A of Col [0..n_col] array */
     int n_col2 ;		/* number of non-dense, non-empty columns */
     int n_row2 ;		/* number of non-dense, non-empty rows */
     int ngarbage ;		/* number of garbage collections performed */
     int max_deg ;		/* maximum row degree */
-    double default_knobs [COLAMD_KNOBS] ;	/* default knobs knobs array */
-    int init_result ;		/* return code from initialization */
+    double default_knobs [COLAMD_KNOBS] ;	/* default knobs array */
 
 #ifndef NDEBUG
-    debug_colamd = 0 ;		/* no debug printing */
-    /* get "D" environment variable, which gives the debug printing level */
-    if (getenv ("D")) debug_colamd = atoi (getenv ("D")) ;
-    DEBUG0 (("debug version, D = %d (THIS WILL BE SLOOOOW!)\n", debug_colamd)) ;
-#endif
+    colamd_get_debug ("colamd") ;
+#endif /* NDEBUG */
 
     /* === Check the input arguments ======================================== */
 
-    if (n_row < 0 || n_col < 0 || !A || !p)
+    if (!stats)
+    {
+	DEBUG0 (("colamd: stats not present\n")) ;
+	return (FALSE) ;
+    }
+    for (i = 0 ; i < COLAMD_STATS ; i++)
+    {
+	stats [i] = 0 ;
+    }
+    stats [COLAMD_STATUS] = COLAMD_OK ;
+    stats [COLAMD_INFO1] = -1 ;
+    stats [COLAMD_INFO2] = -1 ;
+
+    if (!A)		/* A is not present */
     {
-	/* n_row and n_col must be non-negative, A and p must be present */
-	DEBUG0 (("colamd error! %d %d %d\n", n_row, n_col, Alen)) ;
+	stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
+	DEBUG0 (("colamd: A not present\n")) ;
 	return (FALSE) ;
     }
+
+    if (!p)		/* p is not present */
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
+	DEBUG0 (("colamd: p not present\n")) ;
+    	return (FALSE) ;
+    }
+
+    if (n_row < 0)	/* n_row must be >= 0 */
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ;
+	stats [COLAMD_INFO1] = n_row ;
+	DEBUG0 (("colamd: nrow negative %d\n", n_row)) ;
+    	return (FALSE) ;
+    }
+
+    if (n_col < 0)	/* n_col must be >= 0 */
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
+	stats [COLAMD_INFO1] = n_col ;
+	DEBUG0 (("colamd: ncol negative %d\n", n_col)) ;
+    	return (FALSE) ;
+    }
+
     nnz = p [n_col] ;
-    if (nnz < 0 || p [0] != 0)
+    if (nnz < 0)	/* nnz must be >= 0 */
     {
-	/* nnz must be non-negative, and p [0] must be zero */
-	DEBUG0 (("colamd error! %d %d\n", nnz, p [0])) ;
+	stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
+	stats [COLAMD_INFO1] = nnz ;
+	DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ;
 	return (FALSE) ;
     }
 
-    /* === If no knobs, set default parameters ============================== */
+    if (p [0] != 0)
+    {
+	stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero	;
+	stats [COLAMD_INFO1] = p [0] ;
+	DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ;
+	return (FALSE) ;
+    }
+
+    /* === If no knobs, set default knobs =================================== */
 
     if (!knobs)
     {
+	colamd_set_defaults (default_knobs) ;
 	knobs = default_knobs ;
-	colamd_set_defaults (knobs) ;
     }
 
     /* === Allocate the Row and Col arrays from array A ===================== */
 
-    Col_size = (n_col + 1) * sizeof (ColInfo) / sizeof (int) ;
-    Row_size = (n_row + 1) * sizeof (RowInfo) / sizeof (int) ;
-    elbow_room = Alen - (2*nnz + Col_size + Row_size) ;
-    if (elbow_room < n_col + COLAMD_STATS)
+    Col_size = COLAMD_C (n_col) ;
+    Row_size = COLAMD_R (n_row) ;
+    need = 2*nnz + n_col + Col_size + Row_size ;
+
+    if (need > Alen)
     {
 	/* not enough space in array A to perform the ordering */
-	DEBUG0 (("colamd error! elbow_room %d, %d\n", elbow_room,n_col)) ;
+	stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ;
+	stats [COLAMD_INFO1] = need ;
+	stats [COLAMD_INFO2] = Alen ;
+	DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen));
 	return (FALSE) ;
     }
-    Alen = 2*nnz + elbow_room ;
-    Col  = (ColInfo *) &A [Alen] ;
-    Row  = (RowInfo *) &A [Alen + Col_size] ;
+
+    Alen -= Col_size + Row_size ;
+    Col = (Colamd_Col *) &A [Alen] ;
+    Row = (Colamd_Row *) &A [Alen + Col_size] ;
 
     /* === Construct the row and column data structures ===================== */
 
-    init_result = init_rows_cols (n_row, n_col, Row, Col, A, p) ;
-    if (init_result == -1)
+    if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats))
     {
 	/* input matrix is invalid */
-	DEBUG0 (("colamd error! matrix invalid\n")) ;
+	DEBUG0 (("colamd: Matrix invalid\n")) ;
 	return (FALSE) ;
     }
 
@@ -803,22 +1446,44 @@ PUBLIC int colamd		/* returns TRUE if successful */
 
     order_children (n_col, Col, p) ;
 
-    /* === Return statistics in A =========================================== */
-
-    for (i = 0 ; i < COLAMD_STATS ; i++)
-    {
-	A [i] = 0 ;
-    }
-    A [COLAMD_DENSE_ROW] = n_row - n_row2 ;
-    A [COLAMD_DENSE_COL] = n_col - n_col2 ;
-    A [COLAMD_DEFRAG_COUNT] = ngarbage ;
-    A [COLAMD_JUMBLED_COLS] = init_result ;
+    /* === Return statistics in stats ======================================= */
 
+    stats [COLAMD_DENSE_ROW] = n_row - n_row2 ;
+    stats [COLAMD_DENSE_COL] = n_col - n_col2 ;
+    stats [COLAMD_DEFRAG_COUNT] = ngarbage ;
+    DEBUG0 (("colamd: done.\n")) ; 
     return (TRUE) ;
 }
 
 
 /* ========================================================================== */
+/* === colamd_report ======================================================== */
+/* ========================================================================== */
+
+PUBLIC void colamd_report
+(
+    int stats [COLAMD_STATS]
+)
+{
+    print_report ("colamd", stats) ;
+}
+
+
+/* ========================================================================== */
+/* === symamd_report ======================================================== */
+/* ========================================================================== */
+
+PUBLIC void symamd_report
+(
+    int stats [COLAMD_STATS]
+)
+{
+    print_report ("symamd", stats) ;
+}
+
+
+
+/* ========================================================================== */
 /* === NON-USER-CALLABLE ROUTINES: ========================================== */
 /* ========================================================================== */
 
@@ -834,20 +1499,21 @@ PUBLIC int colamd		/* returns TRUE if successful */
     matrix.  Also, row and column attributes are stored in the Col and Row
     structs.  If the columns are un-sorted or contain duplicate row indices,
     this routine will also sort and remove duplicate row indices from the
-    column form of the matrix.  Returns -1 on error, 1 if columns jumbled,
-    or 0 if columns not jumbled.  Not user-callable.
+    column form of the matrix.  Returns FALSE if the matrix is invalid,
+    TRUE otherwise.  Not user-callable.
 */
 
-PRIVATE int init_rows_cols	/* returns status code */
+PRIVATE int init_rows_cols	/* returns TRUE if OK, or FALSE otherwise */
 (
     /* === Parameters ======================================================= */
 
     int n_row,			/* number of rows of A */
     int n_col,			/* number of columns of A */
-    RowInfo Row [],		/* of size n_row+1 */
-    ColInfo Col [],		/* of size n_col+1 */
+    Colamd_Row Row [],		/* of size n_row+1 */
+    Colamd_Col Col [],		/* of size n_col+1 */
     int A [],			/* row indices of A, of size Alen */
-    int p []			/* pointers to columns in A, of size n_col+1 */
+    int p [],			/* pointers to columns in A, of size n_col+1 */
+    int stats [COLAMD_STATS]	/* colamd statistics */ 
 )
 {
     /* === Local variables ================================================== */
@@ -858,44 +1524,36 @@ PRIVATE int init_rows_cols	/* returns status code */
     int *cp_end ;		/* a pointer to the end of a column */
     int *rp ;			/* a row pointer */
     int *rp_end ;		/* a pointer to the end of a row */
-    int last_start ;		/* start index of previous column in A */
-    int start ;			/* start index of column in A */
     int last_row ;		/* previous row */
-    int jumbled_columns ;	/* indicates if columns are jumbled */
 
     /* === Initialize columns, and check column pointers ==================== */
 
-    last_start = 0 ;
     for (col = 0 ; col < n_col ; col++)
     {
-	start = p [col] ;
-	if (start < last_start)
+	Col [col].start = p [col] ;
+	Col [col].length = p [col+1] - p [col] ;
+
+	if (Col [col].length < 0)
 	{
 	    /* column pointers must be non-decreasing */
-	    DEBUG0 (("colamd error!  last p %d p [col] %d\n",last_start,start));
-	    return (-1) ;
+	    stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
+	    stats [COLAMD_INFO1] = col ;
+	    stats [COLAMD_INFO2] = Col [col].length ;
+	    DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ;
+	    return (FALSE) ;
 	}
-	Col [col].start = start ;
-	Col [col].length = p [col+1] - start ;
+
 	Col [col].shared1.thickness = 1 ;
 	Col [col].shared2.score = 0 ;
 	Col [col].shared3.prev = EMPTY ;
 	Col [col].shared4.degree_next = EMPTY ;
-	last_start = start ;
-    }
-    /* must check the end pointer for last column */
-    if (p [n_col] < last_start)
-    {
-	/* column pointers must be non-decreasing */
-	DEBUG0 (("colamd error!  last p %d p [n_col] %d\n",p[col],last_start)) ;
-	return (-1) ;
     }
 
     /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
 
     /* === Scan columns, compute row degrees, and check row indices ========= */
 
-    jumbled_columns = FALSE ;
+    stats [COLAMD_INFO3] = 0 ;	/* number of duplicate or unsorted row indices*/
 
     for (row = 0 ; row < n_row ; row++)
     {
@@ -917,22 +1575,28 @@ PRIVATE int init_rows_cols	/* returns status code */
 	    /* make sure row indices within range */
 	    if (row < 0 || row >= n_row)
 	    {
-		DEBUG0 (("colamd error!  col %d row %d last_row %d\n",
-			 col, row, last_row)) ;
-		return (-1) ;
+		stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
+		stats [COLAMD_INFO1] = col ;
+		stats [COLAMD_INFO2] = row ;
+		stats [COLAMD_INFO3] = n_row ;
+		DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
+		return (FALSE) ;
 	    }
-	    else if (row <= last_row)
+
+	    if (row <= last_row || Row [row].shared2.mark == col)
 	    {
-		/* row indices are not sorted or repeated, thus cols */
-		/* are jumbled */
-		jumbled_columns = TRUE ;
+		/* row indices are unsorted or repeated (or both), thus col */
+		/* is jumbled.  This is a notice, not an error condition. */
+		stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
+		stats [COLAMD_INFO1] = col ;
+		stats [COLAMD_INFO2] = row ;
+		(stats [COLAMD_INFO3]) ++ ;
+		DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
 	    }
-	    /* prevent repeated row from being counted */
+
 	    if (Row [row].shared2.mark != col)
 	    {
 		Row [row].length++ ;
-		Row [row].shared2.mark = col ;
-		last_row = row ;
 	    }
 	    else
 	    {
@@ -940,6 +1604,11 @@ PRIVATE int init_rows_cols	/* returns status code */
 		/* it will be removed */
 		Col [col].length-- ;
 	    }
+
+	    /* mark the row as having been seen in this column */
+	    Row [row].shared2.mark = col ;
+
+	    last_row = row ;
 	}
     }
 
@@ -959,7 +1628,7 @@ PRIVATE int init_rows_cols	/* returns status code */
 
     /* === Create row form ================================================== */
 
-    if (jumbled_columns)
+    if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
     {
 	/* if cols jumbled, watch for repeated row indices */
 	for (col = 0 ; col < n_col ; col++)
@@ -1001,8 +1670,9 @@ PRIVATE int init_rows_cols	/* returns status code */
 
     /* === See if we need to re-create columns ============================== */
 
-    if (jumbled_columns)
+    if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
     {
+    	DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;
 
 #ifndef NDEBUG
 	/* make sure column lengths are correct */
@@ -1021,10 +1691,10 @@ PRIVATE int init_rows_cols	/* returns status code */
 	}
 	for (col = 0 ; col < n_col ; col++)
 	{
-	    assert (p [col] == 0) ;
+	    ASSERT (p [col] == 0) ;
 	}
 	/* now p is all zero (different than when debugging is turned off) */
-#endif
+#endif /* NDEBUG */
 
 	/* === Compute col pointers ========================================= */
 
@@ -1053,13 +1723,11 @@ PRIVATE int init_rows_cols	/* returns status code */
 		A [(p [*rp++])++] = row ;
 	    }
 	}
-	return (1) ;
-    }
-    else
-    {
-	/* no columns jumbled (this is faster) */
-	return (0) ;
     }
+
+    /* === Done.  Matrix is not (or no longer) jumbled ====================== */
+
+    return (TRUE) ;
 }
 
 
@@ -1078,8 +1746,8 @@ PRIVATE void init_scoring
 
     int n_row,			/* number of rows of A */
     int n_col,			/* number of columns of A */
-    RowInfo Row [],		/* of size n_row+1 */
-    ColInfo Col [],		/* of size n_col+1 */
+    Colamd_Row Row [],		/* of size n_row+1 */
+    Colamd_Col Col [],		/* of size n_col+1 */
     int A [],			/* column form and row form of A */
     int head [],		/* of size n_col+1 */
     double knobs [COLAMD_KNOBS],/* parameters */
@@ -1093,7 +1761,7 @@ PRIVATE void init_scoring
     int c ;			/* a column index */
     int r, row ;		/* a row index */
     int *cp ;			/* a column pointer */
-    int deg ;			/* degree (# entries) of a row or column */
+    int deg ;			/* degree of a row or column */
     int *cp_end ;		/* a pointer to the end of a column */
     int *new_cp ;		/* new column pointer */
     int col_length ;		/* length of pruned column */
@@ -1105,22 +1773,23 @@ PRIVATE void init_scoring
     int min_score ;		/* smallest column score */
     int max_deg ;		/* maximum row degree */
     int next_col ;		/* Used to add to degree list.*/
+
 #ifndef NDEBUG
     int debug_count ;		/* debug only. */
-#endif
+#endif /* NDEBUG */
 
     /* === Extract knobs ==================================================== */
 
     dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
     dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
-    DEBUG0 (("densecount: %d %d\n", dense_row_count, dense_col_count)) ;
+    DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
     max_deg = 0 ;
     n_col2 = n_col ;
     n_row2 = n_row ;
 
     /* === Kill empty columns =============================================== */
 
-    /* Put the empty columns at the end in their natural, so that LU */
+    /* Put the empty columns at the end in their natural order, so that LU */
     /* factorization can proceed as far as possible. */
     for (c = n_col-1 ; c >= 0 ; c--)
     {
@@ -1132,7 +1801,7 @@ PRIVATE void init_scoring
 	    KILL_PRINCIPAL_COL (c) ;
 	}
     }
-    DEBUG0 (("null columns killed: %d\n", n_col - n_col2)) ;
+    DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;
 
     /* === Kill dense columns =============================================== */
 
@@ -1159,14 +1828,14 @@ PRIVATE void init_scoring
 	    KILL_PRINCIPAL_COL (c) ;
 	}
     }
-    DEBUG0 (("Dense and null columns killed: %d\n", n_col - n_col2)) ;
+    DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;
 
     /* === Kill dense and empty rows ======================================== */
 
     for (r = 0 ; r < n_row ; r++)
     {
 	deg = Row [r].shared1.degree ;
-	assert (deg >= 0 && deg <= n_col) ;
+	ASSERT (deg >= 0 && deg <= n_col) ;
 	if (deg > dense_row_count || deg == 0)
 	{
 	    /* kill a dense or empty row */
@@ -1179,7 +1848,7 @@ PRIVATE void init_scoring
 	    max_deg = MAX (max_deg, deg) ;
 	}
     }
-    DEBUG0 (("Dense and null rows killed: %d\n", n_row - n_row2)) ;
+    DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
 
     /* === Compute initial column scores ==================================== */
 
@@ -1222,20 +1891,21 @@ PRIVATE void init_scoring
 	{
 	    /* a newly-made null column (all rows in this col are "dense" */
 	    /* and have already been killed) */
-	    DEBUG0 (("Newly null killed: %d\n", c)) ;
+	    DEBUG2 (("Newly null killed: %d\n", c)) ;
 	    Col [c].shared2.order = --n_col2 ;
 	    KILL_PRINCIPAL_COL (c) ;
 	}
 	else
 	{
 	    /* set column length and set score */
-	    assert (score >= 0) ;
-	    assert (score <= n_col) ;
+	    ASSERT (score >= 0) ;
+	    ASSERT (score <= n_col) ;
 	    Col [c].length = col_length ;
 	    Col [c].shared2.score = score ;
 	}
     }
-    DEBUG0 (("Dense, null, and newly-null columns killed: %d\n",n_col-n_col2)) ;
+    DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
+    	n_col-n_col2)) ;
 
     /* At this point, all empty rows and columns are dead.  All live columns */
     /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
@@ -1244,13 +1914,13 @@ PRIVATE void init_scoring
 
 #ifndef NDEBUG
     debug_structures (n_row, n_col, Row, Col, A, n_col2) ;
-#endif
+#endif /* NDEBUG */
 
     /* === Initialize degree lists ========================================== */
 
 #ifndef NDEBUG
     debug_count = 0 ;
-#endif
+#endif /* NDEBUG */
 
     /* clear the hash buckets */
     for (c = 0 ; c <= n_col ; c++)
@@ -1272,11 +1942,11 @@ PRIVATE void init_scoring
 
 	    score = Col [c].shared2.score ;
 
-	    assert (min_score >= 0) ;
-	    assert (min_score <= n_col) ;
-	    assert (score >= 0) ;
-	    assert (score <= n_col) ;
-	    assert (head [score] >= EMPTY) ;
+	    ASSERT (min_score >= 0) ;
+	    ASSERT (min_score <= n_col) ;
+	    ASSERT (score >= 0) ;
+	    ASSERT (score <= n_col) ;
+	    ASSERT (head [score] >= EMPTY) ;
 
 	    /* now add this column to dList at proper score location */
 	    next_col = head [score] ;
@@ -1296,16 +1966,17 @@ PRIVATE void init_scoring
 
 #ifndef NDEBUG
 	    debug_count++ ;
-#endif
+#endif /* NDEBUG */
+
 	}
     }
 
 #ifndef NDEBUG
-    DEBUG0 (("Live cols %d out of %d, non-princ: %d\n",
+    DEBUG1 (("colamd: Live cols %d out of %d, non-princ: %d\n",
 	debug_count, n_col, n_col-debug_count)) ;
-    assert (debug_count == n_col2) ;
+    ASSERT (debug_count == n_col2) ;
     debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ;
-#endif
+#endif /* NDEBUG */
 
     /* === Return number of remaining columns, and max row degree =========== */
 
@@ -1331,9 +2002,9 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 
     int n_row,			/* number of rows of A */
     int n_col,			/* number of columns of A */
-    int Alen,			/* size of A, 2*nnz + elbow_room or larger */
-    RowInfo Row [],		/* of size n_row+1 */
-    ColInfo Col [],		/* of size n_col+1 */
+    int Alen,			/* size of A, 2*nnz + n_col or larger */
+    Colamd_Row Row [],		/* of size n_row+1 */
+    Colamd_Col Col [],		/* of size n_col+1 */
     int A [],			/* column form and row form of A */
     int head [],		/* of size n_col+1 */
     int n_col2,			/* Remaining columns to order */
@@ -1351,8 +2022,8 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
     int *new_cp ;		/* modified column pointer */
     int *new_rp ;		/* modified row pointer */
     int pivot_row_start ;	/* pointer to start of pivot row */
-    int pivot_row_degree ;	/* # of columns in pivot row */
-    int pivot_row_length ;	/* # of supercolumns in pivot row */
+    int pivot_row_degree ;	/* number of columns in pivot row */
+    int pivot_row_length ;	/* number of supercolumns in pivot row */
     int pivot_col_score ;	/* score of pivot column */
     int needed_memory ;		/* free space needed for pivot row */
     int *cp_end ;		/* pointer to the end of a column */
@@ -1368,16 +2039,17 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
     int row_mark ;		/* Row [row].shared2.mark */
     int set_difference ;	/* set difference size of row with pivot row */
     int min_score ;		/* smallest column score */
-    int col_thickness ;		/* "thickness" (# of columns in a supercol) */
+    int col_thickness ;		/* "thickness" (no. of columns in a supercol) */
     int max_mark ;		/* maximum value of tag_mark */
     int pivot_col_thickness ;	/* number of columns represented by pivot col */
     int prev_col ;		/* Used by Dlist operations. */
     int next_col ;		/* Used by Dlist operations. */
     int ngarbage ;		/* number of garbage collections performed */
+
 #ifndef NDEBUG
     int debug_d ;		/* debug loop counter */
     int debug_step = 0 ;	/* debug loop counter */
-#endif
+#endif /* NDEBUG */
 
     /* === Initialization and clear mark ==================================== */
 
@@ -1385,7 +2057,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
     tag_mark = clear_mark (n_row, Row) ;
     min_score = 0 ;
     ngarbage = 0 ;
-    DEBUG0 (("Ordering.. n_col2=%d\n", n_col2)) ;
+    DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;
 
     /* === Order the columns ================================================ */
 
@@ -1395,31 +2067,31 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 #ifndef NDEBUG
 	if (debug_step % 100 == 0)
 	{
-	    DEBUG0 (("\n...       Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+	    DEBUG2 (("\n...       Step k: %d out of n_col2: %d\n", k, n_col2)) ;
 	}
 	else
 	{
-	    DEBUG1 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
+	    DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
 	}
 	debug_step++ ;
 	debug_deg_lists (n_row, n_col, Row, Col, head,
 		min_score, n_col2-k, max_deg) ;
 	debug_matrix (n_row, n_col, Row, Col, A) ;
-#endif
+#endif /* NDEBUG */
 
 	/* === Select pivot column, and order it ============================ */
 
 	/* make sure degree list isn't empty */
-	assert (min_score >= 0) ;
-	assert (min_score <= n_col) ;
-	assert (head [min_score] >= EMPTY) ;
+	ASSERT (min_score >= 0) ;
+	ASSERT (min_score <= n_col) ;
+	ASSERT (head [min_score] >= EMPTY) ;
 
 #ifndef NDEBUG
 	for (debug_d = 0 ; debug_d < min_score ; debug_d++)
 	{
-	    assert (head [debug_d] == EMPTY) ;
+	    ASSERT (head [debug_d] == EMPTY) ;
 	}
-#endif
+#endif /* NDEBUG */
 
 	/* get pivot column from head of minimum degree list */
 	while (head [min_score] == EMPTY && min_score < n_col)
@@ -1427,7 +2099,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	    min_score++ ;
 	}
 	pivot_col = head [min_score] ;
-	assert (pivot_col >= 0 && pivot_col <= n_col) ;
+	ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
 	next_col = Col [pivot_col].shared4.degree_next ;
 	head [min_score] = next_col ;
 	if (next_col != EMPTY)
@@ -1435,7 +2107,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	    Col [next_col].shared3.prev = EMPTY ;
 	}
 
-	assert (COL_IS_ALIVE (pivot_col)) ;
+	ASSERT (COL_IS_ALIVE (pivot_col)) ;
 	DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
 
 	/* remember score for defrag check */
@@ -1447,7 +2119,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	/* increment order count by column thickness */
 	pivot_col_thickness = Col [pivot_col].shared1.thickness ;
 	k += pivot_col_thickness ;
-	assert (pivot_col_thickness > 0) ;
+	ASSERT (pivot_col_thickness > 0) ;
 
 	/* === Garbage_collection, if necessary ============================= */
 
@@ -1457,12 +2129,13 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	    pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
 	    ngarbage++ ;
 	    /* after garbage collection we will have enough */
-	    assert (pfree + needed_memory < Alen) ;
+	    ASSERT (pfree + needed_memory < Alen) ;
 	    /* garbage collection has wiped out the Row[].shared2.mark array */
 	    tag_mark = clear_mark (n_row, Row) ;
+
 #ifndef NDEBUG
 	    debug_matrix (n_row, n_col, Row, Col, A) ;
-#endif
+#endif /* NDEBUG */
 	}
 
 	/* === Compute pivot row pattern ==================================== */
@@ -1502,7 +2175,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 		{
 		    /* tag column in pivot row */
 		    Col [col].shared1.thickness = -col_thickness ;
-		    assert (pfree < Alen) ;
+		    ASSERT (pfree < Alen) ;
 		    /* place column in pivot row */
 		    A [pfree++] = col ;
 		    pivot_row_degree += col_thickness ;
@@ -1517,7 +2190,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 #ifndef NDEBUG
 	DEBUG3 (("check2\n")) ;
 	debug_mark (n_row, Row, tag_mark, max_mark) ;
-#endif
+#endif /* NDEBUG */
 
 	/* === Kill all rows used to construct pivot row ==================== */
 
@@ -1528,7 +2201,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	{
 	    /* may be killing an already dead row */
 	    row = *cp++ ;
-	    DEBUG2 (("Kill row in pivot col: %d\n", row)) ;
+	    DEBUG3 (("Kill row in pivot col: %d\n", row)) ;
 	    KILL_ROW (row) ;
 	}
 
@@ -1539,15 +2212,15 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	{
 	    /* pick the "pivot" row arbitrarily (first row in col) */
 	    pivot_row = A [Col [pivot_col].start] ;
-	    DEBUG2 (("Pivotal row is %d\n", pivot_row)) ;
+	    DEBUG3 (("Pivotal row is %d\n", pivot_row)) ;
 	}
 	else
 	{
 	    /* there is no pivot row, since it is of zero length */
 	    pivot_row = EMPTY ;
-	    assert (pivot_row_length == 0) ;
+	    ASSERT (pivot_row_length == 0) ;
 	}
-	assert (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
+	ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
 
 	/* === Approximate degree computation =============================== */
 
@@ -1570,23 +2243,23 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 
 	/* === Compute set differences ====================================== */
 
-	DEBUG1 (("** Computing set differences phase. **\n")) ;
+	DEBUG3 (("** Computing set differences phase. **\n")) ;
 
 	/* pivot row is currently dead - it will be revived later. */
 
-	DEBUG2 (("Pivot row: ")) ;
+	DEBUG3 (("Pivot row: ")) ;
 	/* for each column in pivot row */
 	rp = &A [pivot_row_start] ;
 	rp_end = rp + pivot_row_length ;
 	while (rp < rp_end)
 	{
 	    col = *rp++ ;
-	    assert (COL_IS_ALIVE (col) && col != pivot_col) ;
-	    DEBUG2 (("Col: %d\n", col)) ;
+	    ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
+	    DEBUG3 (("Col: %d\n", col)) ;
 
 	    /* clear tags used to construct pivot row pattern */
 	    col_thickness = -Col [col].shared1.thickness ;
-	    assert (col_thickness > 0) ;
+	    ASSERT (col_thickness > 0) ;
 	    Col [col].shared1.thickness = col_thickness ;
 
 	    /* === Remove column from degree list =========================== */
@@ -1594,9 +2267,9 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	    cur_score = Col [col].shared2.score ;
 	    prev_col = Col [col].shared3.prev ;
 	    next_col = Col [col].shared4.degree_next ;
-	    assert (cur_score >= 0) ;
-	    assert (cur_score <= n_col) ;
-	    assert (cur_score >= EMPTY) ;
+	    ASSERT (cur_score >= 0) ;
+	    ASSERT (cur_score <= n_col) ;
+	    ASSERT (cur_score >= EMPTY) ;
 	    if (prev_col == EMPTY)
 	    {
 		head [cur_score] = next_col ;
@@ -1624,21 +2297,21 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 		{
 		    continue ;
 		}
-		assert (row != pivot_row) ;
+		ASSERT (row != pivot_row) ;
 		set_difference = row_mark - tag_mark ;
 		/* check if the row has been seen yet */
 		if (set_difference < 0)
 		{
-		    assert (Row [row].shared1.degree <= max_deg) ;
+		    ASSERT (Row [row].shared1.degree <= max_deg) ;
 		    set_difference = Row [row].shared1.degree ;
 		}
 		/* subtract column thickness from this row's set difference */
 		set_difference -= col_thickness ;
-		assert (set_difference >= 0) ;
+		ASSERT (set_difference >= 0) ;
 		/* absorb this row if the set difference becomes zero */
 		if (set_difference == 0)
 		{
-		    DEBUG1 (("aggressive absorption. Row: %d\n", row)) ;
+		    DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
 		    KILL_ROW (row) ;
 		}
 		else
@@ -1652,11 +2325,11 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 #ifndef NDEBUG
 	debug_deg_lists (n_row, n_col, Row, Col, head,
 		min_score, n_col2-k-pivot_row_degree, max_deg) ;
-#endif
+#endif /* NDEBUG */
 
 	/* === Add up set differences for each column ======================= */
 
-	DEBUG1 (("** Adding set differences phase. **\n")) ;
+	DEBUG3 (("** Adding set differences phase. **\n")) ;
 
 	/* for each column in pivot row */
 	rp = &A [pivot_row_start] ;
@@ -1665,7 +2338,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	{
 	    /* get a column */
 	    col = *rp++ ;
-	    assert (COL_IS_ALIVE (col) && col != pivot_col) ;
+	    ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
 	    hash = 0 ;
 	    cur_score = 0 ;
 	    cp = &A [Col [col].start] ;
@@ -1673,20 +2346,20 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	    new_cp = cp ;
 	    cp_end = cp + Col [col].length ;
 
-	    DEBUG2 (("Adding set diffs for Col: %d.\n", col)) ;
+	    DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ;
 
 	    while (cp < cp_end)
 	    {
 		/* get a row */
 		row = *cp++ ;
-		assert(row >= 0 && row < n_row) ;
+		ASSERT(row >= 0 && row < n_row) ;
 		row_mark = Row [row].shared2.mark ;
 		/* skip if dead */
 		if (ROW_IS_MARKED_DEAD (row_mark))
 		{
 		    continue ;
 		}
-		assert (row_mark > tag_mark) ;
+		ASSERT (row_mark > tag_mark) ;
 		/* compact the column */
 		*new_cp++ = row ;
 		/* compute hash function */
@@ -1704,11 +2377,11 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 
 	    if (Col [col].length == 0)
 	    {
-		DEBUG1 (("further mass elimination. Col: %d\n", col)) ;
+		DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
 		/* nothing left but the pivot row in this column */
 		KILL_PRINCIPAL_COL (col) ;
 		pivot_row_degree -= Col [col].shared1.thickness ;
-		assert (pivot_row_degree >= 0) ;
+		ASSERT (pivot_row_degree >= 0) ;
 		/* order it */
 		Col [col].shared2.order = k ;
 		/* increment order count by column thickness */
@@ -1718,7 +2391,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	    {
 		/* === Prepare for supercolumn detection ==================== */
 
-		DEBUG2 (("Preparing supercol detection for Col: %d.\n", col)) ;
+		DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;
 
 		/* save score so far */
 		Col [col].shared2.score = cur_score ;
@@ -1726,8 +2399,8 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 		/* add column to hash table, for supercolumn detection */
 		hash %= n_col + 1 ;
 
-		DEBUG2 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
-		assert (hash <= n_col) ;
+		DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
+		ASSERT (hash <= n_col) ;
 
 		head_column = head [hash] ;
 		if (head_column > EMPTY)
@@ -1747,7 +2420,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 
 		/* save hash function in Col [col].shared3.hash */
 		Col [col].shared3.hash = (int) hash ;
-		assert (COL_IS_ALIVE (col)) ;
+		ASSERT (COL_IS_ALIVE (col)) ;
 	    }
 	}
 
@@ -1755,12 +2428,14 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 
 	/* === Supercolumn detection ======================================== */
 
-	DEBUG1 (("** Supercolumn detection phase. **\n")) ;
+	DEBUG3 (("** Supercolumn detection phase. **\n")) ;
 
 	detect_super_cols (
+
 #ifndef NDEBUG
 		n_col, Row,
-#endif
+#endif /* NDEBUG */
+
 		Col, A, head, pivot_row_start, pivot_row_length) ;
 
 	/* === Kill the pivotal column ====================================== */
@@ -1772,17 +2447,18 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 	tag_mark += (max_deg + 1) ;
 	if (tag_mark >= max_mark)
 	{
-	    DEBUG1 (("clearing tag_mark\n")) ;
+	    DEBUG2 (("clearing tag_mark\n")) ;
 	    tag_mark = clear_mark (n_row, Row) ;
 	}
+
 #ifndef NDEBUG
 	DEBUG3 (("check3\n")) ;
 	debug_mark (n_row, Row, tag_mark, max_mark) ;
-#endif
+#endif /* NDEBUG */
 
 	/* === Finalize the new pivot row, and column scores ================ */
 
-	DEBUG1 (("** Finalize scores phase. **\n")) ;
+	DEBUG3 (("** Finalize scores phase. **\n")) ;
 
 	/* for each column in pivot row */
 	rp = &A [pivot_row_start] ;
@@ -1816,18 +2492,18 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 
 	    /* make sure score is less or equal than the max score */
 	    cur_score = MIN (cur_score, max_score) ;
-	    assert (cur_score >= 0) ;
+	    ASSERT (cur_score >= 0) ;
 
 	    /* store updated score */
 	    Col [col].shared2.score = cur_score ;
 
 	    /* === Place column back in degree list ========================= */
 
-	    assert (min_score >= 0) ;
-	    assert (min_score <= n_col) ;
-	    assert (cur_score >= 0) ;
-	    assert (cur_score <= n_col) ;
-	    assert (head [cur_score] >= EMPTY) ;
+	    ASSERT (min_score >= 0) ;
+	    ASSERT (min_score <= n_col) ;
+	    ASSERT (cur_score >= 0) ;
+	    ASSERT (cur_score <= n_col) ;
+	    ASSERT (head [cur_score] >= EMPTY) ;
 	    next_col = head [cur_score] ;
 	    Col [col].shared4.degree_next = next_col ;
 	    Col [col].shared3.prev = EMPTY ;
@@ -1845,7 +2521,7 @@ PRIVATE int find_ordering	/* return the number of garbage collections */
 #ifndef NDEBUG
 	debug_deg_lists (n_row, n_col, Row, Col, head,
 		min_score, n_col2-k, max_deg) ;
-#endif
+#endif /* NDEBUG */
 
 	/* === Resurrect the new pivot row ================================== */
 
@@ -1889,7 +2565,7 @@ PRIVATE void order_children
     /* === Parameters ======================================================= */
 
     int n_col,			/* number of columns of A */
-    ColInfo Col [],		/* of size n_col+1 */
+    Colamd_Col Col [],		/* of size n_col+1 */
     int p []			/* p [0 ... n_col-1] is the column permutation*/
 )
 {
@@ -1905,7 +2581,7 @@ PRIVATE void order_children
     for (i = 0 ; i < n_col ; i++)
     {
 	/* find an un-ordered non-principal column */
-	assert (COL_IS_DEAD (i)) ;
+	ASSERT (COL_IS_DEAD (i)) ;
 	if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY)
 	{
 	    parent = i ;
@@ -1923,7 +2599,7 @@ PRIVATE void order_children
 
 	    do
 	    {
-		assert (Col [c].shared2.order == EMPTY) ;
+		ASSERT (Col [c].shared2.order == EMPTY) ;
 
 		/* order this column */
 		Col [c].shared2.order = order++ ;
@@ -1992,9 +2668,10 @@ PRIVATE void detect_super_cols
 #ifndef NDEBUG
     /* these two parameters are only needed when debugging is enabled: */
     int n_col,			/* number of columns of A */
-    RowInfo Row [],		/* of size n_row+1 */
-#endif
-    ColInfo Col [],		/* of size n_col+1 */
+    Colamd_Row Row [],		/* of size n_row+1 */
+#endif /* NDEBUG */
+
+    Colamd_Col Col [],		/* of size n_col+1 */
     int A [],			/* row indices of A */
     int head [],		/* head of degree lists and hash buckets */
     int row_start,		/* pointer to set of columns to check */
@@ -2003,7 +2680,7 @@ PRIVATE void detect_super_cols
 {
     /* === Local variables ================================================== */
 
-    int hash ;			/* hash # for a column */
+    int hash ;			/* hash value for a column */
     int *rp ;			/* pointer to a row */
     int c ;			/* a column index */
     int super_c ;		/* column index of the column to absorb into */
@@ -2031,7 +2708,7 @@ PRIVATE void detect_super_cols
 
 	/* get hash number for this column */
 	hash = Col [col].shared3.hash ;
-	assert (hash <= n_col) ;
+	ASSERT (hash <= n_col) ;
 
 	/* === Get the first column in this hash bucket ===================== */
 
@@ -2050,8 +2727,8 @@ PRIVATE void detect_super_cols
 	for (super_c = first_col ; super_c != EMPTY ;
 	    super_c = Col [super_c].shared4.hash_next)
 	{
-	    assert (COL_IS_ALIVE (super_c)) ;
-	    assert (Col [super_c].shared3.hash == hash) ;
+	    ASSERT (COL_IS_ALIVE (super_c)) ;
+	    ASSERT (Col [super_c].shared3.hash == hash) ;
 	    length = Col [super_c].length ;
 
 	    /* prev_c is the column preceding column c in the hash bucket */
@@ -2062,9 +2739,9 @@ PRIVATE void detect_super_cols
 	    for (c = Col [super_c].shared4.hash_next ;
 		 c != EMPTY ; c = Col [c].shared4.hash_next)
 	    {
-		assert (c != super_c) ;
-		assert (COL_IS_ALIVE (c)) ;
-		assert (Col [c].shared3.hash == hash) ;
+		ASSERT (c != super_c) ;
+		ASSERT (COL_IS_ALIVE (c)) ;
+		ASSERT (Col [c].shared3.hash == hash) ;
 
 		/* not identical if lengths or scores are different */
 		if (Col [c].length != length ||
@@ -2081,8 +2758,8 @@ PRIVATE void detect_super_cols
 		for (i = 0 ; i < length ; i++)
 		{
 		    /* the columns are "clean" (no dead rows) */
-		    assert (ROW_IS_ALIVE (*cp1))  ;
-		    assert (ROW_IS_ALIVE (*cp2))  ;
+		    ASSERT (ROW_IS_ALIVE (*cp1))  ;
+		    ASSERT (ROW_IS_ALIVE (*cp2))  ;
 		    /* row indices will same order for both supercols, */
 		    /* no gather scatter nessasary */
 		    if (*cp1++ != *cp2++)
@@ -2100,7 +2777,7 @@ PRIVATE void detect_super_cols
 
 		/* === Got it!  two columns are identical =================== */
 
-		assert (Col [c].shared2.score == Col [super_c].shared2.score) ;
+		ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;
 
 		Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
 		Col [c].shared1.parent = super_c ;
@@ -2147,8 +2824,8 @@ PRIVATE int garbage_collection  /* returns the new value of pfree */
 
     int n_row,			/* number of rows */
     int n_col,			/* number of columns */
-    RowInfo Row [],		/* row info */
-    ColInfo Col [],		/* column info */
+    Colamd_Row Row [],		/* row info */
+    Colamd_Col Col [],		/* column info */
     int A [],			/* A [0 ... Alen-1] holds the matrix */
     int *pfree			/* &A [0] ... pfree is in use */
 )
@@ -2164,10 +2841,10 @@ PRIVATE int garbage_collection  /* returns the new value of pfree */
 
 #ifndef NDEBUG
     int debug_rows ;
-    DEBUG0 (("Defrag..\n")) ;
-    for (psrc = &A[0] ; psrc < pfree ; psrc++) assert (*psrc >= 0) ;
+    DEBUG2 (("Defrag..\n")) ;
+    for (psrc = &A[0] ; psrc < pfree ; psrc++) ASSERT (*psrc >= 0) ;
     debug_rows = 0 ;
-#endif
+#endif /* NDEBUG */
 
     /* === Defragment the columns =========================================== */
 
@@ -2179,7 +2856,7 @@ PRIVATE int garbage_collection  /* returns the new value of pfree */
 	    psrc = &A [Col [c].start] ;
 
 	    /* move and compact the column */
-	    assert (pdest <= psrc) ;
+	    ASSERT (pdest <= psrc) ;
 	    Col [c].start = (int) (pdest - &A [0]) ;
 	    length = Col [c].length ;
 	    for (j = 0 ; j < length ; j++)
@@ -2203,7 +2880,7 @@ PRIVATE int garbage_collection  /* returns the new value of pfree */
 	    if (Row [r].length == 0)
 	    {
 		/* this row is of zero length.  cannot compact it, so kill it */
-		DEBUG0 (("Defrag row kill\n")) ;
+		DEBUG3 (("Defrag row kill\n")) ;
 		KILL_ROW (r) ;
 	    }
 	    else
@@ -2211,12 +2888,14 @@ PRIVATE int garbage_collection  /* returns the new value of pfree */
 		/* save first column index in Row [r].shared2.first_column */
 		psrc = &A [Row [r].start] ;
 		Row [r].shared2.first_column = *psrc ;
-		assert (ROW_IS_ALIVE (r)) ;
+		ASSERT (ROW_IS_ALIVE (r)) ;
 		/* flag the start of the row with the one's complement of row */
 		*psrc = ONES_COMPLEMENT (r) ;
+
 #ifndef NDEBUG
 		debug_rows++ ;
-#endif
+#endif /* NDEBUG */
+
 	    }
 	}
     }
@@ -2232,13 +2911,13 @@ PRIVATE int garbage_collection  /* returns the new value of pfree */
 	    psrc-- ;
 	    /* get the row index */
 	    r = ONES_COMPLEMENT (*psrc) ;
-	    assert (r >= 0 && r < n_row) ;
+	    ASSERT (r >= 0 && r < n_row) ;
 	    /* restore first column index */
 	    *psrc = Row [r].shared2.first_column ;
-	    assert (ROW_IS_ALIVE (r)) ;
+	    ASSERT (ROW_IS_ALIVE (r)) ;
 
 	    /* move and compact the row */
-	    assert (pdest <= psrc) ;
+	    ASSERT (pdest <= psrc) ;
 	    Row [r].start = (int) (pdest - &A [0]) ;
 	    length = Row [r].length ;
 	    for (j = 0 ; j < length ; j++)
@@ -2250,13 +2929,15 @@ PRIVATE int garbage_collection  /* returns the new value of pfree */
 		}
 	    }
 	    Row [r].length = (int) (pdest - &A [Row [r].start]) ;
+
 #ifndef NDEBUG
 	    debug_rows-- ;
-#endif
+#endif /* NDEBUG */
+
 	}
     }
     /* ensure we found all the rows */
-    assert (debug_rows == 0) ;
+    ASSERT (debug_rows == 0) ;
 
     /* === Return the new value of pfree ==================================== */
 
@@ -2278,14 +2959,13 @@ PRIVATE int clear_mark	/* return the new value for tag_mark */
     /* === Parameters ======================================================= */
 
     int n_row,		/* number of rows in A */
-    RowInfo Row []	/* Row [0 ... n_row-1].shared2.mark is set to zero */
+    Colamd_Row Row []	/* Row [0 ... n_row-1].shared2.mark is set to zero */
 )
 {
     /* === Local variables ================================================== */
 
     int r ;
 
-    DEBUG0 (("Clear mark\n")) ;
     for (r = 0 ; r < n_row ; r++)
     {
 	if (ROW_IS_ALIVE (r))
@@ -2298,7 +2978,139 @@ PRIVATE int clear_mark	/* return the new value for tag_mark */
 
 
 /* ========================================================================== */
-/* === debugging routines =================================================== */
+/* === print_report ========================================================= */
+/* ========================================================================== */
+
+PRIVATE void print_report
+(
+    char *method,
+    int stats [COLAMD_STATS]
+)
+{
+
+    int i1, i2, i3 ;
+
+    if (!stats)
+    {
+    	PRINTF ("%s: No statistics available.\n", method) ;
+	return ;
+    }
+
+    i1 = stats [COLAMD_INFO1] ;
+    i2 = stats [COLAMD_INFO2] ;
+    i3 = stats [COLAMD_INFO3] ;
+
+    if (stats [COLAMD_STATUS] >= 0)
+    {
+    	PRINTF ("%s: OK.  ", method) ;
+    }
+    else
+    {
+    	PRINTF ("%s: ERROR.  ", method) ;
+    }
+
+    switch (stats [COLAMD_STATUS])
+    {
+
+	case COLAMD_OK_BUT_JUMBLED:
+
+	    PRINTF ("Matrix has unsorted or duplicate row indices.\n") ;
+
+	    PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n",
+	    method, i3) ;
+
+	    PRINTF ("%s: last seen duplicate or out-of-order row index:   %d\n",
+	    method, INDEX (i2)) ;
+
+	    PRINTF ("%s: last seen in column:                             %d",
+	    method, INDEX (i1)) ;
+
+	    /* no break - fall through to next case instead */
+
+	case COLAMD_OK:
+
+	    PRINTF ("\n") ;
+
+ 	    PRINTF ("%s: number of dense or empty rows ignored:           %d\n",
+	    method, stats [COLAMD_DENSE_ROW]) ;
+
+	    PRINTF ("%s: number of dense or empty columns ignored:        %d\n",
+	    method, stats [COLAMD_DENSE_COL]) ;
+
+	    PRINTF ("%s: number of garbage collections performed:         %d\n",
+	    method, stats [COLAMD_DEFRAG_COUNT]) ;
+	    break ;
+
+	case COLAMD_ERROR_A_not_present:
+
+	    PRINTF ("Array A (row indices of matrix) not present.\n") ;
+	    break ;
+
+	case COLAMD_ERROR_p_not_present:
+
+	    PRINTF ("Array p (column pointers for matrix) not present.\n") ;
+	    break ;
+
+	case COLAMD_ERROR_nrow_negative:
+
+	    PRINTF ("Invalid number of rows (%d).\n", i1) ;
+	    break ;
+
+	case COLAMD_ERROR_ncol_negative:
+
+	    PRINTF ("Invalid number of columns (%d).\n", i1) ;
+	    break ;
+
+	case COLAMD_ERROR_nnz_negative:
+
+	    PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ;
+	    break ;
+
+	case COLAMD_ERROR_p0_nonzero:
+
+	    PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ;
+	    break ;
+
+	case COLAMD_ERROR_A_too_small:
+
+	    PRINTF ("Array A too small.\n") ;
+	    PRINTF ("        Need Alen >= %d, but given only Alen = %d.\n",
+	    i1, i2) ;
+	    break ;
+
+	case COLAMD_ERROR_col_length_negative:
+
+	    PRINTF
+	    ("Column %d has a negative number of nonzero entries (%d).\n",
+	    INDEX (i1), i2) ;
+	    break ;
+
+	case COLAMD_ERROR_row_index_out_of_bounds:
+
+	    PRINTF
+	    ("Row index (row %d) out of bounds (%d to %d) in column %d.\n",
+	    INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ;
+	    break ;
+
+	case COLAMD_ERROR_out_of_memory:
+
+	    PRINTF ("Out of memory.\n") ;
+	    break ;
+
+	case COLAMD_ERROR_internal_error:
+
+	    /* if this happens, there is a bug in the code */
+	    PRINTF
+	    ("Internal error! Please contact authors (davis at cise.ufl.edu).\n") ;
+	    break ;
+    }
+}
+
+
+
+
+/* ========================================================================== */
+/* === colamd debugging routines ============================================ */
 /* ========================================================================== */
 
 /* When debugging is disabled, the remainder of this file is ignored. */
@@ -2323,8 +3135,8 @@ PRIVATE void debug_structures
 
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int A [],
     int n_col2
 )
@@ -2351,21 +3163,21 @@ PRIVATE void debug_structures
 	    len = Col [c].length ;
 	    score = Col [c].shared2.score ;
 	    DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ;
-	    assert (len > 0) ;
-	    assert (score >= 0) ;
-	    assert (Col [c].shared1.thickness == 1) ;
+	    ASSERT (len > 0) ;
+	    ASSERT (score >= 0) ;
+	    ASSERT (Col [c].shared1.thickness == 1) ;
 	    cp = &A [Col [c].start] ;
 	    cp_end = cp + len ;
 	    while (cp < cp_end)
 	    {
 		r = *cp++ ;
-		assert (ROW_IS_ALIVE (r)) ;
+		ASSERT (ROW_IS_ALIVE (r)) ;
 	    }
 	}
 	else
 	{
 	    i = Col [c].shared2.order ;
-	    assert (i >= n_col2 && i < n_col) ;
+	    ASSERT (i >= n_col2 && i < n_col) ;
 	}
     }
 
@@ -2376,8 +3188,8 @@ PRIVATE void debug_structures
 	    i = 0 ;
 	    len = Row [r].length ;
 	    deg = Row [r].shared1.degree ;
-	    assert (len > 0) ;
-	    assert (deg > 0) ;
+	    ASSERT (len > 0) ;
+	    ASSERT (deg > 0) ;
 	    rp = &A [Row [r].start] ;
 	    rp_end = rp + len ;
 	    while (rp < rp_end)
@@ -2388,7 +3200,7 @@ PRIVATE void debug_structures
 		    i++ ;
 		}
 	    }
-	    assert (i > 0) ;
+	    ASSERT (i > 0) ;
 	}
     }
 }
@@ -2410,8 +3222,8 @@ PRIVATE void debug_deg_lists
 
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int head [],
     int min_score,
     int should,
@@ -2427,7 +3239,7 @@ PRIVATE void debug_deg_lists
 
     /* === Check the degree lists =========================================== */
 
-    if (n_col > 10000 && debug_colamd <= 0)
+    if (n_col > 10000 && colamd_debug <= 0)
     {
 	return ;
     }
@@ -2445,17 +3257,17 @@ PRIVATE void debug_deg_lists
 	{
 	    DEBUG4 ((" %d", col)) ;
 	    have += Col [col].shared1.thickness ;
-	    assert (COL_IS_ALIVE (col)) ;
+	    ASSERT (COL_IS_ALIVE (col)) ;
 	    col = Col [col].shared4.degree_next ;
 	}
 	DEBUG4 (("\n")) ;
     }
     DEBUG4 (("should %d have %d\n", should, have)) ;
-    assert (should == have) ;
+    ASSERT (should == have) ;
 
     /* === Check the row degrees ============================================ */
 
-    if (n_row > 10000 && debug_colamd <= 0)
+    if (n_row > 10000 && colamd_debug <= 0)
     {
 	return ;
     }
@@ -2463,7 +3275,7 @@ PRIVATE void debug_deg_lists
     {
 	if (ROW_IS_ALIVE (row))
 	{
-	    assert (Row [row].shared1.degree <= max_deg) ;
+	    ASSERT (Row [row].shared1.degree <= max_deg) ;
 	}
     }
 }
@@ -2483,7 +3295,7 @@ PRIVATE void debug_mark
     /* === Parameters ======================================================= */
 
     int n_row,
-    RowInfo Row [],
+    Colamd_Row Row [],
     int tag_mark,
     int max_mark
 )
@@ -2494,14 +3306,14 @@ PRIVATE void debug_mark
 
     /* === Check the Row marks ============================================== */
 
-    assert (tag_mark > 0 && tag_mark <= max_mark) ;
-    if (n_row > 10000 && debug_colamd <= 0)
+    ASSERT (tag_mark > 0 && tag_mark <= max_mark) ;
+    if (n_row > 10000 && colamd_debug <= 0)
     {
 	return ;
     }
     for (r = 0 ; r < n_row ; r++)
     {
-	assert (Row [r].shared2.mark < tag_mark) ;
+	ASSERT (Row [r].shared2.mark < tag_mark) ;
     }
 }
 
@@ -2520,8 +3332,8 @@ PRIVATE void debug_matrix
 
     int n_row,
     int n_col,
-    RowInfo Row [],
-    ColInfo Col [],
+    Colamd_Row Row [],
+    Colamd_Col Col [],
     int A []
 )
 {
@@ -2536,7 +3348,7 @@ PRIVATE void debug_matrix
 
     /* === Dump the rows and columns of the matrix ========================== */
 
-    if (debug_colamd < 3)
+    if (colamd_debug < 3)
     {
 	return ;
     }
@@ -2555,7 +3367,7 @@ PRIVATE void debug_matrix
 	while (rp < rp_end)
 	{
 	    c = *rp++ ;
-	    DEBUG3 (("	%d col %d\n", COL_IS_ALIVE (c), c)) ;
+	    DEBUG4 (("	%d col %d\n", COL_IS_ALIVE (c), c)) ;
 	}
     }
 
@@ -2574,10 +3386,27 @@ PRIVATE void debug_matrix
 	while (cp < cp_end)
 	{
 	    r = *cp++ ;
-	    DEBUG3 (("	%d row %d\n", ROW_IS_ALIVE (r), r)) ;
+	    DEBUG4 (("	%d row %d\n", ROW_IS_ALIVE (r), r)) ;
 	}
     }
 }
 
-#endif
+PRIVATE void colamd_get_debug
+(
+    char *method
+)
+{
+    colamd_debug = 0 ;		/* no debug printing */
+
+    /* get "D" environment variable, which gives the debug printing level */
+    if (getenv ("D"))
+    {
+    	colamd_debug = atoi (getenv ("D")) ;
+    }
+
+    DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n",
+    	method, colamd_debug)) ;
+}
+
+#endif /* NDEBUG */
 
diff --git a/SRC/colamd.h b/SRC/colamd.h
index 0078398..6e30662 100644
--- a/SRC/colamd.h
+++ b/SRC/colamd.h
@@ -1,49 +1,200 @@
 /* ========================================================================== */
-/* === colamd prototypes and definitions ==================================== */
+/* === colamd/symamd prototypes and definitions ============================= */
 /* ========================================================================== */
 
 /*
-    This is the colamd include file,
+    You must include this file (colamd.h) in any routine that uses colamd,
+    symamd, or the related macros and definitions.
 
-	http://www.cise.ufl.edu/~davis/colamd/colamd.h
+    Authors:
 
-    for use in the colamd.c, colamdmex.c, and symamdmex.c files located at
+	The authors of the code itself are Stefan I. Larimore and Timothy A.
+	Davis (davis at cise.ufl.edu), University of Florida.  The algorithm was
+	developed in collaboration with John Gilbert, Xerox PARC, and Esmond
+	Ng, Oak Ridge National Laboratory.
 
-	http://www.cise.ufl.edu/~davis/colamd/
+    Date:
 
-    See those files for a description of colamd and symamd, and for the
-    copyright notice, which also applies to this file.
+	September 8, 2003.  Version 2.3.
+
+    Acknowledgements:
+
+	This work was supported by the National Science Foundation, under
+	grants DMS-9504974 and DMS-9803599.
+
+    Notice:
+
+	Copyright (c) 1998-2003 by the University of Florida.
+	All Rights Reserved.
+
+	THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
+	EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+
+	Permission is hereby granted to use, copy, modify, and/or distribute
+	this program, provided that the Copyright, this License, and the
+	Availability of the original version is retained on all copies and made
+	accessible to the end-user of any code or package that includes COLAMD
+	or any modified version of COLAMD. 
+
+    Availability:
+
+	The colamd/symamd library is available at
+
+	    http://www.cise.ufl.edu/research/sparse/colamd/
+
+	This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
+	file.  It is required by the colamd.c, colamdmex.c, and symamdmex.c
+	files, and by any C code that calls the routines whose prototypes are
+	listed below, or that uses the colamd/symamd definitions listed below.
 
-    August 3, 1998.  Version 1.0.
 */
 
+#ifndef COLAMD_H
+#define COLAMD_H
+
+/* ========================================================================== */
+/* === Include files ======================================================== */
+/* ========================================================================== */
+
+#include <stdlib.h>
+
 /* ========================================================================== */
-/* === Definitions ========================================================== */
+/* === Knob and statistics definitions ====================================== */
 /* ========================================================================== */
 
 /* size of the knobs [ ] array.  Only knobs [0..1] are currently used. */
 #define COLAMD_KNOBS 20
 
-/* number of output statistics.  Only A [0..2] are currently used. */
+/* number of output statistics.  Only stats [0..6] are currently used. */
 #define COLAMD_STATS 20
 
-/* knobs [0] and A [0]: dense row knob and output statistic. */
+/* knobs [0] and stats [0]: dense row knob and output statistic. */
 #define COLAMD_DENSE_ROW 0
 
-/* knobs [1] and A [1]: dense column knob and output statistic. */
+/* knobs [1] and stats [1]: dense column knob and output statistic. */
 #define COLAMD_DENSE_COL 1
 
-/* A [2]: memory defragmentation count output statistic */
+/* stats [2]: memory defragmentation count output statistic */
 #define COLAMD_DEFRAG_COUNT 2
 
-/* A [3]: whether or not the input columns were jumbled or had duplicates */
-#define COLAMD_JUMBLED_COLS 3
+/* stats [3]: colamd status:  zero OK, > 0 warning or notice, < 0 error */
+#define COLAMD_STATUS 3
+
+/* stats [4..6]: error info, or info on jumbled columns */ 
+#define COLAMD_INFO1 4
+#define COLAMD_INFO2 5
+#define COLAMD_INFO3 6
+
+/* error codes returned in stats [3]: */
+#define COLAMD_OK				(0)
+#define COLAMD_OK_BUT_JUMBLED			(1)
+#define COLAMD_ERROR_A_not_present		(-1)
+#define COLAMD_ERROR_p_not_present		(-2)
+#define COLAMD_ERROR_nrow_negative		(-3)
+#define COLAMD_ERROR_ncol_negative		(-4)
+#define COLAMD_ERROR_nnz_negative		(-5)
+#define COLAMD_ERROR_p0_nonzero			(-6)
+#define COLAMD_ERROR_A_too_small		(-7)
+#define COLAMD_ERROR_col_length_negative	(-8)
+#define COLAMD_ERROR_row_index_out_of_bounds	(-9)
+#define COLAMD_ERROR_out_of_memory		(-10)
+#define COLAMD_ERROR_internal_error		(-999)
+
+/* ========================================================================== */
+/* === Row and Column structures ============================================ */
+/* ========================================================================== */
+
+/* User code that makes use of the colamd/symamd routines need not directly */
+/* reference these structures.  They are used only for the COLAMD_RECOMMENDED */
+/* macro. */
+
+typedef struct Colamd_Col_struct
+{
+    int start ;		/* index for A of first row in this column, or DEAD */
+			/* if column is dead */
+    int length ;	/* number of rows in this column */
+    union
+    {
+	int thickness ;	/* number of original columns represented by this */
+			/* col, if the column is alive */
+	int parent ;	/* parent in parent tree super-column structure, if */
+			/* the column is dead */
+    } shared1 ;
+    union
+    {
+	int score ;	/* the score used to maintain heap, if col is alive */
+	int order ;	/* pivot ordering of this column, if col is dead */
+    } shared2 ;
+    union
+    {
+	int headhash ;	/* head of a hash bucket, if col is at the head of */
+			/* a degree list */
+	int hash ;	/* hash value, if col is not in a degree list */
+	int prev ;	/* previous column in degree list, if col is in a */
+			/* degree list (but not at the head of a degree list) */
+    } shared3 ;
+    union
+    {
+	int degree_next ;	/* next column, if col is in a degree list */
+	int hash_next ;		/* next column, if col is in a hash list */
+    } shared4 ;
+
+} Colamd_Col ;
+
+typedef struct Colamd_Row_struct
+{
+    int start ;		/* index for A of first col in this row */
+    int length ;	/* number of principal columns in this row */
+    union
+    {
+	int degree ;	/* number of principal & non-principal columns in row */
+	int p ;		/* used as a row pointer in init_rows_cols () */
+    } shared1 ;
+    union
+    {
+	int mark ;	/* for computing set differences and marking dead rows*/
+	int first_column ;/* first column in row (used in garbage collection) */
+    } shared2 ;
+
+} Colamd_Row ;
+
+/* ========================================================================== */
+/* === Colamd recommended memory size ======================================= */
+/* ========================================================================== */
+
+/*
+    The recommended length Alen of the array A passed to colamd is given by
+    the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro.  It returns -1 if any
+    argument is negative.  2*nnz space is required for the row and column
+    indices of the matrix. COLAMD_C (n_col) + COLAMD_R (n_row) space is
+    required for the Col and Row arrays, respectively, which are internal to
+    colamd.  An additional n_col space is the minimal amount of "elbow room",
+    and nnz/5 more space is recommended for run time efficiency.
+
+    This macro is not needed when using symamd.
+
+    Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid
+    gcc -pedantic warning messages.
+*/
+
+#define COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (Colamd_Col) / sizeof (int)))
+#define COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int)))
+
+#define COLAMD_RECOMMENDED(nnz, n_row, n_col)                                 \
+(                                                                             \
+((nnz) < 0 || (n_row) < 0 || (n_col) < 0)                                     \
+?                                                                             \
+    (-1)                                                                      \
+:                                                                             \
+    (2 * (nnz) + COLAMD_C (n_col) + COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \
+)
 
 /* ========================================================================== */
 /* === Prototypes of user-callable routines ================================= */
 /* ========================================================================== */
 
-int colamd_recommended		/* returns recommended value of Alen */
+int colamd_recommended		/* returns recommended value of Alen, */
+				/* or (-1) if input arguments are erroneous */
 (
     int nnz,			/* nonzeros in A */
     int n_row,			/* number of rows in A */
@@ -55,13 +206,41 @@ void colamd_set_defaults	/* sets default parameters */
     double knobs [COLAMD_KNOBS]	/* parameter settings for colamd */
 ) ;
 
-int colamd			/* returns TRUE if successful, FALSE otherwise*/
+int colamd			/* returns (1) if successful, (0) otherwise*/
 (				/* A and p arguments are modified on output */
     int n_row,			/* number of rows in A */
     int n_col,			/* number of columns in A */
     int Alen,			/* size of the array A */
     int A [],			/* row indices of A, of size Alen */
     int p [],			/* column pointers of A, of size n_col+1 */
-    double knobs [COLAMD_KNOBS]	/* parameter settings for colamd */
+    double knobs [COLAMD_KNOBS],/* parameter settings for colamd */
+    int stats [COLAMD_STATS]	/* colamd output statistics and error codes */
+) ;
+
+int symamd				/* return (1) if OK, (0) otherwise */
+(
+    int n,				/* number of rows and columns of A */
+    int A [],				/* row indices of A */
+    int p [],				/* column pointers of A */
+    int perm [],			/* output permutation, size n+1 */
+    double knobs [COLAMD_KNOBS],	/* parameters (uses defaults if NULL) */
+    int stats [COLAMD_STATS],		/* output statistics and error codes */
+    void * (*allocate) (size_t, size_t),
+    					/* pointer to calloc (ANSI C) or */
+					/* mxCalloc (for MATLAB mexFunction) */
+    void (*release) (void *)
+    					/* pointer to free (ANSI C) or */
+    					/* mxFree (for MATLAB mexFunction) */
+) ;
+
+void colamd_report
+(
+    int stats [COLAMD_STATS]
+) ;
+
+void symamd_report
+(
+    int stats [COLAMD_STATS]
 ) ;
 
+#endif /* COLAMD_H */
diff --git a/SRC/cpanel_bmod.c b/SRC/cpanel_bmod.c
index d73f2c5..c1246a1 100644
--- a/SRC/cpanel_bmod.c
+++ b/SRC/cpanel_bmod.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/cpanel_dfs.c b/SRC/cpanel_dfs.c
index 6343c0b..f20a8d2 100644
--- a/SRC/cpanel_dfs.c
+++ b/SRC/cpanel_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 void
 cpanel_dfs (
diff --git a/SRC/cpivotL.c b/SRC/cpivotL.c
index f4640b4..db24a0d 100644
--- a/SRC/cpivotL.c
+++ b/SRC/cpivotL.c
@@ -21,7 +21,7 @@
 
 #include <math.h>
 #include <stdlib.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 #undef DEBUG
 
diff --git a/SRC/cpivotgrowth.c b/SRC/cpivotgrowth.c
index a077daa..63bd7bf 100644
--- a/SRC/cpivotgrowth.c
+++ b/SRC/cpivotgrowth.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 float
 cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
diff --git a/SRC/cpruneL.c b/SRC/cpruneL.c
index 39d3005..29f20d4 100644
--- a/SRC/cpruneL.c
+++ b/SRC/cpruneL.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 void
 cpruneL(
diff --git a/SRC/creadhb.c b/SRC/creadhb.c
index cc41b06..e437d27 100644
--- a/SRC/creadhb.c
+++ b/SRC/creadhb.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -9,7 +8,7 @@
  */
 #include <stdio.h>
 #include <stdlib.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 
 /* Eat up the rest of the current line */
diff --git a/SRC/csnode_bmod.c b/SRC/csnode_bmod.c
index e552240..041737f 100644
--- a/SRC/csnode_bmod.c
+++ b/SRC/csnode_bmod.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 
 /*
diff --git a/SRC/csnode_dfs.c b/SRC/csnode_dfs.c
index 4892c44..19fb10c 100644
--- a/SRC/csnode_dfs.c
+++ b/SRC/csnode_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 int
 csnode_dfs (
diff --git a/SRC/csp_blas2.c b/SRC/csp_blas2.c
index e18e2c1..d1a0a53 100644
--- a/SRC/csp_blas2.c
+++ b/SRC/csp_blas2.c
@@ -11,7 +11,7 @@
  * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
  */
 
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 /* 
  * Function prototypes 
@@ -132,7 +132,8 @@ sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 		luptr = L_NZ_START(fsupc);
 		nrow = nsupr - nsupc;
 
-	        solve_ops += 4 * nsupc * (nsupc - 1);
+                /* 1 c_div costs 10 flops */
+	        solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
 	        solve_ops += 8 * nrow * nsupc;
 
 		if ( nsupc == 1 ) {
@@ -185,7 +186,8 @@ sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 	    	nsupc = L_FST_SUPC(k+1) - fsupc;
 	    	luptr = L_NZ_START(fsupc);
 		
-    	        solve_ops += 4 * nsupc * (nsupc + 1);
+                /* 1 c_div costs 10 flops */
+    	        solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
 
 		if ( nsupc == 1 ) {
 		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
@@ -279,7 +281,8 @@ sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 		    }
 		}
 
-		solve_ops += 4 * nsupc * (nsupc + 1);
+                /* 1 c_div costs 10 flops */
+		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
 
 		if ( nsupc == 1 ) {
 		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
@@ -358,7 +361,8 @@ sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 		    }
 		}
 
-		solve_ops += 4 * nsupc * (nsupc + 1);
+                /* 1 c_div costs 10 flops */
+		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
  
 		if ( nsupc == 1 ) {
                     cc_conj(&temp, &Lval[luptr]);
diff --git a/SRC/csp_blas2.c.bak b/SRC/csp_blas2.c.bak
deleted file mode 100644
index bc6cb28..0000000
--- a/SRC/csp_blas2.c.bak
+++ /dev/null
@@ -1,479 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- * File name:		csp_blas2.c
- * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "csp_defs.h"
-
-/* 
- * Function prototypes 
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-
-
-int
-sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose
- *   =======
- *
- *   sp_ctrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.   
- *                trans = 'C' or 'c'   A'*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *	     
- *   L       - (input) SuperMatrix*
- *	       The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *	        The factor U from the factorization Pr*A*Pc=L*U.
- *	        U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
- *    
- *   x       - (input/output) complex*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-	 ftcs2 = _cptofcd("N", strlen("N")),
-	 ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    SCformat *Lstore;
-    NCformat *Ustore;
-    complex   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    complex comp_zero = {0.0, 0.0};
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    complex *work;
-    flops_t solve_ops;
-
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-	i = -(*info);
-	xerbla_("sp_ctrsv", &i);
-	return 0;
-    }
-
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-
-    if ( !(work = complexCalloc(L->nrow)) )
-	ABORT("Malloc fails for work in sp_ctrsv().");
-    
-    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
-	
-	if ( lsame_(uplo, "L") ) {
-	    /* Form x := inv(L)*x */
-    	    if ( L->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = 0; k <= Lstore->nsuper; k++) {
-		fsupc = L_FST_SUPC(k);
-		istart = L_SUB_START(fsupc);
-		nsupr = L_SUB_START(fsupc+1) - istart;
-		nsupc = L_FST_SUPC(k+1) - fsupc;
-		luptr = L_NZ_START(fsupc);
-		nrow = nsupr - nsupc;
-
-	        solve_ops += 4 * nsupc * (nsupc - 1);
-	        solve_ops += 8 * nrow * nsupc;
-
-		if ( nsupc == 1 ) {
-		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-			irow = L_SUB(iptr);
-			++luptr;
-			cc_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
-			c_sub(&x[irow], &x[irow], &comp_zero);
-		    }
-		} else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-		       	&x[fsupc], &incx);
-		
-		    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
-		    ctrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-		       	&x[fsupc], &incx);
-		
-		    cgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
-		    clsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-		
-		    cmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-#endif		
-		
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; ++i, ++iptr) {
-			irow = L_SUB(iptr);
-			c_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
-			work[i] = comp_zero;
-
-		    }
-	 	}
-	    } /* for k ... */
-	    
-	} else {
-	    /* Form x := inv(U)*x */
-	    
-	    if ( U->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = Lstore->nsuper; k >= 0; k--) {
-	    	fsupc = L_FST_SUPC(k);
-	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-		
-    	        solve_ops += 4 * nsupc * (nsupc + 1);
-
-		if ( nsupc == 1 ) {
-		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-			irow = U_SUB(i);
-			cc_mult(&comp_zero, &x[fsupc], &Uval[i]);
-			c_sub(&x[irow], &x[irow], &comp_zero);
-		    }
-		} else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		    CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-		       &x[fsupc], &incx);
-#else
-		    ctrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-#else		
-		    cusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif		
-
-		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		        solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-				i++) {
-			    irow = U_SUB(i);
-			cc_mult(&comp_zero, &x[jcol], &Uval[i]);
-			c_sub(&x[irow], &x[irow], &comp_zero);
-		    	}
-                    }
-		}
-	    } /* for k ... */
-	    
-	}
-    } else { /* Form x := inv(A')*x */
-	
-	if ( lsame_(uplo, "L") ) {
-	    /* Form x := inv(L')*x */
-    	    if ( L->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = Lstore->nsuper; k >= 0; --k) {
-	    	fsupc = L_FST_SUPC(k);
-	    	istart = L_SUB_START(fsupc);
-	    	nsupr = L_SUB_START(fsupc+1) - istart;
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-
-		solve_ops += 8 * (nsupr - nsupc) * nsupc;
-
-		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		    iptr = istart + nsupc;
-		    for (i = L_NZ_START(jcol) + nsupc; 
-				i < L_NZ_START(jcol+1); i++) {
-			irow = L_SUB(iptr);
-			cc_mult(&comp_zero, &x[irow], &Lval[i]);
-		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
-			iptr++;
-		    }
-		}
-		
-		if ( nsupc > 1 ) {
-		    solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-			&x[fsupc], &incx);
-#else
-		    ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-			&x[fsupc], &incx);
-#endif
-		}
-	    }
-	} else {
-	    /* Form x := inv(U')*x */
-	    if ( U->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = 0; k <= Lstore->nsuper; k++) {
-	    	fsupc = L_FST_SUPC(k);
-	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-
-		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-			irow = U_SUB(i);
-			cc_mult(&comp_zero, &x[irow], &Uval[i]);
-		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
-		    }
-		}
-
-		solve_ops += 4 * nsupc * (nsupc + 1);
-
-		if ( nsupc == 1 ) {
-		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-		} else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-			    &x[fsupc], &incx);
-#else
-		    ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-			    &x[fsupc], &incx);
-#endif
-		}
-	    } /* for k ... */
-	}
-    }
-
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-}
-
-
-
-int
-sp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x, 
-	 int incx, complex beta, complex *y, int incy)
-{
-/*  Purpose   
-    =======   
-
-    sp_cgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-
-    Parameters   
-    ==========   
-
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-
-    ALPHA  - (input) complex
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Before entry, the leading m by n part of the array A must   
-             contain the matrix of coefficients.   
-
-    X      - (input) complex*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-
-    BETA   - (input) complex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-
-    Y      - (output) complex*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-	     
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-
-    ==== Sparse Level 2 Blas routine.   
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int info;
-    complex temp, temp1;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-    complex comp_zero = {0.0, 0.0};
-    complex comp_one = {1.0, 0.0};
-
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)	info = 8;
-    if (info != 0) {
-	xerbla_("sp_cgemv ", &info);
-	return 0;
-    }
-
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || 
-	c_eq(&alpha, &comp_zero) && 
-	c_eq(&beta, &comp_one))
-	return 0;
-
-
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-	lenx = A->ncol;
-	leny = A->nrow;
-    } else {
-	lenx = A->nrow;
-	leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if ( !c_eq(&beta, &comp_one) ) {
-	if (incy == 1) {
-	    if ( c_eq(&beta, &comp_zero) )
-		for (i = 0; i < leny; ++i) y[i] = comp_zero;
-	    else
-		for (i = 0; i < leny; ++i) 
-		  cc_mult(&y[i], &beta, &y[i]);
-	} else {
-	    iy = ky;
-	    if ( c_eq(&beta, &comp_zero) )
-		for (i = 0; i < leny; ++i) {
-		    y[iy] = comp_zero;
-		    iy += incy;
-		}
-	    else
-		for (i = 0; i < leny; ++i) {
-		    cc_mult(&y[iy], &beta, &y[iy]);
-		    iy += incy;
-		}
-	}
-    }
-    
-    if ( c_eq(&alpha, &comp_zero) ) return 0;
-
-    if ( notran ) {
-	/* Form  y := alpha*A*x + y. */
-	jx = kx;
-	if (incy == 1) {
-	    for (j = 0; j < A->ncol; ++j) {
-		if ( !c_eq(&x[jx], &comp_zero) ) {
-		    cc_mult(&temp, &alpha, &x[jx]);
-		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-			irow = Astore->rowind[i];
-			cc_mult(&temp1, &temp,  &Aval[i]);
-			c_add(&y[irow], &y[irow], &temp1);
-		    }
-		}
-		jx += incx;
-	    }
-	} else {
-	    ABORT("Not implemented.");
-	}
-    } else {
-	/* Form  y := alpha*A'*x + y. */
-	jy = ky;
-	if (incx == 1) {
-	    for (j = 0; j < A->ncol; ++j) {
-		temp = comp_zero;
-		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-		    irow = Astore->rowind[i];
-		    cc_mult(&temp1, &Aval[i], &x[irow]);
-		    c_add(&temp, &temp, &temp1);
-		}
-		cc_mult(&temp1, &alpha, &temp);
-		c_add(&y[jy], &y[jy], &temp1);
-		jy += incy;
-	    }
-	} else {
-	    ABORT("Not implemented.");
-	}
-    }
-    return 0;    
-} /* sp_cgemv */
-
diff --git a/SRC/csp_blas3.c b/SRC/csp_blas3.c
index 8dc9f5a..e11c11d 100644
--- a/SRC/csp_blas3.c
+++ b/SRC/csp_blas3.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * Purpose:		Sparse BLAS3, using some dense BLAS3 operations.
  */
 
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 int
 sp_cgemm(char *transa, char *transb, int m, int n, int k, 
diff --git a/SRC/cutil.c b/SRC/cutil.c
index 4c7f985..bb50965 100644
--- a/SRC/cutil.c
+++ b/SRC/cutil.c
@@ -20,7 +20,7 @@
 */
 
 #include <math.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 void
 cCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
@@ -240,7 +240,8 @@ cPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
       for (j = c; j < c + nsup; ++j) {
 	d = Astore->nzval_colptr[j];
 	for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-	  printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d++], dp[d++]);
+	  printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]);
+          d += 2;	
 	}
       }
     }
@@ -267,16 +268,19 @@ void
 cPrint_Dense_Matrix(char *what, SuperMatrix *A)
 {
     DNformat     *Astore;
-    register int i;
+    register int i, j, lda = Astore->lda;
     float       *dp;
     
     printf("\nDense matrix %s:\n", what);
     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
     Astore = (DNformat *) A->Store;
     dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
     printf("\nnzval: ");
-    for (i = 0; i < 2*A->nrow; ++i) printf("%f  ", dp[i]);
+    for (j = 0; j < A->ncol; ++j) {
+        for (i = 0; i < 2*A->nrow; ++i) printf("%f  ", dp[i + j*2*lda]);
+        printf("\n");
+    }
     printf("\n");
     fflush(stdout);
 }
diff --git a/SRC/dcolumn_bmod.c b/SRC/dcolumn_bmod.c
index 43fc18f..0ba9270 100644
--- a/SRC/dcolumn_bmod.c
+++ b/SRC/dcolumn_bmod.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/dcolumn_dfs.c b/SRC/dcolumn_dfs.c
index 96e6222..c644ef7 100644
--- a/SRC/dcolumn_dfs.c
+++ b/SRC/dcolumn_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 /* What type of supernodes we want */
 #define T2_SUPER
diff --git a/SRC/dcomplex.c b/SRC/dcomplex.c
index 85d2be3..396c15c 100644
--- a/SRC/dcomplex.c
+++ b/SRC/dcomplex.c
@@ -10,8 +10,9 @@
  * This file defines common arithmetic operations for complex type.
  */
 #include <math.h>
+#include <stdlib.h>
 #include <stdio.h>
-#include "dcomplex.h"
+#include "slu_dcomplex.h"
 
 
 /* Complex Division c = a/b */
@@ -26,8 +27,8 @@ void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b)
 	abi = - abi;
     if( abr <= abi ) {
 	if (abi == 0) {
-	    fprintf(stderr, "z_div.c: division by zero");
-	    exit (-1);
+	    fprintf(stderr, "z_div.c: division by zero\n");
+            exit(-1);
 	}	  
 	ratio = b->r / b->i ;
 	den = b->i * (1 + ratio*ratio);
diff --git a/SRC/dcopy_to_ucol.c b/SRC/dcopy_to_ucol.c
index 09670df..453e33a 100644
--- a/SRC/dcopy_to_ucol.c
+++ b/SRC/dcopy_to_ucol.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 int
 dcopy_to_ucol(
diff --git a/SRC/dgscon.c b/SRC/dgscon.c
index 9c313b5..059da61 100644
--- a/SRC/dgscon.c
+++ b/SRC/dgscon.c
@@ -11,7 +11,7 @@
  * History:     Modified from lapack routines DGECON.
  */
 #include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 dgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/dgsequ.c b/SRC/dgsequ.c
index ac569b9..0daee10 100644
--- a/SRC/dgsequ.c
+++ b/SRC/dgsequ.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from LAPACK routine DGEEQU
  */
 #include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 void
 dgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
diff --git a/SRC/dgsrfs.c b/SRC/dgsrfs.c
index 922093b..a71cc38 100644
--- a/SRC/dgsrfs.c
+++ b/SRC/dgsrfs.c
@@ -11,7 +11,7 @@
  * History:     Modified from lapack routine DGERFS
  */
 #include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/dgssv.c b/SRC/dgssv.c
index e7725a4..99e84dd 100644
--- a/SRC/dgssv.c
+++ b/SRC/dgssv.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -7,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 dgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/dgssvx.c b/SRC/dgssvx.c
index a4baab3..b7e1a1c 100644
--- a/SRC/dgssvx.c
+++ b/SRC/dgssvx.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 dgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/dgstrf.c b/SRC/dgstrf.c
index 5696cff..aba4c0b 100644
--- a/SRC/dgstrf.c
+++ b/SRC/dgstrf.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 dgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
@@ -182,8 +182,8 @@ dgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
  */
     /* Local working arrays */
     NCPformat *Astore;
-    int       *iperm_r; /* inverse of perm_r;
-			   used when options->Fact == SamePattern_SameRowPerm */
+    int       *iperm_r = NULL; /* inverse of perm_r; used when 
+                                  options->Fact == SamePattern_SameRowPerm */
     int       *iperm_c; /* inverse of perm_c */
     int       *iwork;
     double    *dwork;
diff --git a/SRC/dgstrs.c b/SRC/dgstrs.c
index 4807a51..04cb38d 100644
--- a/SRC/dgstrs.c
+++ b/SRC/dgstrs.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 
 /* 
diff --git a/SRC/dgstrs.c.bak b/SRC/dgstrs.c.bak
deleted file mode 100644
index 04efbbd..0000000
--- a/SRC/dgstrs.c.bak
+++ /dev/null
@@ -1,334 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "dsp_defs.h"
-
-
-/* 
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-
-void
-dgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * DGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * DGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *	   Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- * 	   = 0: successful exit
- *	   < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
-    double   alpha = 1.0, beta = 1.0;
-    double   *work_col;
-#endif
-    DNformat *Bstore;
-    double   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    double   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void dprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-	      L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
-	*info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-	      U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU )
-	*info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-	      B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-	*info = -6;
-    if ( *info ) {
-	i = -(*info);
-	xerbla_("dgstrs", &i);
-	return;
-    }
-
-    n = L->nrow;
-    work = doubleCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = doubleMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    
-    if ( trans == NOTRANS ) {
-	/* Permute right hand sides to form Pr*B */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-	
-	/* Forward solve PLy=Pb. */
-	for (k = 0; k <= Lstore->nsuper; k++) {
-	    fsupc = L_FST_SUPC(k);
-	    istart = L_SUB_START(fsupc);
-	    nsupr = L_SUB_START(fsupc+1) - istart;
-	    nsupc = L_FST_SUPC(k+1) - fsupc;
-	    nrow = nsupr - nsupc;
-
-	    solve_ops += nsupc * (nsupc - 1) * nrhs;
-	    solve_ops += 2 * nrow * nsupc * nrhs;
-	    
-	    if ( nsupc == 1 ) {
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-	    	    luptr = L_NZ_START(fsupc);
-		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-			irow = L_SUB(iptr);
-			++luptr;
-			rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
-		    }
-		}
-	    } else {
-	    	luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		ftcs1 = _cptofcd("L", strlen("L"));
-		ftcs2 = _cptofcd("N", strlen("N"));
-		ftcs3 = _cptofcd("U", strlen("U"));
-		STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-		
-		SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-			&beta, &work[0], &n );
-#else
-		dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-		
-		dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-			&beta, &work[0], &n );
-#endif
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-		    work_col = &work[j*n];
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; i++) {
-			irow = L_SUB(iptr);
-			rhs_work[irow] -= work_col[i]; /* Scatter */
-			work_col[i] = 0.0;
-			iptr++;
-		    }
-		}
-#else		
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-		    dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-		    dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-			    &rhs_work[fsupc], &work[0] );
-
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; i++) {
-			irow = L_SUB(iptr);
-			rhs_work[irow] -= work[i];
-			work[i] = 0.0;
-			iptr++;
-		    }
-		}
-#endif		    
-	    } /* else ... */
-	} /* for L-solve */
-
-#ifdef DEBUG
-  	printf("After L-solve: y=\n");
-	dprint_soln(n, nrhs, Bmat);
-#endif
-
-	/*
-	 * Back solve Ux=y.
-	 */
-	for (k = Lstore->nsuper; k >= 0; k--) {
-	    fsupc = L_FST_SUPC(k);
-	    istart = L_SUB_START(fsupc);
-	    nsupr = L_SUB_START(fsupc+1) - istart;
-	    nsupc = L_FST_SUPC(k+1) - fsupc;
-	    luptr = L_NZ_START(fsupc);
-
-	    solve_ops += nsupc * (nsupc + 1) * nrhs;
-
-	    if ( nsupc == 1 ) {
-		rhs_work = &Bmat[0];
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work[fsupc] /= Lval[luptr];
-		    rhs_work += ldb;
-		}
-	    } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		ftcs1 = _cptofcd("L", strlen("L"));
-		ftcs2 = _cptofcd("U", strlen("U"));
-		ftcs3 = _cptofcd("N", strlen("N"));
-		STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
-		dtrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else		
-		for (j = 0; j < nrhs; j++)
-		    dusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif		
-	    }
-
-	    for (j = 0; j < nrhs; ++j) {
-		rhs_work = &Bmat[j*ldb];
-		for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-			irow = U_SUB(i);
-			rhs_work[irow] -= rhs_work[jcol] * Uval[i];
-		    }
-		}
-	    }
-	    
-	} /* for U-solve */
-
-#ifdef DEBUG
-  	printf("After U-solve: x=\n");
-	dprint_soln(n, nrhs, Bmat);
-#endif
-
-	/* Compute the final solution X := Pc*X. */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-	
-        stat->ops[SOLVE] = solve_ops;
-
-    } else { /* Solve A'*X=B */
-	/* Permute right hand sides to form Pc'*B. */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-
-	stat->ops[SOLVE] = 0;
-	
-	for (k = 0; k < nrhs; ++k) {
-	    
-	    /* Multiply by inv(U'). */
-	    sp_dtrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-	    
-	    /* Multiply by inv(L'). */
-	    sp_dtrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-	    
-	}
-	
-	/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-dprint_soln(int n, int nrhs, double *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-  	printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/SRC/dgstrsL.c b/SRC/dgstrsL.c
index e13b111..c7f20e5 100644
--- a/SRC/dgstrsL.c
+++ b/SRC/dgstrsL.c
@@ -20,8 +20,8 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
+#include "slu_util.h"
 
 
 /* 
diff --git a/SRC/dlacon.c b/SRC/dlacon.c
index d5dd354..932b891 100644
--- a/SRC/dlacon.c
+++ b/SRC/dlacon.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,7 +7,7 @@
  *
  */
 #include <math.h>
-#include "Cnames.h"
+#include "slu_Cnames.h"
 
 int
 dlacon_(int *n, double *v, double *x, int *isgn, double *est, int *kase)
diff --git a/SRC/dlamch.c b/SRC/dlamch.c
index 12b41b4..53c1f90 100644
--- a/SRC/dlamch.c
+++ b/SRC/dlamch.c
@@ -1,4 +1,6 @@
 #include <stdio.h>
+#include "slu_Cnames.h"
+
 #define TRUE_ (1)
 #define FALSE_ (0)
 #define abs(x) ((x) >= 0 ? (x) : -(x))
diff --git a/SRC/dlangs.c b/SRC/dlangs.c
index 5a642ca..1dd5dfc 100644
--- a/SRC/dlangs.c
+++ b/SRC/dlangs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from lapack routine DLANGE
  */
 #include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 double dlangs(char *norm, SuperMatrix *A)
 {
diff --git a/SRC/dlaqgs.c b/SRC/dlaqgs.c
index 4873a91..6a7a7b8 100644
--- a/SRC/dlaqgs.c
+++ b/SRC/dlaqgs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,7 +11,7 @@
  * History:     Modified from LAPACK routine DLAQGE
  */
 #include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 dlaqgs(SuperMatrix *A, double *r, double *c, 
diff --git a/SRC/dmemory.c b/SRC/dmemory.c
index c2e24a6..c56cb10 100644
--- a/SRC/dmemory.c
+++ b/SRC/dmemory.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 /* Constants */
 #define NO_MEMTYPE  4      /* 0: lusup;
@@ -193,9 +193,10 @@ dLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 	    dSetupSpace(work, lwork, &Glu->MemModel);
 	}
 	
-#ifdef DEBUG		   
-	printf("dLUMemInit() called: annz %d, MemModel %d\n", 
-		annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+	printf("dLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
+	       FILL, nzlmax, nzumax);
+	fflush(stdout);
 #endif	
 	
 	/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ dLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 		printf("Not enough memory to perform factorization.\n");
 		return (dmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
 	    }
+#if ( PRNTlevel >= 1)
+	    printf("dLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
+		   nzlmax, nzumax);
+	    fflush(stdout);
+#endif
 	    lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu );
 	    ucol  = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu );
 	    lsub  = (int *)    dexpand( &nzlmax, LSUB, 0, 0, Glu );
@@ -476,8 +482,7 @@ void
     else lword = sizeof(double);
 
     if ( Glu->MemModel == SYSTEM ) {
-	new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/*	new_mem = (void *) calloc(new_len, lword); */
+	new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
 	if ( no_expand != 0 ) {
 	    tries = 0;
 	    if ( keep_prev ) {
@@ -487,8 +492,7 @@ void
 		    if ( ++tries > 10 ) return (NULL);
 		    alpha = Reduce(alpha);
 		    new_len = alpha * *prev_len;
-		    new_mem = (void *) SUPERLU_MALLOC(new_len * lword); 
-/*		    new_mem = (void *) calloc(new_len, lword); */
+		    new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
 		}
 	    }
 	    if ( type == LSUB || type == USUB ) {
@@ -641,7 +645,7 @@ dallocateA(int n, int nnz, double **a, int **asub, int **xa)
 double *doubleMalloc(int n)
 {
     double *buf;
-    buf = (double *) SUPERLU_MALLOC(n * sizeof(double)); 
+    buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double)); 
     if ( !buf ) {
 	ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n");
     }
@@ -653,7 +657,7 @@ double *doubleCalloc(int n)
     double *buf;
     register int i;
     double zero = 0.0;
-    buf = (double *) SUPERLU_MALLOC(n * sizeof(double));
+    buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double));
     if ( !buf ) {
 	ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n");
     }
diff --git a/SRC/dmyblas2.c b/SRC/dmyblas2.c
index e6bbdd1..e02660a 100644
--- a/SRC/dmyblas2.c
+++ b/SRC/dmyblas2.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
diff --git a/SRC/dpanel_bmod.c b/SRC/dpanel_bmod.c
index bdc8c77..0019fe8 100644
--- a/SRC/dpanel_bmod.c
+++ b/SRC/dpanel_bmod.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/dpanel_dfs.c b/SRC/dpanel_dfs.c
index da2f18c..6a4c742 100644
--- a/SRC/dpanel_dfs.c
+++ b/SRC/dpanel_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 void
 dpanel_dfs (
diff --git a/SRC/dpivotL.c b/SRC/dpivotL.c
index 9263427..bf43e6a 100644
--- a/SRC/dpivotL.c
+++ b/SRC/dpivotL.c
@@ -21,7 +21,7 @@
 
 #include <math.h>
 #include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 #undef DEBUG
 
diff --git a/SRC/dpivotgrowth.c b/SRC/dpivotgrowth.c
index ac943c1..41924f7 100644
--- a/SRC/dpivotgrowth.c
+++ b/SRC/dpivotgrowth.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 double
 dPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
diff --git a/SRC/dpruneL.c b/SRC/dpruneL.c
index 1e7d53d..c782ca1 100644
--- a/SRC/dpruneL.c
+++ b/SRC/dpruneL.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 void
 dpruneL(
diff --git a/SRC/dreadhb.c b/SRC/dreadhb.c
index 44d6ced..b32a225 100644
--- a/SRC/dreadhb.c
+++ b/SRC/dreadhb.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -9,7 +8,7 @@
  */
 #include <stdio.h>
 #include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 
 /* Eat up the rest of the current line */
diff --git a/SRC/dsnode_bmod.c b/SRC/dsnode_bmod.c
index 3e259ac..ec06144 100644
--- a/SRC/dsnode_bmod.c
+++ b/SRC/dsnode_bmod.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 
 /*
diff --git a/SRC/dsnode_dfs.c b/SRC/dsnode_dfs.c
index aab16d7..3823e85 100644
--- a/SRC/dsnode_dfs.c
+++ b/SRC/dsnode_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 int
 dsnode_dfs (
diff --git a/SRC/dsp_blas2.c b/SRC/dsp_blas2.c
index 52162db..420c349 100644
--- a/SRC/dsp_blas2.c
+++ b/SRC/dsp_blas2.c
@@ -11,7 +11,7 @@
  * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
  */
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/dsp_blas2.c.bak b/SRC/dsp_blas2.c.bak
deleted file mode 100644
index 5133ec6..0000000
--- a/SRC/dsp_blas2.c.bak
+++ /dev/null
@@ -1,469 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- * File name:		dsp_blas2.c
- * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "dsp_defs.h"
-
-/* 
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-
-int
-sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose
- *   =======
- *
- *   sp_dtrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.   
- *                trans = 'C' or 'c'   A'*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *	     
- *   L       - (input) SuperMatrix*
- *	       The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *	        The factor U from the factorization Pr*A*Pc=L*U.
- *	        U has types: Stype = NC, Dtype = SLU_D, Mtype = TRU.
- *    
- *   x       - (input/output) double*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-	 ftcs2 = _cptofcd("N", strlen("N")),
-	 ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    double alpha = 1.0, beta = 1.0;
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    double *work;
-    flops_t solve_ops;
-
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-	i = -(*info);
-	xerbla_("sp_dtrsv", &i);
-	return 0;
-    }
-
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-
-    if ( !(work = doubleCalloc(L->nrow)) )
-	ABORT("Malloc fails for work in sp_dtrsv().");
-    
-    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
-	
-	if ( lsame_(uplo, "L") ) {
-	    /* Form x := inv(L)*x */
-    	    if ( L->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = 0; k <= Lstore->nsuper; k++) {
-		fsupc = L_FST_SUPC(k);
-		istart = L_SUB_START(fsupc);
-		nsupr = L_SUB_START(fsupc+1) - istart;
-		nsupc = L_FST_SUPC(k+1) - fsupc;
-		luptr = L_NZ_START(fsupc);
-		nrow = nsupr - nsupc;
-
-	        solve_ops += nsupc * (nsupc - 1);
-	        solve_ops += 2 * nrow * nsupc;
-
-		if ( nsupc == 1 ) {
-		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-			irow = L_SUB(iptr);
-			++luptr;
-			x[irow] -= x[fsupc] * Lval[luptr];
-		    }
-		} else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-		       	&x[fsupc], &incx);
-		
-		    SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
-		    dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-		       	&x[fsupc], &incx);
-		
-		    dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
-		    dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-		
-		    dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-#endif		
-		
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; ++i, ++iptr) {
-			irow = L_SUB(iptr);
-			x[irow] -= work[i];	/* Scatter */
-			work[i] = 0.0;
-
-		    }
-	 	}
-	    } /* for k ... */
-	    
-	} else {
-	    /* Form x := inv(U)*x */
-	    
-	    if ( U->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = Lstore->nsuper; k >= 0; k--) {
-	    	fsupc = L_FST_SUPC(k);
-	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-		
-    	        solve_ops += nsupc * (nsupc + 1);
-
-		if ( nsupc == 1 ) {
-		    x[fsupc] /= Lval[luptr];
-		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-			irow = U_SUB(i);
-			x[irow] -= x[fsupc] * Uval[i];
-		    }
-		} else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		    STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-		       &x[fsupc], &incx);
-#else
-		    dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-#else		
-		    dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif		
-
-		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		        solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-				i++) {
-			    irow = U_SUB(i);
-			    x[irow] -= x[jcol] * Uval[i];
-		    	}
-                    }
-		}
-	    } /* for k ... */
-	    
-	}
-    } else { /* Form x := inv(A')*x */
-	
-	if ( lsame_(uplo, "L") ) {
-	    /* Form x := inv(L')*x */
-    	    if ( L->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = Lstore->nsuper; k >= 0; --k) {
-	    	fsupc = L_FST_SUPC(k);
-	    	istart = L_SUB_START(fsupc);
-	    	nsupr = L_SUB_START(fsupc+1) - istart;
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-
-		solve_ops += 2 * (nsupr - nsupc) * nsupc;
-
-		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		    iptr = istart + nsupc;
-		    for (i = L_NZ_START(jcol) + nsupc; 
-				i < L_NZ_START(jcol+1); i++) {
-			irow = L_SUB(iptr);
-			x[jcol] -= x[irow] * Lval[i];
-			iptr++;
-		    }
-		}
-		
-		if ( nsupc > 1 ) {
-		    solve_ops += nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-			&x[fsupc], &incx);
-#else
-		    dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-			&x[fsupc], &incx);
-#endif
-		}
-	    }
-	} else {
-	    /* Form x := inv(U')*x */
-	    if ( U->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = 0; k <= Lstore->nsuper; k++) {
-	    	fsupc = L_FST_SUPC(k);
-	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-
-		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-			irow = U_SUB(i);
-			x[jcol] -= x[irow] * Uval[i];
-		    }
-		}
-
-		solve_ops += nsupc * (nsupc + 1);
-
-		if ( nsupc == 1 ) {
-		    x[fsupc] /= Lval[luptr];
-		} else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-		    STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-			    &x[fsupc], &incx);
-#else
-		    dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-			    &x[fsupc], &incx);
-#endif
-		}
-	    } /* for k ... */
-	}
-    }
-
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-}
-
-
-
-
-int
-sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x, 
-	 int incx, double beta, double *y, int incy)
-{
-/*  Purpose   
-    =======   
-
-    sp_dgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-
-    Parameters   
-    ==========   
-
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-
-    ALPHA  - (input) double
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. 
-             In the future, more general A can be handled.
-
-    X      - (input) double*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-
-    BETA   - (input) double
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-
-    Y      - (output) double*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-	     
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-
-    ==== Sparse Level 2 Blas routine.   
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int info;
-    double temp;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)	info = 8;
-    if (info != 0) {
-	xerbla_("sp_dgemv ", &info);
-	return 0;
-    }
-
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
-	return 0;
-
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-	lenx = A->ncol;
-	leny = A->nrow;
-    } else {
-	lenx = A->nrow;
-	leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if (beta != 1.) {
-	if (incy == 1) {
-	    if (beta == 0.)
-		for (i = 0; i < leny; ++i) y[i] = 0.;
-	    else
-		for (i = 0; i < leny; ++i) y[i] = beta * y[i];
-	} else {
-	    iy = ky;
-	    if (beta == 0.)
-		for (i = 0; i < leny; ++i) {
-		    y[iy] = 0.;
-		    iy += incy;
-		}
-	    else
-		for (i = 0; i < leny; ++i) {
-		    y[iy] = beta * y[iy];
-		    iy += incy;
-		}
-	}
-    }
-    
-    if (alpha == 0.) return 0;
-
-    if ( notran ) {
-	/* Form  y := alpha*A*x + y. */
-	jx = kx;
-	if (incy == 1) {
-	    for (j = 0; j < A->ncol; ++j) {
-		if (x[jx] != 0.) {
-		    temp = alpha * x[jx];
-		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-			irow = Astore->rowind[i];
-			y[irow] += temp * Aval[i];
-		    }
-		}
-		jx += incx;
-	    }
-	} else {
-	    ABORT("Not implemented.");
-	}
-    } else {
-	/* Form  y := alpha*A'*x + y. */
-	jy = ky;
-	if (incx == 1) {
-	    for (j = 0; j < A->ncol; ++j) {
-		temp = 0.;
-		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-		    irow = Astore->rowind[i];
-		    temp += Aval[i] * x[irow];
-		}
-		y[jy] += alpha * temp;
-		jy += incy;
-	    }
-	} else {
-	    ABORT("Not implemented.");
-	}
-    }
-    return 0;
-} /* sp_dgemv */
-
-
-
diff --git a/SRC/dsp_blas3.c b/SRC/dsp_blas3.c
index 7057b79..3aaf3c7 100644
--- a/SRC/dsp_blas3.c
+++ b/SRC/dsp_blas3.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * Purpose:		Sparse BLAS3, using some dense BLAS3 operations.
  */
 
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 int
 sp_dgemm(char *transa, char *transb, int m, int n, int k, 
diff --git a/SRC/dutil.c b/SRC/dutil.c
index f4221a8..6956c29 100644
--- a/SRC/dutil.c
+++ b/SRC/dutil.c
@@ -20,7 +20,7 @@
 */
 
 #include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
@@ -267,16 +267,19 @@ void
 dPrint_Dense_Matrix(char *what, SuperMatrix *A)
 {
     DNformat     *Astore;
-    register int i;
+    register int i, j, lda = Astore->lda;
     double       *dp;
     
     printf("\nDense matrix %s:\n", what);
     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
     Astore = (DNformat *) A->Store;
     dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
     printf("\nnzval: ");
-    for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i]);
+    for (j = 0; j < A->ncol; ++j) {
+        for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i + j*lda]);
+        printf("\n");
+    }
     printf("\n");
     fflush(stdout);
 }
diff --git a/SRC/dzsum1.c b/SRC/dzsum1.c
index 7c00e5a..1f0c8a8 100644
--- a/SRC/dzsum1.c
+++ b/SRC/dzsum1.c
@@ -1,4 +1,5 @@
-#include "dcomplex.h"
+#include "slu_Cnames.h"
+#include "slu_dcomplex.h"
 
 double dzsum1_(int *n, doublecomplex *cx, int *incx)
 {
diff --git a/SRC/get_perm_c.c b/SRC/get_perm_c.c
index 19dbc7b..fa8fe6b 100644
--- a/SRC/get_perm_c.c
+++ b/SRC/get_perm_c.c
@@ -5,7 +5,7 @@
  * November 15, 1997
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 #include "colamd.h"
 
 extern int  genmmd_(int *, int *, int *, int *, int *, int *, int *, 
@@ -22,12 +22,11 @@ get_colamd(
 	   )
 {
     int Alen, *A, i, info, *p;
-    double *knobs;
+    double knobs[COLAMD_KNOBS];
+    int stats[COLAMD_STATS];
 
     Alen = colamd_recommended(nnz, m, n);
 
-    if ( !(knobs = (double *) SUPERLU_MALLOC(COLAMD_KNOBS * sizeof(double))) )
-        ABORT("Malloc fails for knobs");
     colamd_set_defaults(knobs);
 
     if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
@@ -36,12 +35,11 @@ get_colamd(
         ABORT("Malloc fails for p[]");
     for (i = 0; i <= n; ++i) p[i] = colptr[i];
     for (i = 0; i < nnz; ++i) A[i] = rowind[i];
-    info = colamd(m, n, Alen, A, p, knobs);
+    info = colamd(m, n, Alen, A, p, knobs, stats);
     if ( info == FALSE ) ABORT("COLAMD failed");
 
     for (i = 0; i < n; ++i) perm_c[p[i]] = i;
 
-    SUPERLU_FREE(knobs);
     SUPERLU_FREE(A);
     SUPERLU_FREE(p);
 }
@@ -434,13 +432,12 @@ get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
 	/* Transform perm_c into 0-based indexing. */
 	for (i = 0; i < n; ++i) --perm_c[i];
 
-	SUPERLU_FREE(b_colptr);
-	SUPERLU_FREE(b_rowind);
 	SUPERLU_FREE(invp);
 	SUPERLU_FREE(dhead);
 	SUPERLU_FREE(qsize);
 	SUPERLU_FREE(llist);
 	SUPERLU_FREE(marker);
+	SUPERLU_FREE(b_rowind);
 
 	t = SuperLU_timer_() - t;
 	/*  printf("call GENMMD time = %8.3f\n", t);*/
@@ -449,4 +446,5 @@ get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
 	for (i = 0; i < n; ++i) perm_c[i] = i;
     }
 
+    SUPERLU_FREE(b_colptr);
 }
diff --git a/SRC/heap_relax_snode.c b/SRC/heap_relax_snode.c
index f731b64..1a40e26 100644
--- a/SRC/heap_relax_snode.c
+++ b/SRC/heap_relax_snode.c
@@ -18,7 +18,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 heap_relax_snode (
diff --git a/SRC/icmax1.c b/SRC/icmax1.c
index b29d5ee..1e254b0 100644
--- a/SRC/icmax1.c
+++ b/SRC/icmax1.c
@@ -1,5 +1,6 @@
 #include <math.h>
-#include "scomplex.h"
+#include "slu_scomplex.h"
+#include "slu_Cnames.h"
 
 int icmax1_(int *n, complex *cx, int *incx)
 {
diff --git a/SRC/izmax1.c b/SRC/izmax1.c
index c31bd66..1a73e1f 100644
--- a/SRC/izmax1.c
+++ b/SRC/izmax1.c
@@ -1,4 +1,6 @@
-#include "dcomplex.h"
+#include <math.h>
+#include "slu_Cnames.h"
+#include "slu_dcomplex.h"
 
 int
 izmax1_(int *n, doublecomplex *cx, int *incx)
@@ -60,17 +62,17 @@ izmax1_(int *n, doublecomplex *cx, int *incx)
 /*     CODE FOR INCREMENT NOT EQUAL TO 1 */
 
     ix = 1;
-    smax = (d__1 = CX(1).r, abs(d__1));
+    smax = (d__1 = CX(1).r, fabs(d__1));
     ix += *incx;
     i__1 = *n;
     for (i = 2; i <= *n; ++i) {
 	i__2 = ix;
-	if ((d__1 = CX(ix).r, abs(d__1)) <= smax) {
+	if ((d__1 = CX(ix).r, fabs(d__1)) <= smax) {
 	    goto L10;
 	}
 	ret_val = i;
 	i__2 = ix;
-	smax = (d__1 = CX(ix).r, abs(d__1));
+	smax = (d__1 = CX(ix).r, fabs(d__1));
 L10:
 	ix += *incx;
 /* L20: */
@@ -80,16 +82,16 @@ L10:
 /*     CODE FOR INCREMENT EQUAL TO 1 */
 
 L30:
-    smax = (d__1 = CX(1).r, abs(d__1));
+    smax = (d__1 = CX(1).r, fabs(d__1));
     i__1 = *n;
     for (i = 2; i <= *n; ++i) {
 	i__2 = i;
-	if ((d__1 = CX(i).r, abs(d__1)) <= smax) {
+	if ((d__1 = CX(i).r, fabs(d__1)) <= smax) {
 	    goto L40;
 	}
 	ret_val = i;
 	i__2 = i;
-	smax = (d__1 = CX(i).r, abs(d__1));
+	smax = (d__1 = CX(i).r, fabs(d__1));
 L40:
 	;
     }
diff --git a/SRC/lsame.c b/SRC/lsame.c
index fba47c6..113c6d0 100644
--- a/SRC/lsame.c
+++ b/SRC/lsame.c
@@ -1,3 +1,5 @@
+#include "slu_Cnames.h"
+
 int lsame_(char *ca, char *cb)
 {
 /*  -- LAPACK auxiliary routine (version 2.0) --   
diff --git a/SRC/memory.c b/SRC/memory.c
index c5e7831..25868f6 100644
--- a/SRC/memory.c
+++ b/SRC/memory.c
@@ -8,7 +8,7 @@
 /** Precision-independent memory-related routines.
     (Shared by [sdcz]memory.c) **/
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 
 #if ( DEBUGlevel>=1 )           /* Debug malloc/free. */
@@ -16,6 +16,7 @@ int superlu_malloc_total = 0;
 
 #define PAD_FACTOR  2
 #define DWORD  (sizeof(double)) /* Be sure it's no smaller than double. */
+/* size_t is usually defined as 'unsigned long' */
 
 void *superlu_malloc(size_t size)
 {
@@ -23,7 +24,7 @@ void *superlu_malloc(size_t size)
 
     buf = (char *) malloc(size + DWORD);
     if ( !buf ) {
-	printf("superlu_malloc fails: malloc_total %.0f MB, size %d\n",
+	printf("superlu_malloc fails: malloc_total %.0f MB, size %ld\n",
 	       superlu_malloc_total*1e-6, size);
 	ABORT("superlu_malloc: out of memory");
     }
diff --git a/SRC/colamd.c b/SRC/old_colamd.c
similarity index 100%
copy from SRC/colamd.c
copy to SRC/old_colamd.c
diff --git a/SRC/colamd.h b/SRC/old_colamd.h
similarity index 100%
copy from SRC/colamd.h
copy to SRC/old_colamd.h
diff --git a/SRC/relax_snode.c b/SRC/relax_snode.c
index f2bc0e5..ef20127 100644
--- a/SRC/relax_snode.c
+++ b/SRC/relax_snode.c
@@ -18,7 +18,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 relax_snode (
diff --git a/SRC/scolumn_bmod.c b/SRC/scolumn_bmod.c
index 1914626..303b3d4 100644
--- a/SRC/scolumn_bmod.c
+++ b/SRC/scolumn_bmod.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/scolumn_dfs.c b/SRC/scolumn_dfs.c
index c29f260..923b25d 100644
--- a/SRC/scolumn_dfs.c
+++ b/SRC/scolumn_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 /* What type of supernodes we want */
 #define T2_SUPER
diff --git a/SRC/scomplex.c b/SRC/scomplex.c
index 8cbbeea..d353281 100644
--- a/SRC/scomplex.c
+++ b/SRC/scomplex.c
@@ -10,8 +10,9 @@
  * This file defines common arithmetic operations for complex type.
  */
 #include <math.h>
+#include <stdlib.h>
 #include <stdio.h>
-#include "scomplex.h"
+#include "slu_scomplex.h"
 
 
 /* Complex Division c = a/b */
@@ -26,8 +27,8 @@ void c_div(complex *c, complex *a, complex *b)
 	abi = - abi;
     if( abr <= abi ) {
 	if (abi == 0) {
-	    fprintf(stderr, "z_div.c: division by zero");
-	    exit (-1);
+	    fprintf(stderr, "z_div.c: division by zero\n");
+            exit(-1);
 	}	  
 	ratio = b->r / b->i ;
 	den = b->i * (1 + ratio*ratio);
diff --git a/SRC/scopy_to_ucol.c b/SRC/scopy_to_ucol.c
index 99de989..daed16a 100644
--- a/SRC/scopy_to_ucol.c
+++ b/SRC/scopy_to_ucol.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 int
 scopy_to_ucol(
diff --git a/SRC/scsum1.c b/SRC/scsum1.c
index 963ba21..5ab8fc2 100644
--- a/SRC/scsum1.c
+++ b/SRC/scsum1.c
@@ -1,4 +1,5 @@
-#include "scomplex.h"
+#include "slu_Cnames.h"
+#include "slu_scomplex.h"
 
 double scsum1_(int *n, complex *cx, int *incx)
 {
diff --git a/SRC/sgscon.c b/SRC/sgscon.c
index f000021..9080602 100644
--- a/SRC/sgscon.c
+++ b/SRC/sgscon.c
@@ -11,7 +11,7 @@
  * History:     Modified from lapack routines SGECON.
  */
 #include <math.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 void
 sgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/sgsequ.c b/SRC/sgsequ.c
index 47408b7..10a2ffc 100644
--- a/SRC/sgsequ.c
+++ b/SRC/sgsequ.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from LAPACK routine SGEEQU
  */
 #include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 void
 sgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
diff --git a/SRC/sgsrfs.c b/SRC/sgsrfs.c
index 42c2d98..9d03b04 100644
--- a/SRC/sgsrfs.c
+++ b/SRC/sgsrfs.c
@@ -11,7 +11,7 @@
  * History:     Modified from lapack routine SGERFS
  */
 #include <math.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 void
 sgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/sgssv.c b/SRC/sgssv.c
index 703f1bc..2e622bf 100644
--- a/SRC/sgssv.c
+++ b/SRC/sgssv.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -7,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 void
 sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/sgssvx.c b/SRC/sgssvx.c
index 7658789..f611b9f 100644
--- a/SRC/sgssvx.c
+++ b/SRC/sgssvx.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 void
 sgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/sgstrf.c b/SRC/sgstrf.c
index 93894dc..b65f93d 100644
--- a/SRC/sgstrf.c
+++ b/SRC/sgstrf.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 void
 sgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
@@ -182,8 +182,8 @@ sgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
  */
     /* Local working arrays */
     NCPformat *Astore;
-    int       *iperm_r; /* inverse of perm_r;
-			   used when options->Fact == SamePattern_SameRowPerm */
+    int       *iperm_r = NULL; /* inverse of perm_r; used when 
+                                  options->Fact == SamePattern_SameRowPerm */
     int       *iperm_c; /* inverse of perm_c */
     int       *iwork;
     float    *swork;
diff --git a/SRC/sgstrs.c b/SRC/sgstrs.c
index 3a72f5e..367e088 100644
--- a/SRC/sgstrs.c
+++ b/SRC/sgstrs.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 
 /* 
diff --git a/SRC/sgstrs.c.bak b/SRC/sgstrs.c.bak
deleted file mode 100644
index f2977eb..0000000
--- a/SRC/sgstrs.c.bak
+++ /dev/null
@@ -1,334 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "ssp_defs.h"
-
-
-/* 
- * Function prototypes 
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-
-
-void
-sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * SGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * SGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         sgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         sgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *	   Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- * 	   = 0: successful exit
- *	   < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
-    float   alpha = 1.0, beta = 1.0;
-    float   *work_col;
-#endif
-    DNformat *Bstore;
-    float   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    float   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    float   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void sprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-	      L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
-	*info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-	      U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
-	*info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-	      B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
-	*info = -6;
-    if ( *info ) {
-	i = -(*info);
-	xerbla_("sgstrs", &i);
-	return;
-    }
-
-    n = L->nrow;
-    work = floatCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = floatMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    
-    if ( trans == NOTRANS ) {
-	/* Permute right hand sides to form Pr*B */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-	
-	/* Forward solve PLy=Pb. */
-	for (k = 0; k <= Lstore->nsuper; k++) {
-	    fsupc = L_FST_SUPC(k);
-	    istart = L_SUB_START(fsupc);
-	    nsupr = L_SUB_START(fsupc+1) - istart;
-	    nsupc = L_FST_SUPC(k+1) - fsupc;
-	    nrow = nsupr - nsupc;
-
-	    solve_ops += nsupc * (nsupc - 1) * nrhs;
-	    solve_ops += 2 * nrow * nsupc * nrhs;
-	    
-	    if ( nsupc == 1 ) {
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-	    	    luptr = L_NZ_START(fsupc);
-		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-			irow = L_SUB(iptr);
-			++luptr;
-			rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
-		    }
-		}
-	    } else {
-	    	luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		ftcs1 = _cptofcd("L", strlen("L"));
-		ftcs2 = _cptofcd("N", strlen("N"));
-		ftcs3 = _cptofcd("U", strlen("U"));
-		STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-		
-		SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-			&beta, &work[0], &n );
-#else
-		strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-		
-		sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-			&beta, &work[0], &n );
-#endif
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-		    work_col = &work[j*n];
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; i++) {
-			irow = L_SUB(iptr);
-			rhs_work[irow] -= work_col[i]; /* Scatter */
-			work_col[i] = 0.0;
-			iptr++;
-		    }
-		}
-#else		
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-		    slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-		    smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-			    &rhs_work[fsupc], &work[0] );
-
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; i++) {
-			irow = L_SUB(iptr);
-			rhs_work[irow] -= work[i];
-			work[i] = 0.0;
-			iptr++;
-		    }
-		}
-#endif		    
-	    } /* else ... */
-	} /* for L-solve */
-
-#ifdef DEBUG
-  	printf("After L-solve: y=\n");
-	sprint_soln(n, nrhs, Bmat);
-#endif
-
-	/*
-	 * Back solve Ux=y.
-	 */
-	for (k = Lstore->nsuper; k >= 0; k--) {
-	    fsupc = L_FST_SUPC(k);
-	    istart = L_SUB_START(fsupc);
-	    nsupr = L_SUB_START(fsupc+1) - istart;
-	    nsupc = L_FST_SUPC(k+1) - fsupc;
-	    luptr = L_NZ_START(fsupc);
-
-	    solve_ops += nsupc * (nsupc + 1) * nrhs;
-
-	    if ( nsupc == 1 ) {
-		rhs_work = &Bmat[0];
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work[fsupc] /= Lval[luptr];
-		    rhs_work += ldb;
-		}
-	    } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		ftcs1 = _cptofcd("L", strlen("L"));
-		ftcs2 = _cptofcd("U", strlen("U"));
-		ftcs3 = _cptofcd("N", strlen("N"));
-		STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
-		strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else		
-		for (j = 0; j < nrhs; j++)
-		    susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif		
-	    }
-
-	    for (j = 0; j < nrhs; ++j) {
-		rhs_work = &Bmat[j*ldb];
-		for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-			irow = U_SUB(i);
-			rhs_work[irow] -= rhs_work[jcol] * Uval[i];
-		    }
-		}
-	    }
-	    
-	} /* for U-solve */
-
-#ifdef DEBUG
-  	printf("After U-solve: x=\n");
-	sprint_soln(n, nrhs, Bmat);
-#endif
-
-	/* Compute the final solution X := Pc*X. */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-	
-        stat->ops[SOLVE] = solve_ops;
-
-    } else { /* Solve A'*X=B */
-	/* Permute right hand sides to form Pc'*B. */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-
-	stat->ops[SOLVE] = 0;
-	
-	for (k = 0; k < nrhs; ++k) {
-	    
-	    /* Multiply by inv(U'). */
-	    sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-	    
-	    /* Multiply by inv(L'). */
-	    sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-	    
-	}
-	
-	/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-sprint_soln(int n, int nrhs, float *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-  	printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/SRC/slacon.c b/SRC/slacon.c
index 0dafbb2..ccf4d3a 100644
--- a/SRC/slacon.c
+++ b/SRC/slacon.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,7 +7,7 @@
  *
  */
 #include <math.h>
-#include "Cnames.h"
+#include "slu_Cnames.h"
 
 int
 slacon_(int *n, float *v, float *x, int *isgn, float *est, int *kase)
diff --git a/SRC/slamch.c b/SRC/slamch.c
index 4e44ad4..2581c0d 100644
--- a/SRC/slamch.c
+++ b/SRC/slamch.c
@@ -1,4 +1,6 @@
 #include <stdio.h>
+#include "slu_Cnames.h"
+
 #define TRUE_ (1)
 #define FALSE_ (0)
 #define min(a,b) ((a) <= (b) ? (a) : (b))
diff --git a/SRC/slangs.c b/SRC/slangs.c
index 63d0d66..a680db4 100644
--- a/SRC/slangs.c
+++ b/SRC/slangs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from lapack routine SLANGE
  */
 #include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 float slangs(char *norm, SuperMatrix *A)
 {
diff --git a/SRC/slaqgs.c b/SRC/slaqgs.c
index f5287cb..f65931e 100644
--- a/SRC/slaqgs.c
+++ b/SRC/slaqgs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from LAPACK routine SLAQGE
  */
 #include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 void
 slaqgs(SuperMatrix *A, float *r, float *c, 
diff --git a/SRC/Cnames.h b/SRC/slu_Cnames.h
similarity index 82%
rename from SRC/Cnames.h
rename to SRC/slu_Cnames.h
index 7a6d2da..8b8df4e 100644
--- a/SRC/Cnames.h
+++ b/SRC/slu_Cnames.h
@@ -67,6 +67,7 @@
  *           FORTRAN CALL               C DECLARATION
  *           call dgemm(...)           void dgemm__(...)
  */
+/* BLAS */
 #define sasum_    sasum__
 #define isamax_   isamax__
 #define scopy_    scopy__
@@ -127,8 +128,26 @@
 #define zhemv_    zhemv__
 #define zher2_    zher2__
 
+/* LAPACK */
+#define dlamch_   dlamch__
+#define slamch_   slamch__
+#define xerbla_   xerbla__
+#define lsame_    lsame__
+#define dlacon_   dlacon__
+#define slacon_   slacon__
+#define icmax1_   icmax1__
+#define scsum1_   scsum1__
+#define clacon_   clacon__
+#define dzsum1_   dzsum1__
+#define izmax1_   izmax1__
+#define zlacon_   zlacon__
+
+/* Fortran interface */
 #define c_bridge_dgssv_ c_bridge_dgssv__
+#define c_fortran_sgssv_ c_fortran_sgssv__
 #define c_fortran_dgssv_ c_fortran_dgssv__
+#define c_fortran_cgssv_ c_fortran_cgssv__
+#define c_fortran_zgssv_ c_fortran_zgssv__
 #endif
 
 #if (F77_CALL_C == UPCASE)
@@ -139,6 +158,7 @@
  *           FORTRAN CALL               C DECLARATION
  *           call dgemm(...)           void DGEMM(...)
  */
+/* BLAS */
 #define sasum_    SASUM
 #define isamax_   ISAMAX
 #define scopy_    SCOPY
@@ -199,8 +219,26 @@
 #define zhemv_    CHEMV
 #define zher2_    CHER2
 
+/* LAPACK */
+#define dlamch_   DLAMCH
+#define slamch_   SLAMCH
+#define xerbla_   XERBLA
+#define lsame_    LSAME
+#define dlacon_   DLACON
+#define slacon_   SLACON
+#define icmax1_   ICMAX1
+#define scsum1_   SCSUM1
+#define clacon_   CLACON
+#define dzsum1_   DZSUM1
+#define izmax1_   IZMAX1
+#define zlacon_   ZLACON
+
+/* Fortran interface */
 #define c_bridge_dgssv_ C_BRIDGE_DGSSV
+#define c_fortran_sgssv_ C_FORTRAN_SGSSV
 #define c_fortran_dgssv_ C_FORTRAN_DGSSV
+#define c_fortran_cgssv_ C_FORTRAN_CGSSV
+#define c_fortran_zgssv_ C_FORTRAN_ZGSSV
 #endif
 
 #if (F77_CALL_C == NOCHANGE)
@@ -211,6 +249,7 @@
  *           FORTRAN CALL               C DECLARATION
  *           call dgemm(...)           void dgemm(...)
  */
+/* BLAS */
 #define sasum_    sasum
 #define isamax_   isamax
 #define scopy_    scopy
@@ -271,8 +310,26 @@
 #define zhemv_    zhemv
 #define zher2_    zher2
 
+/* LAPACK */
+#define dlamch_   dlamch
+#define slamch_   slamch
+#define xerbla_   xerbla
+#define lsame_    lsame
+#define dlacon_   dlacon
+#define slacon_   slacon
+#define icmax1_   icmax1
+#define scsum1_   scsum1
+#define clacon_   clacon
+#define dzsum1_   dzsum1
+#define izmax1_   izmax1
+#define zlacon_   zlacon
+
+/* Fortran interface */
 #define c_bridge_dgssv_ c_bridge_dgssv
+#define c_fortran_sgssv_ c_fortran_sgssv
 #define c_fortran_dgssv_ c_fortran_dgssv
+#define c_fortran_cgssv_ c_fortran_cgssv
+#define c_fortran_zgssv_ c_fortran_zgssv
 #endif
 
 #endif /* __SUPERLU_CNAMES */
diff --git a/SRC/csp_defs.h b/SRC/slu_cdefs.h
similarity index 99%
rename from SRC/csp_defs.h
rename to SRC/slu_cdefs.h
index d8b5def..31bb482 100644
--- a/SRC/csp_defs.h
+++ b/SRC/slu_cdefs.h
@@ -23,10 +23,10 @@
 /* Define my integer type int_t */
 typedef int int_t; /* default */
 
-#include "Cnames.h"
+#include "slu_Cnames.h"
 #include "supermatrix.h"
-#include "util.h"
-#include "scomplex.h"
+#include "slu_util.h"
+#include "slu_scomplex.h"
 
 
 /*
diff --git a/SRC/dcomplex.h b/SRC/slu_dcomplex.h
similarity index 100%
rename from SRC/dcomplex.h
rename to SRC/slu_dcomplex.h
diff --git a/SRC/dsp_defs.h b/SRC/slu_ddefs.h
similarity index 99%
rename from SRC/dsp_defs.h
rename to SRC/slu_ddefs.h
index e02bd0f..292622f 100644
--- a/SRC/dsp_defs.h
+++ b/SRC/slu_ddefs.h
@@ -23,9 +23,9 @@
 /* Define my integer type int_t */
 typedef int int_t; /* default */
 
-#include "Cnames.h"
+#include "slu_Cnames.h"
 #include "supermatrix.h"
-#include "util.h"
+#include "slu_util.h"
 
 
 /*
diff --git a/SRC/scomplex.h b/SRC/slu_scomplex.h
similarity index 100%
rename from SRC/scomplex.h
rename to SRC/slu_scomplex.h
diff --git a/SRC/ssp_defs.h b/SRC/slu_sdefs.h
similarity index 99%
rename from SRC/ssp_defs.h
rename to SRC/slu_sdefs.h
index 34dd266..b9cff8c 100644
--- a/SRC/ssp_defs.h
+++ b/SRC/slu_sdefs.h
@@ -23,9 +23,9 @@
 /* Define my integer type int_t */
 typedef int int_t; /* default */
 
-#include "Cnames.h"
+#include "slu_Cnames.h"
 #include "supermatrix.h"
-#include "util.h"
+#include "slu_util.h"
 
 
 /*
diff --git a/SRC/util.h b/SRC/slu_util.h
similarity index 97%
rename from SRC/util.h
rename to SRC/slu_util.h
index f16ff89..bf115c6 100644
--- a/SRC/util.h
+++ b/SRC/slu_util.h
@@ -93,7 +93,6 @@ typedef enum {
     RCOND,   /* estimate reciprocal condition number */
     SOLVE,   /* forward and back solves */
     REFINE,  /* perform iterative refinement */
-    FLOAT,   /* time spent in floating-point operations */
     TRSV,    /* fraction of FACT spent in xTRSV */
     GEMV,    /* fraction of FACT spent in xGEMV */
     FERR,    /* estimate error bounds after iterative refinement */
@@ -126,10 +125,9 @@ typedef unsigned char Logical;
  *             assuming that a factorization of a matrix with the same
  *             sparsity	pattern and similar numerical values was performed
  *             prior to this one. Therefore, this factorization will reuse
- *             both row and column scaling factors R and C, and the
- *             both row and column permutation vectors perm_r and perm_c,
- *             distributed data structure set up from the previous symbolic
- *             factorization.
+ *             both row and column scaling factors R and C, both row and
+ *             column permutation vectors perm_r and perm_c, and the
+ *             data structure set up from the previous symbolic factorization.
  *        = FACTORED: On entry, L, U, perm_r and perm_c contain the 
  *              factored form of A. If DiagScale is not NOEQUIL, the matrix
  *              A has been equilibrated with scaling factors R and C.
diff --git a/SRC/zsp_defs.h b/SRC/slu_zdefs.h
similarity index 99%
rename from SRC/zsp_defs.h
rename to SRC/slu_zdefs.h
index f0450d4..a201ff5 100644
--- a/SRC/zsp_defs.h
+++ b/SRC/slu_zdefs.h
@@ -23,10 +23,10 @@
 /* Define my integer type int_t */
 typedef int int_t; /* default */
 
-#include "Cnames.h"
+#include "slu_Cnames.h"
 #include "supermatrix.h"
-#include "util.h"
-#include "dcomplex.h"
+#include "slu_util.h"
+#include "slu_dcomplex.h"
 
 
 /*
diff --git a/SRC/smemory.c b/SRC/smemory.c
index 79da748..0278615 100644
--- a/SRC/smemory.c
+++ b/SRC/smemory.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 /* Constants */
 #define NO_MEMTYPE  4      /* 0: lusup;
@@ -193,9 +193,10 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 	    sSetupSpace(work, lwork, &Glu->MemModel);
 	}
 	
-#ifdef DEBUG		   
-	printf("sLUMemInit() called: annz %d, MemModel %d\n", 
-		annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+	printf("sLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
+	       FILL, nzlmax, nzumax);
+	fflush(stdout);
 #endif	
 	
 	/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 		printf("Not enough memory to perform factorization.\n");
 		return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
 	    }
+#if ( PRNTlevel >= 1)
+	    printf("sLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
+		   nzlmax, nzumax);
+	    fflush(stdout);
+#endif
 	    lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
 	    ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
 	    lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
@@ -476,8 +482,7 @@ void
     else lword = sizeof(float);
 
     if ( Glu->MemModel == SYSTEM ) {
-	new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/*	new_mem = (void *) calloc(new_len, lword); */
+	new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
 	if ( no_expand != 0 ) {
 	    tries = 0;
 	    if ( keep_prev ) {
@@ -487,8 +492,7 @@ void
 		    if ( ++tries > 10 ) return (NULL);
 		    alpha = Reduce(alpha);
 		    new_len = alpha * *prev_len;
-		    new_mem = (void *) SUPERLU_MALLOC(new_len * lword); 
-/*		    new_mem = (void *) calloc(new_len, lword); */
+		    new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
 		}
 	    }
 	    if ( type == LSUB || type == USUB ) {
@@ -641,7 +645,7 @@ sallocateA(int n, int nnz, float **a, int **asub, int **xa)
 float *floatMalloc(int n)
 {
     float *buf;
-    buf = (float *) SUPERLU_MALLOC(n * sizeof(float)); 
+    buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float)); 
     if ( !buf ) {
 	ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
     }
@@ -653,7 +657,7 @@ float *floatCalloc(int n)
     float *buf;
     register int i;
     float zero = 0.0;
-    buf = (float *) SUPERLU_MALLOC(n * sizeof(float));
+    buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float));
     if ( !buf ) {
 	ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
     }
diff --git a/SRC/smyblas2.c b/SRC/smyblas2.c
index 729e17f..00f65c5 100644
--- a/SRC/smyblas2.c
+++ b/SRC/smyblas2.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
diff --git a/SRC/sp_coletree.c b/SRC/sp_coletree.c
index 8f65623..1685661 100644
--- a/SRC/sp_coletree.c
+++ b/SRC/sp_coletree.c
@@ -3,7 +3,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 /* 
  *  Implementation of disjoint set union routines.
diff --git a/SRC/sp_ienv.c b/SRC/sp_ienv.c
index 516b7b2..0680e02 100644
--- a/SRC/sp_ienv.c
+++ b/SRC/sp_ienv.c
@@ -2,6 +2,8 @@
  * File name:		sp_ienv.c
  * History:             Modified from lapack routine ILAENV
  */
+#include "slu_Cnames.h"
+
 int
 sp_ienv(int ispec)
 {
diff --git a/SRC/sp_preorder.c b/SRC/sp_preorder.c
index 17ad84c..524a8ee 100644
--- a/SRC/sp_preorder.c
+++ b/SRC/sp_preorder.c
@@ -1,4 +1,4 @@
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 void
 sp_preorder(superlu_options_t *options,  SuperMatrix *A, int *perm_c, 
diff --git a/SRC/spanel_bmod.c b/SRC/spanel_bmod.c
index 7cfbc28..e98ac9b 100644
--- a/SRC/spanel_bmod.c
+++ b/SRC/spanel_bmod.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/spanel_dfs.c b/SRC/spanel_dfs.c
index 7f5f3c7..cb4417c 100644
--- a/SRC/spanel_dfs.c
+++ b/SRC/spanel_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 void
 spanel_dfs (
diff --git a/SRC/spivotL.c b/SRC/spivotL.c
index 6243065..9c300a4 100644
--- a/SRC/spivotL.c
+++ b/SRC/spivotL.c
@@ -21,7 +21,7 @@
 
 #include <math.h>
 #include <stdlib.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 #undef DEBUG
 
diff --git a/SRC/spivotgrowth.c b/SRC/spivotgrowth.c
index 188ddcc..6aac212 100644
--- a/SRC/spivotgrowth.c
+++ b/SRC/spivotgrowth.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 float
 sPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
diff --git a/SRC/spruneL.c b/SRC/spruneL.c
index 5970270..6a32424 100644
--- a/SRC/spruneL.c
+++ b/SRC/spruneL.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 void
 spruneL(
diff --git a/SRC/sreadhb.c b/SRC/sreadhb.c
index 9f8dd03..e0cf193 100644
--- a/SRC/sreadhb.c
+++ b/SRC/sreadhb.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -9,7 +8,7 @@
  */
 #include <stdio.h>
 #include <stdlib.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 
 /* Eat up the rest of the current line */
diff --git a/SRC/ssnode_bmod.c b/SRC/ssnode_bmod.c
index 1b11eda..6ba0f52 100644
--- a/SRC/ssnode_bmod.c
+++ b/SRC/ssnode_bmod.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 
 /*
diff --git a/SRC/ssnode_dfs.c b/SRC/ssnode_dfs.c
index 95a51be..eb14fc0 100644
--- a/SRC/ssnode_dfs.c
+++ b/SRC/ssnode_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 int
 ssnode_dfs (
diff --git a/SRC/ssp_blas2.c b/SRC/ssp_blas2.c
index b7917c9..174db34 100644
--- a/SRC/ssp_blas2.c
+++ b/SRC/ssp_blas2.c
@@ -11,7 +11,7 @@
  * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
  */
 
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/ssp_blas2.c.bak b/SRC/ssp_blas2.c.bak
deleted file mode 100644
index 994de34..0000000
--- a/SRC/ssp_blas2.c.bak
+++ /dev/null
@@ -1,469 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- * File name:		ssp_blas2.c
- * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "ssp_defs.h"
-
-/* 
- * Function prototypes 
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-
-
-int
-sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose
- *   =======
- *
- *   sp_strsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.   
- *                trans = 'C' or 'c'   A'*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *	     
- *   L       - (input) SuperMatrix*
- *	       The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *	        The factor U from the factorization Pr*A*Pc=L*U.
- *	        U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
- *    
- *   x       - (input/output) float*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-	 ftcs2 = _cptofcd("N", strlen("N")),
-	 ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    SCformat *Lstore;
-    NCformat *Ustore;
-    float   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    float alpha = 1.0, beta = 1.0;
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    float *work;
-    flops_t solve_ops;
-
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-	i = -(*info);
-	xerbla_("sp_strsv", &i);
-	return 0;
-    }
-
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-
-    if ( !(work = floatCalloc(L->nrow)) )
-	ABORT("Malloc fails for work in sp_strsv().");
-    
-    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
-	
-	if ( lsame_(uplo, "L") ) {
-	    /* Form x := inv(L)*x */
-    	    if ( L->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = 0; k <= Lstore->nsuper; k++) {
-		fsupc = L_FST_SUPC(k);
-		istart = L_SUB_START(fsupc);
-		nsupr = L_SUB_START(fsupc+1) - istart;
-		nsupc = L_FST_SUPC(k+1) - fsupc;
-		luptr = L_NZ_START(fsupc);
-		nrow = nsupr - nsupc;
-
-	        solve_ops += nsupc * (nsupc - 1);
-	        solve_ops += 2 * nrow * nsupc;
-
-		if ( nsupc == 1 ) {
-		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-			irow = L_SUB(iptr);
-			++luptr;
-			x[irow] -= x[fsupc] * Lval[luptr];
-		    }
-		} else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-		       	&x[fsupc], &incx);
-		
-		    SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
-		    strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-		       	&x[fsupc], &incx);
-		
-		    sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
-		    slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-		
-		    smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-#endif		
-		
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; ++i, ++iptr) {
-			irow = L_SUB(iptr);
-			x[irow] -= work[i];	/* Scatter */
-			work[i] = 0.0;
-
-		    }
-	 	}
-	    } /* for k ... */
-	    
-	} else {
-	    /* Form x := inv(U)*x */
-	    
-	    if ( U->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = Lstore->nsuper; k >= 0; k--) {
-	    	fsupc = L_FST_SUPC(k);
-	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-		
-    	        solve_ops += nsupc * (nsupc + 1);
-
-		if ( nsupc == 1 ) {
-		    x[fsupc] /= Lval[luptr];
-		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-			irow = U_SUB(i);
-			x[irow] -= x[fsupc] * Uval[i];
-		    }
-		} else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		    STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-		       &x[fsupc], &incx);
-#else
-		    strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-#else		
-		    susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif		
-
-		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		        solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-				i++) {
-			    irow = U_SUB(i);
-			    x[irow] -= x[jcol] * Uval[i];
-		    	}
-                    }
-		}
-	    } /* for k ... */
-	    
-	}
-    } else { /* Form x := inv(A')*x */
-	
-	if ( lsame_(uplo, "L") ) {
-	    /* Form x := inv(L')*x */
-    	    if ( L->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = Lstore->nsuper; k >= 0; --k) {
-	    	fsupc = L_FST_SUPC(k);
-	    	istart = L_SUB_START(fsupc);
-	    	nsupr = L_SUB_START(fsupc+1) - istart;
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-
-		solve_ops += 2 * (nsupr - nsupc) * nsupc;
-
-		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		    iptr = istart + nsupc;
-		    for (i = L_NZ_START(jcol) + nsupc; 
-				i < L_NZ_START(jcol+1); i++) {
-			irow = L_SUB(iptr);
-			x[jcol] -= x[irow] * Lval[i];
-			iptr++;
-		    }
-		}
-		
-		if ( nsupc > 1 ) {
-		    solve_ops += nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-		    STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-			&x[fsupc], &incx);
-#else
-		    strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-			&x[fsupc], &incx);
-#endif
-		}
-	    }
-	} else {
-	    /* Form x := inv(U')*x */
-	    if ( U->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = 0; k <= Lstore->nsuper; k++) {
-	    	fsupc = L_FST_SUPC(k);
-	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-
-		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		    solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-			irow = U_SUB(i);
-			x[jcol] -= x[irow] * Uval[i];
-		    }
-		}
-
-		solve_ops += nsupc * (nsupc + 1);
-
-		if ( nsupc == 1 ) {
-		    x[fsupc] /= Lval[luptr];
-		} else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-		    STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-			    &x[fsupc], &incx);
-#else
-		    strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-			    &x[fsupc], &incx);
-#endif
-		}
-	    } /* for k ... */
-	}
-    }
-
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-}
-
-
-
-
-int
-sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, 
-	 int incx, float beta, float *y, int incy)
-{
-/*  Purpose   
-    =======   
-
-    sp_sgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-
-    Parameters   
-    ==========   
-
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-
-    ALPHA  - (input) float
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
-             In the future, more general A can be handled.
-
-    X      - (input) float*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-
-    BETA   - (input) float
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-
-    Y      - (output) float*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-	     
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-
-    ==== Sparse Level 2 Blas routine.   
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int info;
-    float temp;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)	info = 8;
-    if (info != 0) {
-	xerbla_("sp_sgemv ", &info);
-	return 0;
-    }
-
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
-	return 0;
-
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-	lenx = A->ncol;
-	leny = A->nrow;
-    } else {
-	lenx = A->nrow;
-	leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if (beta != 1.) {
-	if (incy == 1) {
-	    if (beta == 0.)
-		for (i = 0; i < leny; ++i) y[i] = 0.;
-	    else
-		for (i = 0; i < leny; ++i) y[i] = beta * y[i];
-	} else {
-	    iy = ky;
-	    if (beta == 0.)
-		for (i = 0; i < leny; ++i) {
-		    y[iy] = 0.;
-		    iy += incy;
-		}
-	    else
-		for (i = 0; i < leny; ++i) {
-		    y[iy] = beta * y[iy];
-		    iy += incy;
-		}
-	}
-    }
-    
-    if (alpha == 0.) return 0;
-
-    if ( notran ) {
-	/* Form  y := alpha*A*x + y. */
-	jx = kx;
-	if (incy == 1) {
-	    for (j = 0; j < A->ncol; ++j) {
-		if (x[jx] != 0.) {
-		    temp = alpha * x[jx];
-		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-			irow = Astore->rowind[i];
-			y[irow] += temp * Aval[i];
-		    }
-		}
-		jx += incx;
-	    }
-	} else {
-	    ABORT("Not implemented.");
-	}
-    } else {
-	/* Form  y := alpha*A'*x + y. */
-	jy = ky;
-	if (incx == 1) {
-	    for (j = 0; j < A->ncol; ++j) {
-		temp = 0.;
-		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-		    irow = Astore->rowind[i];
-		    temp += Aval[i] * x[irow];
-		}
-		y[jy] += alpha * temp;
-		jy += incy;
-	    }
-	} else {
-	    ABORT("Not implemented.");
-	}
-    }
-    return 0;
-} /* sp_sgemv */
-
-
-
diff --git a/SRC/ssp_blas3.c b/SRC/ssp_blas3.c
index 9b45292..6a416a5 100644
--- a/SRC/ssp_blas3.c
+++ b/SRC/ssp_blas3.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * Purpose:		Sparse BLAS3, using some dense BLAS3 operations.
  */
 
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 int
 sp_sgemm(char *transa, char *transb, int m, int n, int k, 
diff --git a/SRC/sutil.c b/SRC/sutil.c
index 1a66061..a023a3f 100644
--- a/SRC/sutil.c
+++ b/SRC/sutil.c
@@ -20,7 +20,7 @@
 */
 
 #include <math.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 void
 sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
@@ -267,16 +267,19 @@ void
 sPrint_Dense_Matrix(char *what, SuperMatrix *A)
 {
     DNformat     *Astore;
-    register int i;
+    register int i, j, lda = Astore->lda;
     float       *dp;
     
     printf("\nDense matrix %s:\n", what);
     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
     Astore = (DNformat *) A->Store;
     dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
     printf("\nnzval: ");
-    for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i]);
+    for (j = 0; j < A->ncol; ++j) {
+        for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i + j*lda]);
+        printf("\n");
+    }
     printf("\n");
     fflush(stdout);
 }
diff --git a/SRC/util.c b/SRC/util.c
index a95e1ef..bfe5f88 100644
--- a/SRC/util.c
+++ b/SRC/util.c
@@ -19,8 +19,7 @@
 */
 
 #include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 /* 
  * Global statistics variale
@@ -49,6 +48,24 @@ void set_default_options(superlu_options_t *options)
     options->PrintStat = YES;
 }
 
+/*
+ * Print the options setting.
+ */
+void print_options(superlu_options_t *options)
+{
+    printf(".. options:\n");
+    printf("\tFact\t %8d\n", options->Fact);
+    printf("\tEquil\t %8d\n", options->Equil);
+    printf("\tColPerm\t %8d\n", options->ColPerm);
+    printf("\tDiagPivotThresh %8.4f\n", options->DiagPivotThresh);
+    printf("\tTrans\t %8d\n", options->Trans);
+    printf("\tIterRefine\t%4d\n", options->IterRefine);
+    printf("\tSymmetricMode\t%4d\n", options->SymmetricMode);
+    printf("\tPivotGrowth\t%4d\n", options->PivotGrowth);
+    printf("\tConditionNumber\t%4d\n", options->ConditionNumber);
+    printf("..\n");
+}
+
 /* Deallocate the structure pointing to the actual storage of the matrix. */
 void
 Destroy_SuperMatrix_Store(SuperMatrix *A)
diff --git a/SRC/xerbla.c b/SRC/xerbla.c
index c598282..bffd66b 100644
--- a/SRC/xerbla.c
+++ b/SRC/xerbla.c
@@ -1,3 +1,6 @@
+#include <stdio.h>
+#include "slu_Cnames.h"
+
 /* Subroutine */ int xerbla_(char *srname, int *info)
 {
 /*  -- LAPACK auxiliary routine (version 2.0) --   
diff --git a/SRC/zcolumn_bmod.c b/SRC/zcolumn_bmod.c
index 7f2ef75..2082ad6 100644
--- a/SRC/zcolumn_bmod.c
+++ b/SRC/zcolumn_bmod.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/zcolumn_dfs.c b/SRC/zcolumn_dfs.c
index bfae8a0..92a20e2 100644
--- a/SRC/zcolumn_dfs.c
+++ b/SRC/zcolumn_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 /* What type of supernodes we want */
 #define T2_SUPER
diff --git a/SRC/zcopy_to_ucol.c b/SRC/zcopy_to_ucol.c
index 7c8969b..e5731cf 100644
--- a/SRC/zcopy_to_ucol.c
+++ b/SRC/zcopy_to_ucol.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 int
 zcopy_to_ucol(
diff --git a/SRC/zgscon.c b/SRC/zgscon.c
index f811069..4e254c8 100644
--- a/SRC/zgscon.c
+++ b/SRC/zgscon.c
@@ -11,7 +11,7 @@
  * History:     Modified from lapack routines ZGECON.
  */
 #include <math.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 void
 zgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/zgsequ.c b/SRC/zgsequ.c
index 659ef1a..40e64e9 100644
--- a/SRC/zgsequ.c
+++ b/SRC/zgsequ.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from LAPACK routine ZGEEQU
  */
 #include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 void
 zgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
diff --git a/SRC/zgsrfs.c b/SRC/zgsrfs.c
index 6c655fd..9cb57fd 100644
--- a/SRC/zgsrfs.c
+++ b/SRC/zgsrfs.c
@@ -11,7 +11,7 @@
  * History:     Modified from lapack routine ZGERFS
  */
 #include <math.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 void
 zgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
diff --git a/SRC/zgssv.c b/SRC/zgssv.c
index 4494ce7..dbbd870 100644
--- a/SRC/zgssv.c
+++ b/SRC/zgssv.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 3.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -7,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 void
 zgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
diff --git a/SRC/zgssvx.c b/SRC/zgssvx.c
index 6549da1..65ea538 100644
--- a/SRC/zgssvx.c
+++ b/SRC/zgssvx.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 void
 zgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
@@ -455,7 +455,7 @@ printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
 			       Astore->nzval, Astore->colind, Astore->rowptr,
 			       SLU_NC, A->Dtype, A->Mtype);
 	if ( notran ) { /* Reverse the transpose argument. */
-            trant = CONJ;
+	    trant = TRANS;
 	    notran = 0;
 	} else {
 	    trant = NOTRANS;
diff --git a/SRC/zgstrf.c b/SRC/zgstrf.c
index 2a68b45..5cfef9b 100644
--- a/SRC/zgstrf.c
+++ b/SRC/zgstrf.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 void
 zgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
@@ -182,8 +182,8 @@ zgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
  */
     /* Local working arrays */
     NCPformat *Astore;
-    int       *iperm_r; /* inverse of perm_r;
-			   used when options->Fact == SamePattern_SameRowPerm */
+    int       *iperm_r = NULL; /* inverse of perm_r; used when 
+                                  options->Fact == SamePattern_SameRowPerm */
     int       *iperm_c; /* inverse of perm_c */
     int       *iwork;
     doublecomplex    *zwork;
diff --git a/SRC/zgstrs.c b/SRC/zgstrs.c
index 95bcba7..a9a5b65 100644
--- a/SRC/zgstrs.c
+++ b/SRC/zgstrs.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 
 /* 
diff --git a/SRC/zgstrs.c.bak b/SRC/zgstrs.c.bak
deleted file mode 100644
index 40dc89c..0000000
--- a/SRC/zgstrs.c.bak
+++ /dev/null
@@ -1,339 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-  Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "zsp_defs.h"
-
-
-/* 
- * Function prototypes 
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-
-
-void
-zgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * ZGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * ZGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         zgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         zgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *	   Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- * 	   = 0: successful exit
- *	   < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
-    doublecomplex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    doublecomplex   *work_col;
-#endif
-    doublecomplex   temp_comp;
-    DNformat *Bstore;
-    doublecomplex   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    doublecomplex   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    doublecomplex   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void zprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-	      L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
-	*info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-	      U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
-	*info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-	      B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
-	*info = -6;
-    if ( *info ) {
-	i = -(*info);
-	xerbla_("zgstrs", &i);
-	return;
-    }
-
-    n = L->nrow;
-    work = doublecomplexCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = doublecomplexMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    
-    if ( trans == NOTRANS ) {
-	/* Permute right hand sides to form Pr*B */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-	
-	/* Forward solve PLy=Pb. */
-	for (k = 0; k <= Lstore->nsuper; k++) {
-	    fsupc = L_FST_SUPC(k);
-	    istart = L_SUB_START(fsupc);
-	    nsupr = L_SUB_START(fsupc+1) - istart;
-	    nsupc = L_FST_SUPC(k+1) - fsupc;
-	    nrow = nsupr - nsupc;
-
-	    solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
-	    solve_ops += 8 * nrow * nsupc * nrhs;
-	    
-	    if ( nsupc == 1 ) {
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-	    	    luptr = L_NZ_START(fsupc);
-		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-			irow = L_SUB(iptr);
-			++luptr;
-			zz_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
-			z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-		    }
-		}
-	    } else {
-	    	luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		ftcs1 = _cptofcd("L", strlen("L"));
-		ftcs2 = _cptofcd("N", strlen("N"));
-		ftcs3 = _cptofcd("U", strlen("U"));
-		CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-		
-		CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-			&beta, &work[0], &n );
-#else
-		ztrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-		
-		zgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-			&beta, &work[0], &n );
-#endif
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-		    work_col = &work[j*n];
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; i++) {
-			irow = L_SUB(iptr);
-			z_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
-			work_col[i].r = 0.0;
-	                work_col[i].i = 0.0;
-			iptr++;
-		    }
-		}
-#else		
-		for (j = 0; j < nrhs; j++) {
-		    rhs_work = &Bmat[j*ldb];
-		    zlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-		    zmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-			    &rhs_work[fsupc], &work[0] );
-
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; i++) {
-			irow = L_SUB(iptr);
-			z_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
-			work[i].r = 0.;
-	                work[i].i = 0.;
-			iptr++;
-		    }
-		}
-#endif		    
-	    } /* else ... */
-	} /* for L-solve */
-
-#ifdef DEBUG
-  	printf("After L-solve: y=\n");
-	zprint_soln(n, nrhs, Bmat);
-#endif
-
-	/*
-	 * Back solve Ux=y.
-	 */
-	for (k = Lstore->nsuper; k >= 0; k--) {
-	    fsupc = L_FST_SUPC(k);
-	    istart = L_SUB_START(fsupc);
-	    nsupr = L_SUB_START(fsupc+1) - istart;
-	    nsupc = L_FST_SUPC(k+1) - fsupc;
-	    luptr = L_NZ_START(fsupc);
-
-	    solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;
-
-	    if ( nsupc == 1 ) {
-		rhs_work = &Bmat[0];
-		for (j = 0; j < nrhs; j++) {
-		    z_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
-		    rhs_work += ldb;
-		}
-	    } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		ftcs1 = _cptofcd("L", strlen("L"));
-		ftcs2 = _cptofcd("U", strlen("U"));
-		ftcs3 = _cptofcd("N", strlen("N"));
-		CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
-		ztrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else		
-		for (j = 0; j < nrhs; j++)
-		    zusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif		
-	    }
-
-	    for (j = 0; j < nrhs; ++j) {
-		rhs_work = &Bmat[j*ldb];
-		for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-			irow = U_SUB(i);
-			zz_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
-			z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-		    }
-		}
-	    }
-	    
-	} /* for U-solve */
-
-#ifdef DEBUG
-  	printf("After U-solve: x=\n");
-	zprint_soln(n, nrhs, Bmat);
-#endif
-
-	/* Compute the final solution X := Pc*X. */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-	
-        stat->ops[SOLVE] = solve_ops;
-
-    } else { /* Solve A'*X=B */
-	/* Permute right hand sides to form Pc'*B. */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-
-	stat->ops[SOLVE] = 0;
-	
-	for (k = 0; k < nrhs; ++k) {
-	    
-	    /* Multiply by inv(U'). */
-	    sp_ztrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-	    
-	    /* Multiply by inv(L'). */
-	    sp_ztrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-	    
-	}
-	
-	/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-	for (i = 0; i < nrhs; i++) {
-	    rhs_work = &Bmat[i*ldb];
-	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-	}
-
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-zprint_soln(int n, int nrhs, doublecomplex *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-  	printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/SRC/zlacon.c b/SRC/zlacon.c
index e240371..19382a2 100644
--- a/SRC/zlacon.c
+++ b/SRC/zlacon.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,8 @@
  *
  */
 #include <math.h>
-#include "Cnames.h"
-#include "dcomplex.h"
+#include "slu_Cnames.h"
+#include "slu_dcomplex.h"
 
 int
 zlacon_(int *n, doublecomplex *v, doublecomplex *x, double *est, int *kase)
diff --git a/SRC/zlangs.c b/SRC/zlangs.c
index e178c6f..ad09d3d 100644
--- a/SRC/zlangs.c
+++ b/SRC/zlangs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from lapack routine ZLANGE
  */
 #include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 double zlangs(char *norm, SuperMatrix *A)
 {
diff --git a/SRC/zlaqgs.c b/SRC/zlaqgs.c
index d28393d..5b9d503 100644
--- a/SRC/zlaqgs.c
+++ b/SRC/zlaqgs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * History:     Modified from LAPACK routine ZLAQGE
  */
 #include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 void
 zlaqgs(SuperMatrix *A, double *r, double *c, 
diff --git a/SRC/zmemory.c b/SRC/zmemory.c
index 0c79e9e..02ac640 100644
--- a/SRC/zmemory.c
+++ b/SRC/zmemory.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 /* Constants */
 #define NO_MEMTYPE  4      /* 0: lusup;
@@ -193,9 +193,10 @@ zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 	    zSetupSpace(work, lwork, &Glu->MemModel);
 	}
 	
-#ifdef DEBUG		   
-	printf("zLUMemInit() called: annz %d, MemModel %d\n", 
-		annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+	printf("zLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
+	       FILL, nzlmax, nzumax);
+	fflush(stdout);
 #endif	
 	
 	/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 		printf("Not enough memory to perform factorization.\n");
 		return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
 	    }
+#if ( PRNTlevel >= 1)
+	    printf("zLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
+		   nzlmax, nzumax);
+	    fflush(stdout);
+#endif
 	    lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
 	    ucol  = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
 	    lsub  = (int *)    zexpand( &nzlmax, LSUB, 0, 0, Glu );
@@ -476,8 +482,7 @@ void
     else lword = sizeof(doublecomplex);
 
     if ( Glu->MemModel == SYSTEM ) {
-	new_mem = (void *) SUPERLU_MALLOC(new_len * lword);
-/*	new_mem = (void *) calloc(new_len, lword); */
+	new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
 	if ( no_expand != 0 ) {
 	    tries = 0;
 	    if ( keep_prev ) {
@@ -487,8 +492,7 @@ void
 		    if ( ++tries > 10 ) return (NULL);
 		    alpha = Reduce(alpha);
 		    new_len = alpha * *prev_len;
-		    new_mem = (void *) SUPERLU_MALLOC(new_len * lword); 
-/*		    new_mem = (void *) calloc(new_len, lword); */
+		    new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
 		}
 	    }
 	    if ( type == LSUB || type == USUB ) {
@@ -641,7 +645,7 @@ zallocateA(int n, int nnz, doublecomplex **a, int **asub, int **xa)
 doublecomplex *doublecomplexMalloc(int n)
 {
     doublecomplex *buf;
-    buf = (doublecomplex *) SUPERLU_MALLOC(n * sizeof(doublecomplex)); 
+    buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex)); 
     if ( !buf ) {
 	ABORT("SUPERLU_MALLOC failed for buf in doublecomplexMalloc()\n");
     }
@@ -653,7 +657,7 @@ doublecomplex *doublecomplexCalloc(int n)
     doublecomplex *buf;
     register int i;
     doublecomplex zero = {0.0, 0.0};
-    buf = (doublecomplex *) SUPERLU_MALLOC(n * sizeof(doublecomplex));
+    buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex));
     if ( !buf ) {
 	ABORT("SUPERLU_MALLOC failed for buf in doublecomplexCalloc()\n");
     }
diff --git a/SRC/zmemory.c b/SRC/zmemory.c.bak
similarity index 98%
copy from SRC/zmemory.c
copy to SRC/zmemory.c.bak
index 0c79e9e..874e3df 100644
--- a/SRC/zmemory.c
+++ b/SRC/zmemory.c.bak
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 /* Constants */
 #define NO_MEMTYPE  4      /* 0: lusup;
@@ -193,9 +193,10 @@ zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 	    zSetupSpace(work, lwork, &Glu->MemModel);
 	}
 	
-#ifdef DEBUG		   
-	printf("zLUMemInit() called: annz %d, MemModel %d\n", 
-		annz, Glu->MemModel);
+#if ( PRNTlevel >= 1 )
+	printf("zLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
+	       FILL, nzlmax, nzumax);
+	fflush(stdout);
 #endif	
 	
 	/* Integer pointers for L\U factors */
@@ -234,6 +235,11 @@ zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
 		printf("Not enough memory to perform factorization.\n");
 		return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
 	    }
+#if ( PRNTlevel >= 1)
+	    printf("zzLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
+		   nzlmax, nzumax);
+	    fflush(stdout);
+#endif
 	    lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
 	    ucol  = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
 	    lsub  = (int *)    zexpand( &nzlmax, LSUB, 0, 0, Glu );
diff --git a/SRC/zmyblas2.c b/SRC/zmyblas2.c
index 59450cd..45c67c1 100644
--- a/SRC/zmyblas2.c
+++ b/SRC/zmyblas2.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -14,7 +13,7 @@
  * Note:
  *     This is only used when the system lacks an efficient BLAS library.
  */
-#include "dcomplex.h"
+#include "slu_dcomplex.h"
 
 /*
  * Solves a dense UNIT lower triangular system. The unit lower 
diff --git a/SRC/zpanel_bmod.c b/SRC/zpanel_bmod.c
index 658c945..f910635 100644
--- a/SRC/zpanel_bmod.c
+++ b/SRC/zpanel_bmod.c
@@ -21,7 +21,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 /* 
  * Function prototypes 
diff --git a/SRC/zpanel_dfs.c b/SRC/zpanel_dfs.c
index c9ed6ce..3e535a8 100644
--- a/SRC/zpanel_dfs.c
+++ b/SRC/zpanel_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 void
 zpanel_dfs (
diff --git a/SRC/zpivotL.c b/SRC/zpivotL.c
index 26afc4c..20aacda 100644
--- a/SRC/zpivotL.c
+++ b/SRC/zpivotL.c
@@ -21,7 +21,7 @@
 
 #include <math.h>
 #include <stdlib.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 #undef DEBUG
 
diff --git a/SRC/zpivotgrowth.c b/SRC/zpivotgrowth.c
index 59e0c82..b8afeef 100644
--- a/SRC/zpivotgrowth.c
+++ b/SRC/zpivotgrowth.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 double
 zPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
diff --git a/SRC/zpruneL.c b/SRC/zpruneL.c
index ee24f7f..25d003c 100644
--- a/SRC/zpruneL.c
+++ b/SRC/zpruneL.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 void
 zpruneL(
diff --git a/SRC/zreadhb.c b/SRC/zreadhb.c
index c98951a..fcccab6 100644
--- a/SRC/zreadhb.c
+++ b/SRC/zreadhb.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -9,7 +8,7 @@
  */
 #include <stdio.h>
 #include <stdlib.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 
 /* Eat up the rest of the current line */
diff --git a/SRC/zsnode_bmod.c b/SRC/zsnode_bmod.c
index 8ec938b..c36d0fa 100644
--- a/SRC/zsnode_bmod.c
+++ b/SRC/zsnode_bmod.c
@@ -19,7 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 
 /*
diff --git a/SRC/zsnode_dfs.c b/SRC/zsnode_dfs.c
index b1bec95..a6bab8f 100644
--- a/SRC/zsnode_dfs.c
+++ b/SRC/zsnode_dfs.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -20,8 +19,7 @@
   the code was modified is included with the above copyright notice.
 */
 
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 int
 zsnode_dfs (
diff --git a/SRC/zsp_blas2.c b/SRC/zsp_blas2.c
index 18470c5..e94c519 100644
--- a/SRC/zsp_blas2.c
+++ b/SRC/zsp_blas2.c
@@ -11,7 +11,7 @@
  * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
  */
 
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 /* 
  * Function prototypes 
@@ -132,7 +132,8 @@ sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 		luptr = L_NZ_START(fsupc);
 		nrow = nsupr - nsupc;
 
-	        solve_ops += 4 * nsupc * (nsupc - 1);
+                /* 1 z_div costs 10 flops */
+	        solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
 	        solve_ops += 8 * nrow * nsupc;
 
 		if ( nsupc == 1 ) {
@@ -185,7 +186,8 @@ sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 	    	nsupc = L_FST_SUPC(k+1) - fsupc;
 	    	luptr = L_NZ_START(fsupc);
 		
-    	        solve_ops += 4 * nsupc * (nsupc + 1);
+                /* 1 z_div costs 10 flops */
+    	        solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
 
 		if ( nsupc == 1 ) {
 		    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
@@ -279,7 +281,8 @@ sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 		    }
 		}
 
-		solve_ops += 4 * nsupc * (nsupc + 1);
+                /* 1 z_div costs 10 flops */
+		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
 
 		if ( nsupc == 1 ) {
 		    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
@@ -358,7 +361,8 @@ sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
 		    }
 		}
 
-		solve_ops += 4 * nsupc * (nsupc + 1);
+                /* 1 z_div costs 10 flops */
+		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
  
 		if ( nsupc == 1 ) {
                     zz_conj(&temp, &Lval[luptr]);
diff --git a/SRC/zsp_blas2.c.bak b/SRC/zsp_blas2.c.bak
deleted file mode 100644
index 5ab0334..0000000
--- a/SRC/zsp_blas2.c.bak
+++ /dev/null
@@ -1,479 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
- * File name:		zsp_blas2.c
- * Purpose:		Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "zsp_defs.h"
-
-/* 
- * Function prototypes 
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-
-
-int
-sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose
- *   =======
- *
- *   sp_ztrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.   
- *                trans = 'C' or 'c'   A'*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *	     
- *   L       - (input) SuperMatrix*
- *	       The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_Z, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *	        The factor U from the factorization Pr*A*Pc=L*U.
- *	        U has types: Stype = NC, Dtype = SLU_Z, Mtype = TRU.
- *    
- *   x       - (input/output) doublecomplex*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-	 ftcs2 = _cptofcd("N", strlen("N")),
-	 ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    SCformat *Lstore;
-    NCformat *Ustore;
-    doublecomplex   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    doublecomplex comp_zero = {0.0, 0.0};
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    doublecomplex *work;
-    flops_t solve_ops;
-
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-	i = -(*info);
-	xerbla_("sp_ztrsv", &i);
-	return 0;
-    }
-
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-
-    if ( !(work = doublecomplexCalloc(L->nrow)) )
-	ABORT("Malloc fails for work in sp_ztrsv().");
-    
-    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
-	
-	if ( lsame_(uplo, "L") ) {
-	    /* Form x := inv(L)*x */
-    	    if ( L->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = 0; k <= Lstore->nsuper; k++) {
-		fsupc = L_FST_SUPC(k);
-		istart = L_SUB_START(fsupc);
-		nsupr = L_SUB_START(fsupc+1) - istart;
-		nsupc = L_FST_SUPC(k+1) - fsupc;
-		luptr = L_NZ_START(fsupc);
-		nrow = nsupr - nsupc;
-
-	        solve_ops += 4 * nsupc * (nsupc - 1);
-	        solve_ops += 8 * nrow * nsupc;
-
-		if ( nsupc == 1 ) {
-		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-			irow = L_SUB(iptr);
-			++luptr;
-			zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
-			z_sub(&x[irow], &x[irow], &comp_zero);
-		    }
-		} else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-		       	&x[fsupc], &incx);
-		
-		    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
-		    ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-		       	&x[fsupc], &incx);
-		
-		    zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
-		    zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-		
-		    zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-#endif		
-		
-		    iptr = istart + nsupc;
-		    for (i = 0; i < nrow; ++i, ++iptr) {
-			irow = L_SUB(iptr);
-			z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
-			work[i] = comp_zero;
-
-		    }
-	 	}
-	    } /* for k ... */
-	    
-	} else {
-	    /* Form x := inv(U)*x */
-	    
-	    if ( U->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = Lstore->nsuper; k >= 0; k--) {
-	    	fsupc = L_FST_SUPC(k);
-	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-		
-    	        solve_ops += 4 * nsupc * (nsupc + 1);
-
-		if ( nsupc == 1 ) {
-		    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-			irow = U_SUB(i);
-			zz_mult(&comp_zero, &x[fsupc], &Uval[i]);
-			z_sub(&x[irow], &x[irow], &comp_zero);
-		    }
-		} else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-		    CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-		       &x[fsupc], &incx);
-#else
-		    ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-#else		
-		    zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif		
-
-		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		        solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-				i++) {
-			    irow = U_SUB(i);
-			zz_mult(&comp_zero, &x[jcol], &Uval[i]);
-			z_sub(&x[irow], &x[irow], &comp_zero);
-		    	}
-                    }
-		}
-	    } /* for k ... */
-	    
-	}
-    } else { /* Form x := inv(A')*x */
-	
-	if ( lsame_(uplo, "L") ) {
-	    /* Form x := inv(L')*x */
-    	    if ( L->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = Lstore->nsuper; k >= 0; --k) {
-	    	fsupc = L_FST_SUPC(k);
-	    	istart = L_SUB_START(fsupc);
-	    	nsupr = L_SUB_START(fsupc+1) - istart;
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-
-		solve_ops += 8 * (nsupr - nsupc) * nsupc;
-
-		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		    iptr = istart + nsupc;
-		    for (i = L_NZ_START(jcol) + nsupc; 
-				i < L_NZ_START(jcol+1); i++) {
-			irow = L_SUB(iptr);
-			zz_mult(&comp_zero, &x[irow], &Lval[i]);
-		    	z_sub(&x[jcol], &x[jcol], &comp_zero);
-			iptr++;
-		    }
-		}
-		
-		if ( nsupc > 1 ) {
-		    solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-			&x[fsupc], &incx);
-#else
-		    ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-			&x[fsupc], &incx);
-#endif
-		}
-	    }
-	} else {
-	    /* Form x := inv(U')*x */
-	    if ( U->nrow == 0 ) return 0; /* Quick return */
-	    
-	    for (k = 0; k <= Lstore->nsuper; k++) {
-	    	fsupc = L_FST_SUPC(k);
-	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-	    	nsupc = L_FST_SUPC(k+1) - fsupc;
-	    	luptr = L_NZ_START(fsupc);
-
-		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-			irow = U_SUB(i);
-			zz_mult(&comp_zero, &x[irow], &Uval[i]);
-		    	z_sub(&x[jcol], &x[jcol], &comp_zero);
-		    }
-		}
-
-		solve_ops += 4 * nsupc * (nsupc + 1);
-
-		if ( nsupc == 1 ) {
-		    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-		} else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-			    &x[fsupc], &incx);
-#else
-		    ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-			    &x[fsupc], &incx);
-#endif
-		}
-	    } /* for k ... */
-	}
-    }
-
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-}
-
-
-
-int
-sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x, 
-	 int incx, doublecomplex beta, doublecomplex *y, int incy)
-{
-/*  Purpose   
-    =======   
-
-    sp_zgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-
-    Parameters   
-    ==========   
-
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-
-    ALPHA  - (input) doublecomplex
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Before entry, the leading m by n part of the array A must   
-             contain the matrix of coefficients.   
-
-    X      - (input) doublecomplex*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-
-    BETA   - (input) doublecomplex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-
-    Y      - (output) doublecomplex*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-	     
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-
-    ==== Sparse Level 2 Blas routine.   
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int info;
-    doublecomplex temp, temp1;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-    doublecomplex comp_zero = {0.0, 0.0};
-    doublecomplex comp_one = {1.0, 0.0};
-
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)	info = 8;
-    if (info != 0) {
-	xerbla_("sp_zgemv ", &info);
-	return 0;
-    }
-
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || 
-	z_eq(&alpha, &comp_zero) && 
-	z_eq(&beta, &comp_one))
-	return 0;
-
-
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-	lenx = A->ncol;
-	leny = A->nrow;
-    } else {
-	lenx = A->nrow;
-	leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if ( !z_eq(&beta, &comp_one) ) {
-	if (incy == 1) {
-	    if ( z_eq(&beta, &comp_zero) )
-		for (i = 0; i < leny; ++i) y[i] = comp_zero;
-	    else
-		for (i = 0; i < leny; ++i) 
-		  zz_mult(&y[i], &beta, &y[i]);
-	} else {
-	    iy = ky;
-	    if ( z_eq(&beta, &comp_zero) )
-		for (i = 0; i < leny; ++i) {
-		    y[iy] = comp_zero;
-		    iy += incy;
-		}
-	    else
-		for (i = 0; i < leny; ++i) {
-		    zz_mult(&y[iy], &beta, &y[iy]);
-		    iy += incy;
-		}
-	}
-    }
-    
-    if ( z_eq(&alpha, &comp_zero) ) return 0;
-
-    if ( notran ) {
-	/* Form  y := alpha*A*x + y. */
-	jx = kx;
-	if (incy == 1) {
-	    for (j = 0; j < A->ncol; ++j) {
-		if ( !z_eq(&x[jx], &comp_zero) ) {
-		    zz_mult(&temp, &alpha, &x[jx]);
-		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-			irow = Astore->rowind[i];
-			zz_mult(&temp1, &temp,  &Aval[i]);
-			z_add(&y[irow], &y[irow], &temp1);
-		    }
-		}
-		jx += incx;
-	    }
-	} else {
-	    ABORT("Not implemented.");
-	}
-    } else {
-	/* Form  y := alpha*A'*x + y. */
-	jy = ky;
-	if (incx == 1) {
-	    for (j = 0; j < A->ncol; ++j) {
-		temp = comp_zero;
-		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-		    irow = Astore->rowind[i];
-		    zz_mult(&temp1, &Aval[i], &x[irow]);
-		    z_add(&temp, &temp, &temp1);
-		}
-		zz_mult(&temp1, &alpha, &temp);
-		z_add(&y[jy], &y[jy], &temp1);
-		jy += incy;
-	    }
-	} else {
-	    ABORT("Not implemented.");
-	}
-    }
-    return 0;    
-} /* sp_zgemv */
-
diff --git a/SRC/zsp_blas3.c b/SRC/zsp_blas3.c
index 9825161..5dddf5a 100644
--- a/SRC/zsp_blas3.c
+++ b/SRC/zsp_blas3.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -12,8 +11,7 @@
  * Purpose:		Sparse BLAS3, using some dense BLAS3 operations.
  */
 
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 int
 sp_zgemm(char *transa, char *transb, int m, int n, int k, 
diff --git a/SRC/zutil.c b/SRC/zutil.c
index d7c76b4..8e9dcba 100644
--- a/SRC/zutil.c
+++ b/SRC/zutil.c
@@ -20,7 +20,7 @@
 */
 
 #include <math.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 void
 zCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
@@ -240,7 +240,8 @@ zPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
       for (j = c; j < c + nsup; ++j) {
 	d = Astore->nzval_colptr[j];
 	for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-	  printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d++], dp[d++]);
+	  printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]);
+          d += 2;	
 	}
       }
     }
@@ -267,16 +268,19 @@ void
 zPrint_Dense_Matrix(char *what, SuperMatrix *A)
 {
     DNformat     *Astore;
-    register int i;
+    register int i, j, lda = Astore->lda;
     double       *dp;
     
     printf("\nDense matrix %s:\n", what);
     printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
     Astore = (DNformat *) A->Store;
     dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,Astore->lda);
+    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
     printf("\nnzval: ");
-    for (i = 0; i < 2*A->nrow; ++i) printf("%f  ", dp[i]);
+    for (j = 0; j < A->ncol; ++j) {
+        for (i = 0; i < 2*A->nrow; ++i) printf("%f  ", dp[i + j*2*lda]);
+        printf("\n");
+    }
     printf("\n");
     fflush(stdout);
 }
diff --git a/TESTING/MATGEN/Cnames.h b/TESTING/MATGEN/Cnames.h
deleted file mode 120000
index 0398527..0000000
--- a/TESTING/MATGEN/Cnames.h
+++ /dev/null
@@ -1 +0,0 @@
-../../SRC/Cnames.h
\ No newline at end of file
diff --git a/TESTING/MATGEN/clatb4.c b/TESTING/MATGEN/clatb4.c
index ad2c3dd..2a5fba0 100644
--- a/TESTING/MATGEN/clatb4.c
+++ b/TESTING/MATGEN/clatb4.c
@@ -3,6 +3,7 @@
 	-lf2c -lm   (in that order)
 */
 
+#include <string.h>
 #include "f2c.h"
 
 /* Table of constant values */
diff --git a/TESTING/MATGEN/dlatb4.c b/TESTING/MATGEN/dlatb4.c
index c26760a..a1f4399 100644
--- a/TESTING/MATGEN/dlatb4.c
+++ b/TESTING/MATGEN/dlatb4.c
@@ -3,6 +3,7 @@
 	-lf2c -lm   (in that order)
 */
 
+#include <string.h>
 #include "f2c.h"
 
 /* Table of constant values */
diff --git a/TESTING/MATGEN/f2c.h b/TESTING/MATGEN/f2c.h
index caa33e1..3116864 100644
--- a/TESTING/MATGEN/f2c.h
+++ b/TESTING/MATGEN/f2c.h
@@ -4,7 +4,7 @@
 
 	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
 
-#include "Cnames.h"
+#include "slu_Cnames.h"
 
 #ifndef F2C_INCLUDE
 #define F2C_INCLUDE
diff --git a/TESTING/MATGEN/slatb4.c b/TESTING/MATGEN/slatb4.c
index 33387a5..4f814ef 100644
--- a/TESTING/MATGEN/slatb4.c
+++ b/TESTING/MATGEN/slatb4.c
@@ -3,6 +3,7 @@
 	-lf2c -lm   (in that order)
 */
 
+#include <string.h>
 #include "f2c.h"
 
 /* Table of constant values */
diff --git a/TESTING/MATGEN/slu_Cnames.h b/TESTING/MATGEN/slu_Cnames.h
new file mode 120000
index 0000000..e9e2972
--- /dev/null
+++ b/TESTING/MATGEN/slu_Cnames.h
@@ -0,0 +1 @@
+../../SRC/slu_Cnames.h
\ No newline at end of file
diff --git a/TESTING/MATGEN/zlatb4.c b/TESTING/MATGEN/zlatb4.c
index a16117d..4bfba46 100644
--- a/TESTING/MATGEN/zlatb4.c
+++ b/TESTING/MATGEN/zlatb4.c
@@ -3,6 +3,7 @@
 	-lf2c -lm   (in that order)
 */
 
+#include <string.h>
 #include "f2c.h"
 
 /* Table of constant values */
diff --git a/TESTING/Makefile b/TESTING/Makefile
index e8967e4..226de42 100644
--- a/TESTING/Makefile
+++ b/TESTING/Makefile
@@ -49,7 +49,10 @@ CLINTST = cdrive.o sp_cconvert.o \
 ZLINTST = zdrive.o sp_zconvert.o \
 	  sp_zget01.o sp_zget02.o sp_zget04.o sp_zget07.o
 
-all:    single double complex complex16
+all: testmat single double complex complex16
+
+testmat:
+	(cd MATGEN; $(MAKE))
 
 single: ./stest stest.out
 
diff --git a/TESTING/cdrive.c b/TESTING/cdrive.c
index 2d9502f..7b487af 100644
--- a/TESTING/cdrive.c
+++ b/TESTING/cdrive.c
@@ -11,7 +11,7 @@
  * Purpose:             MAIN test program
  */
 #include <string.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 #define NTESTS    5      /* Number of test types */
 #define NTYPES    11     /* Number of matrix types */
@@ -21,6 +21,10 @@
 #define	FMT2      "%10s:fact=%4d, trans=%4d, equed=%c, n=%d, imat=%d, test(%d)=%12.5g\n"
 #define FMT3      "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n"
 
+static void
+parse_command_line(int argc, char *argv[], char *matrix_type,
+		   int *n, int *w, int *relax, int *nrhs, int *maxsuper,
+		   int *rowblk, int *colblk, int *lwork, float *u);
 
 main(int argc, char *argv[])
 {
@@ -83,7 +87,6 @@ main(int argc, char *argv[])
     static trans_t transs[]  = {NOTRANS, TRANS, CONJ};
 
     /* Some function prototypes */ 
-    static void parse_command_line();
     extern int sp_cget01(int, int, SuperMatrix *, SuperMatrix *, 
 		         SuperMatrix *, int *, float *);
     extern int sp_cget02(trans_t, int, int, int, SuperMatrix *, complex *,
@@ -500,7 +503,7 @@ main(int argc, char *argv[])
 static void
 parse_command_line(int argc, char *argv[], char *matrix_type,
 		   int *n, int *w, int *relax, int *nrhs, int *maxsuper,
-		   int *rowblk, int *colblk, int *lwork, double *u)
+		   int *rowblk, int *colblk, int *lwork, float *u)
 {
     int c;
     extern char *optarg;
diff --git a/TESTING/ddrive.c b/TESTING/ddrive.c
index ef24d43..afa058e 100644
--- a/TESTING/ddrive.c
+++ b/TESTING/ddrive.c
@@ -11,7 +11,7 @@
  * Purpose:             MAIN test program
  */
 #include <string.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 #define NTESTS    5      /* Number of test types */
 #define NTYPES    11     /* Number of matrix types */
@@ -21,6 +21,10 @@
 #define	FMT2      "%10s:fact=%4d, trans=%4d, equed=%c, n=%d, imat=%d, test(%d)=%12.5g\n"
 #define FMT3      "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n"
 
+static void
+parse_command_line(int argc, char *argv[], char *matrix_type,
+		   int *n, int *w, int *relax, int *nrhs, int *maxsuper,
+		   int *rowblk, int *colblk, int *lwork, double *u);
 
 main(int argc, char *argv[])
 {
@@ -83,7 +87,6 @@ main(int argc, char *argv[])
     static trans_t transs[]  = {NOTRANS, TRANS, CONJ};
 
     /* Some function prototypes */ 
-    static void parse_command_line();
     extern int sp_dget01(int, int, SuperMatrix *, SuperMatrix *, 
 		         SuperMatrix *, int *, double *);
     extern int sp_dget02(trans_t, int, int, int, SuperMatrix *, double *,
diff --git a/TESTING/sdrive.c b/TESTING/sdrive.c
index 74d9fd0..0afb3e2 100644
--- a/TESTING/sdrive.c
+++ b/TESTING/sdrive.c
@@ -11,7 +11,7 @@
  * Purpose:             MAIN test program
  */
 #include <string.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 #define NTESTS    5      /* Number of test types */
 #define NTYPES    11     /* Number of matrix types */
@@ -21,6 +21,10 @@
 #define	FMT2      "%10s:fact=%4d, trans=%4d, equed=%c, n=%d, imat=%d, test(%d)=%12.5g\n"
 #define FMT3      "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n"
 
+static void
+parse_command_line(int argc, char *argv[], char *matrix_type,
+		   int *n, int *w, int *relax, int *nrhs, int *maxsuper,
+		   int *rowblk, int *colblk, int *lwork, float *u);
 
 main(int argc, char *argv[])
 {
@@ -83,7 +87,6 @@ main(int argc, char *argv[])
     static trans_t transs[]  = {NOTRANS, TRANS, CONJ};
 
     /* Some function prototypes */ 
-    static void parse_command_line();
     extern int sp_sget01(int, int, SuperMatrix *, SuperMatrix *, 
 		         SuperMatrix *, int *, float *);
     extern int sp_sget02(trans_t, int, int, int, SuperMatrix *, float *,
@@ -500,7 +503,7 @@ main(int argc, char *argv[])
 static void
 parse_command_line(int argc, char *argv[], char *matrix_type,
 		   int *n, int *w, int *relax, int *nrhs, int *maxsuper,
-		   int *rowblk, int *colblk, int *lwork, double *u)
+		   int *rowblk, int *colblk, int *lwork, float *u)
 {
     int c;
     extern char *optarg;
diff --git a/TESTING/sp_cconvert.c b/TESTING/sp_cconvert.c
index e4aab06..243400d 100644
--- a/TESTING/sp_cconvert.c
+++ b/TESTING/sp_cconvert.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 /*
  * Convert a full matrix into a sparse matrix format. 
diff --git a/TESTING/sp_cget01.c b/TESTING/sp_cget01.c
index c1bf55a..8d642c2 100644
--- a/TESTING/sp_cget01.c
+++ b/TESTING/sp_cget01.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 int sp_cget01(int m, int n, SuperMatrix *A, SuperMatrix *L, 
 		SuperMatrix *U, int *perm_r, float *resid)
diff --git a/TESTING/sp_cget02.c b/TESTING/sp_cget02.c
index a3416f5..a45c8e8 100644
--- a/TESTING/sp_cget02.c
+++ b/TESTING/sp_cget02.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 int sp_cget02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
 	      complex *x, int ldx, complex *b, int ldb, float *resid)
diff --git a/TESTING/sp_cget04.c b/TESTING/sp_cget04.c
index 655d247..9291c94 100644
--- a/TESTING/sp_cget04.c
+++ b/TESTING/sp_cget04.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "csp_defs.h"
-#include "util.h"
+#include "slu_cdefs.h"
 
 int sp_cget04(int n, int nrhs, complex *x, int ldx, complex *xact,
 	      int ldxact, float rcond, float *resid)
diff --git a/TESTING/sp_cget07.c b/TESTING/sp_cget07.c
index 9d23660..2e69f03 100644
--- a/TESTING/sp_cget07.c
+++ b/TESTING/sp_cget07.c
@@ -7,7 +7,7 @@
  *
  */
 #include <math.h>
-#include "csp_defs.h"
+#include "slu_cdefs.h"
 
 int sp_cget07(trans_t trans, int n, int nrhs, SuperMatrix *A, complex *b, 
 	      int ldb, complex *x, int ldx, complex *xact, 
diff --git a/TESTING/sp_dconvert.c b/TESTING/sp_dconvert.c
index 7391e69..dec3b32 100644
--- a/TESTING/sp_dconvert.c
+++ b/TESTING/sp_dconvert.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 /*
  * Convert a full matrix into a sparse matrix format. 
diff --git a/TESTING/sp_dget01.c b/TESTING/sp_dget01.c
index 54a31ed..d6b8594 100644
--- a/TESTING/sp_dget01.c
+++ b/TESTING/sp_dget01.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 int sp_dget01(int m, int n, SuperMatrix *A, SuperMatrix *L, 
 		SuperMatrix *U, int *perm_r, double *resid)
diff --git a/TESTING/sp_dget02.c b/TESTING/sp_dget02.c
index eab3ec4..9d08dda 100644
--- a/TESTING/sp_dget02.c
+++ b/TESTING/sp_dget02.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 int sp_dget02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
 	      double *x, int ldx, double *b, int ldb, double *resid)
diff --git a/TESTING/sp_dget04.c b/TESTING/sp_dget04.c
index dd3d1f4..dc88ed8 100644
--- a/TESTING/sp_dget04.c
+++ b/TESTING/sp_dget04.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "dsp_defs.h"
-#include "util.h"
+#include "slu_ddefs.h"
 
 int sp_dget04(int n, int nrhs, double *x, int ldx, double *xact,
 	      int ldxact, double rcond, double *resid)
diff --git a/TESTING/sp_dget07.c b/TESTING/sp_dget07.c
index ca78c22..2737c22 100644
--- a/TESTING/sp_dget07.c
+++ b/TESTING/sp_dget07.c
@@ -7,7 +7,7 @@
  *
  */
 #include <math.h>
-#include "dsp_defs.h"
+#include "slu_ddefs.h"
 
 int sp_dget07(trans_t trans, int n, int nrhs, SuperMatrix *A, double *b, 
 	      int ldb, double *x, int ldx, double *xact, 
diff --git a/TESTING/sp_ienv.c b/TESTING/sp_ienv.c
index a0a8509..480531b 100644
--- a/TESTING/sp_ienv.c
+++ b/TESTING/sp_ienv.c
@@ -2,6 +2,8 @@
  * File name:		sp_ienv.c
  * History:             Modified from lapack routine ILAENV
  */
+#include "slu_Cnames.h"
+
 int
 sp_ienv(int ispec)
 {
diff --git a/TESTING/sp_sconvert.c b/TESTING/sp_sconvert.c
index 4c51a07..0b35d78 100644
--- a/TESTING/sp_sconvert.c
+++ b/TESTING/sp_sconvert.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 /*
  * Convert a full matrix into a sparse matrix format. 
diff --git a/TESTING/sp_sget01.c b/TESTING/sp_sget01.c
index 07c8d2e..5ab57b5 100644
--- a/TESTING/sp_sget01.c
+++ b/TESTING/sp_sget01.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 int sp_sget01(int m, int n, SuperMatrix *A, SuperMatrix *L, 
 		SuperMatrix *U, int *perm_r, float *resid)
diff --git a/TESTING/sp_sget02.c b/TESTING/sp_sget02.c
index 892f068..90826d1 100644
--- a/TESTING/sp_sget02.c
+++ b/TESTING/sp_sget02.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 int sp_sget02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
 	      float *x, int ldx, float *b, int ldb, float *resid)
diff --git a/TESTING/sp_sget04.c b/TESTING/sp_sget04.c
index d8c3c7a..cb4e6dc 100644
--- a/TESTING/sp_sget04.c
+++ b/TESTING/sp_sget04.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "ssp_defs.h"
-#include "util.h"
+#include "slu_sdefs.h"
 
 int sp_sget04(int n, int nrhs, float *x, int ldx, float *xact,
 	      int ldxact, float rcond, float *resid)
diff --git a/TESTING/sp_sget07.c b/TESTING/sp_sget07.c
index aaf9776..f7d86be 100644
--- a/TESTING/sp_sget07.c
+++ b/TESTING/sp_sget07.c
@@ -7,7 +7,7 @@
  *
  */
 #include <math.h>
-#include "ssp_defs.h"
+#include "slu_sdefs.h"
 
 int sp_sget07(trans_t trans, int n, int nrhs, SuperMatrix *A, float *b, 
 	      int ldb, float *x, int ldx, float *xact, 
diff --git a/TESTING/sp_zconvert.c b/TESTING/sp_zconvert.c
index 8f4e866..4e1d17e 100644
--- a/TESTING/sp_zconvert.c
+++ b/TESTING/sp_zconvert.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 /*
  * Convert a full matrix into a sparse matrix format. 
diff --git a/TESTING/sp_zget01.c b/TESTING/sp_zget01.c
index 97c46d4..7ee93e2 100644
--- a/TESTING/sp_zget01.c
+++ b/TESTING/sp_zget01.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 int sp_zget01(int m, int n, SuperMatrix *A, SuperMatrix *L, 
 		SuperMatrix *U, int *perm_r, double *resid)
diff --git a/TESTING/sp_zget02.c b/TESTING/sp_zget02.c
index 4bc9756..ed24d21 100644
--- a/TESTING/sp_zget02.c
+++ b/TESTING/sp_zget02.c
@@ -6,7 +6,7 @@
  * October 15, 2003
  *
  */
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 int sp_zget02(trans_t trans, int m, int n, int nrhs, SuperMatrix *A,
 	      doublecomplex *x, int ldx, doublecomplex *b, int ldb, double *resid)
diff --git a/TESTING/sp_zget04.c b/TESTING/sp_zget04.c
index fc5a820..3a2632d 100644
--- a/TESTING/sp_zget04.c
+++ b/TESTING/sp_zget04.c
@@ -1,5 +1,4 @@
 
-
 /*
  * -- SuperLU routine (version 2.0) --
  * Univ. of California Berkeley, Xerox Palo Alto Research Center,
@@ -8,8 +7,7 @@
  *
  */
 #include <math.h>
-#include "zsp_defs.h"
-#include "util.h"
+#include "slu_zdefs.h"
 
 int sp_zget04(int n, int nrhs, doublecomplex *x, int ldx, doublecomplex *xact,
 	      int ldxact, double rcond, double *resid)
diff --git a/TESTING/sp_zget07.c b/TESTING/sp_zget07.c
index c41fdcb..5f9ab12 100644
--- a/TESTING/sp_zget07.c
+++ b/TESTING/sp_zget07.c
@@ -7,7 +7,7 @@
  *
  */
 #include <math.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 int sp_zget07(trans_t trans, int n, int nrhs, SuperMatrix *A, doublecomplex *b, 
 	      int ldb, doublecomplex *x, int ldx, doublecomplex *xact, 
diff --git a/TESTING/zdrive.c b/TESTING/zdrive.c
index 166f310..1269fb0 100644
--- a/TESTING/zdrive.c
+++ b/TESTING/zdrive.c
@@ -11,7 +11,7 @@
  * Purpose:             MAIN test program
  */
 #include <string.h>
-#include "zsp_defs.h"
+#include "slu_zdefs.h"
 
 #define NTESTS    5      /* Number of test types */
 #define NTYPES    11     /* Number of matrix types */
@@ -21,6 +21,10 @@
 #define	FMT2      "%10s:fact=%4d, trans=%4d, equed=%c, n=%d, imat=%d, test(%d)=%12.5g\n"
 #define FMT3      "%10s:info=%d, izero=%d, n=%d, nrhs=%d, imat=%d, nfail=%d\n"
 
+static void
+parse_command_line(int argc, char *argv[], char *matrix_type,
+		   int *n, int *w, int *relax, int *nrhs, int *maxsuper,
+		   int *rowblk, int *colblk, int *lwork, double *u);
 
 main(int argc, char *argv[])
 {
@@ -83,7 +87,6 @@ main(int argc, char *argv[])
     static trans_t transs[]  = {NOTRANS, TRANS, CONJ};
 
     /* Some function prototypes */ 
-    static void parse_command_line();
     extern int sp_zget01(int, int, SuperMatrix *, SuperMatrix *, 
 		         SuperMatrix *, int *, double *);
     extern int sp_zget02(trans_t, int, int, int, SuperMatrix *, doublecomplex *,
diff --git a/make.inc b/make.inc
index 51fda64..58b609a 100644
--- a/make.inc
+++ b/make.inc
@@ -14,16 +14,12 @@
 #
 ############################################################################
 #
-#  The machine (platform) identifier to append to the library names
-#
-PLAT = _solaris
-
 #
 #  The name of the libraries to be created/linked to
 #
-TMGLIB       = tmglib$(PLAT).a
-SUPERLULIB   = superlu$(PLAT).a
-BLASLIB      = ../blas$(PLAT).a
+TMGLIB       = libtmglib.a
+SUPERLULIB   = libsuperlu_3.0.a
+BLASLIB      = ../libblas.a
 
 #
 #  The archiver and the flag(s) to use when building archive (library)
@@ -47,4 +43,4 @@ CDEFS        = -DAdd_
 #
 # The directory in which Matlab is installed
 #
-MATLAB	     = /usr/sww/matlab
+MATLAB	     = /usr/sww/pkg/matlab

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/superlu.git